author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:19:13 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:19:13 +0000
commit | 218caa410aa38c29984be31a5229b9fa717560ee (patch) |
tree | c54bd55eeb6e4c508940a30e94c0032fbd45d677 | /compiler/rustc_parse
parent | Releasing progress-linux version 1.67.1+dfsg1-1~progress7.99u1. (diff) |
download | rustc-218caa410aa38c29984be31a5229b9fa717560ee.tar.xz, rustc-218caa410aa38c29984be31a5229b9fa717560ee.zip |
Merging upstream version 1.68.2+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r-- | compiler/rustc_parse/src/errors.rs | 58
-rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 57
-rw-r--r-- | compiler/rustc_parse/src/lexer/unicode_chars.rs | 650
-rw-r--r-- | compiler/rustc_parse/src/parser/attr.rs | 1
-rw-r--r-- | compiler/rustc_parse/src/parser/attr_wrapper.rs | 1
-rw-r--r-- | compiler/rustc_parse/src/parser/diagnostics.rs | 174
-rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 199
-rw-r--r-- | compiler/rustc_parse/src/parser/generics.rs | 118
-rw-r--r-- | compiler/rustc_parse/src/parser/item.rs | 96
-rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 28
-rw-r--r-- | compiler/rustc_parse/src/parser/pat.rs | 42
-rw-r--r-- | compiler/rustc_parse/src/parser/path.rs | 3
-rw-r--r-- | compiler/rustc_parse/src/parser/stmt.rs | 31
-rw-r--r-- | compiler/rustc_parse/src/parser/ty.rs | 205
-rw-r--r-- | compiler/rustc_parse_format/Cargo.toml | 1
-rw-r--r-- | compiler/rustc_parse_format/src/lib.rs | 162
-rw-r--r-- | compiler/rustc_parse_format/src/tests.rs | 80
17 files changed, 1294 insertions(+), 612 deletions(-)
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 9875cde4a..06b970ad9 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -971,6 +971,24 @@ pub(crate) struct StructLiteralBodyWithoutPathSugg { } #[derive(Diagnostic)] +#[diag(parse_struct_literal_needing_parens)] +pub(crate) struct StructLiteralNeedingParens { + #[primary_span] + pub span: Span, + #[subdiagnostic] + pub sugg: StructLiteralNeedingParensSugg, +} + +#[derive(Subdiagnostic)] +#[multipart_suggestion(suggestion, applicability = "machine-applicable")] +pub(crate) struct StructLiteralNeedingParensSugg { + #[suggestion_part(code = "(")] + pub before: Span, + #[suggestion_part(code = ")")] + pub after: Span, +} + +#[derive(Diagnostic)] #[diag(parse_unmatched_angle_brackets)] pub(crate) struct UnmatchedAngleBrackets { #[primary_span] @@ -1221,3 +1239,43 @@ pub(crate) struct UnexpectedIfWithIf( #[suggestion(applicability = "machine-applicable", code = " ", style = "verbose")] pub Span, ); + +#[derive(Diagnostic)] +#[diag(parse_maybe_fn_typo_with_impl)] +pub(crate) struct FnTypoWithImpl { + #[primary_span] + #[suggestion(applicability = "maybe-incorrect", code = "impl", style = "verbose")] + pub fn_span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_expected_fn_path_found_fn_keyword)] +pub(crate) struct ExpectedFnPathFoundFnKeyword { + #[primary_span] + #[suggestion(applicability = "machine-applicable", code = "Fn", style = "verbose")] + pub fn_token_span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_where_clause_before_tuple_struct_body)] +pub(crate) struct WhereClauseBeforeTupleStructBody { + #[primary_span] + #[label] + pub span: Span, + #[label(name_label)] + pub name: Span, + #[label(body_label)] + pub body: Span, + #[subdiagnostic] + pub sugg: Option<WhereClauseBeforeTupleStructBodySugg>, +} + +#[derive(Subdiagnostic)] +#[multipart_suggestion(suggestion, applicability = "machine-applicable")] +pub(crate) struct WhereClauseBeforeTupleStructBodySugg { + #[suggestion_part(code = "{snippet}")] + pub left: Span, + pub snippet: String, + #[suggestion_part(code = "")] + pub right: Span, +} diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f027843e6..9fe8d9836 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -52,8 +52,15 @@ pub(crate) fn parse_token_trees<'a>( } let cursor = Cursor::new(src); - let string_reader = - StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span }; + let string_reader = StringReader { + sess, + start_pos, + pos: start_pos, + src, + cursor, + override_span, + nbsp_is_whitespace: false, + }; tokentrees::TokenTreesReader::parse_all_token_trees(string_reader) } @@ -68,6 +75,10 @@ struct StringReader<'a> { /// Cursor for getting lexer tokens. cursor: Cursor<'a>, override_span: Option<Span>, + /// When a "unknown start of token: \u{a0}" has already been emitted earlier + /// in this file, it's safe to treat further occurrences of the non-breaking + /// space character as whitespace. + nbsp_is_whitespace: bool, } impl<'a> StringReader<'a> { @@ -79,7 +90,7 @@ impl<'a> StringReader<'a> { /// preceded by whitespace. 
fn next_token(&mut self) -> (Token, bool) { let mut preceded_by_whitespace = false; - + let mut swallow_next_invalid = 0; // Skip trivial (whitespace & comments) tokens loop { let token = self.cursor.advance_token(); @@ -232,19 +243,44 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => { - let c = self.str_from(start).chars().next().unwrap(); + // Don't emit diagnostics for sequences of the same invalid token + if swallow_next_invalid > 0 { + swallow_next_invalid -= 1; + continue; + } + let mut it = self.str_from_to_end(start).chars(); + let c = it.next().unwrap(); + if c == '\u{00a0}' { + // If an error has already been reported on non-breaking + // space characters earlier in the file, treat all + // subsequent occurrences as whitespace. + if self.nbsp_is_whitespace { + preceded_by_whitespace = true; + continue; + } + self.nbsp_is_whitespace = true; + } + let repeats = it.take_while(|c1| *c1 == c).count(); let mut err = - self.struct_err_span_char(start, self.pos, "unknown start of token", c); + self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c); // FIXME: the lexer could be used to turn the ASCII version of unicode // homoglyphs, instead of keeping a table in `check_for_substitution`into the // token. Ideally, this should be inside `rustc_lexer`. However, we should // first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this // way. - let token = unicode_chars::check_for_substitution(self, start, c, &mut err); + let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1); if c == '\x00' { err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used"); } + if repeats > 0 { + if repeats == 1 { + err.note(format!("character appears once more")); + } else { + err.note(format!("character appears {repeats} more times")); + } + swallow_next_invalid = repeats; + } err.emit(); if let Some(token) = token { token @@ -471,7 +507,7 @@ impl<'a> StringReader<'a> { /// Slice of the source text from `start` up to but excluding `self.pos`, /// meaning the slice does not include the character `self.ch`. - fn str_from(&self, start: BytePos) -> &str { + fn str_from(&self, start: BytePos) -> &'a str { self.str_from_to(start, self.pos) } @@ -482,10 +518,15 @@ impl<'a> StringReader<'a> { } /// Slice of the source text spanning from `start` up to but excluding `end`. - fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { + fn str_from_to(&self, start: BytePos, end: BytePos) -> &'a str { &self.src[self.src_index(start)..self.src_index(end)] } + /// Slice of the source text spanning from `start` until the end + fn str_from_to_end(&self, start: BytePos) -> &'a str { + &self.src[self.src_index(start)..] + } + fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! 
{ match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) { Err(RawStrError::InvalidStarter { bad_char }) => { diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs index f1b50296e..34d003ccf 100644 --- a/compiler/rustc_parse/src/lexer/unicode_chars.rs +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -7,329 +7,331 @@ use rustc_errors::{Applicability, Diagnostic}; use rustc_span::{symbol::kw, BytePos, Pos, Span}; #[rustfmt::skip] // for line breaks -pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[ - ('
', "Line Separator", ' '), - ('
', "Paragraph Separator", ' '), - (' ', "Ogham Space mark", ' '), - (' ', "En Quad", ' '), - (' ', "Em Quad", ' '), - (' ', "En Space", ' '), - (' ', "Em Space", ' '), - (' ', "Three-Per-Em Space", ' '), - (' ', "Four-Per-Em Space", ' '), - (' ', "Six-Per-Em Space", ' '), - (' ', "Punctuation Space", ' '), - (' ', "Thin Space", ' '), - (' ', "Hair Space", ' '), - (' ', "Medium Mathematical Space", ' '), - (' ', "No-Break Space", ' '), - (' ', "Figure Space", ' '), - (' ', "Narrow No-Break Space", ' '), - (' ', "Ideographic Space", ' '), - - ('ߺ', "Nko Lajanyalan", '_'), - ('﹍', "Dashed Low Line", '_'), - ('﹎', "Centreline Low Line", '_'), - ('﹏', "Wavy Low Line", '_'), - ('_', "Fullwidth Low Line", '_'), - - ('‐', "Hyphen", '-'), - ('‑', "Non-Breaking Hyphen", '-'), - ('‒', "Figure Dash", '-'), - ('–', "En Dash", '-'), - ('—', "Em Dash", '-'), - ('﹘', "Small Em Dash", '-'), - ('۔', "Arabic Full Stop", '-'), - ('⁃', "Hyphen Bullet", '-'), - ('˗', "Modifier Letter Minus Sign", '-'), - ('−', "Minus Sign", '-'), - ('➖', "Heavy Minus Sign", '-'), - ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'), - ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'), - ('-', "Fullwidth Hyphen-Minus", '-'), - ('―', "Horizontal Bar", '-'), - ('─', "Box Drawings Light Horizontal", '-'), - ('━', "Box Drawings Heavy Horizontal", '-'), - ('㇐', "CJK Stroke H", '-'), - ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'), - ('ᅳ', "Hangul Jungseong Eu", '-'), - ('ㅡ', "Hangul Letter Eu", '-'), - ('一', "CJK Unified Ideograph-4E00", '-'), - ('⼀', "Kangxi Radical One", '-'), - - ('؍', "Arabic Date Separator", ','), - ('٫', "Arabic Decimal Separator", ','), - ('‚', "Single Low-9 Quotation Mark", ','), - ('¸', "Cedilla", ','), - ('ꓹ', "Lisu Letter Tone Na Po", ','), - (',', "Fullwidth Comma", ','), - - (';', "Greek Question Mark", ';'), - (';', "Fullwidth Semicolon", ';'), - ('︔', "Presentation Form For Vertical Semicolon", ';'), - - ('ः', "Devanagari Sign Visarga", ':'), - ('ઃ', "Gujarati Sign Visarga", ':'), - (':', "Fullwidth Colon", ':'), - ('։', "Armenian Full Stop", ':'), - ('܃', "Syriac Supralinear Colon", ':'), - ('܄', "Syriac Sublinear Colon", ':'), - ('᛬', "Runic Multiple Punctuation", ':'), - ('︰', "Presentation Form For Vertical Two Dot Leader", ':'), - ('᠃', "Mongolian Full Stop", ':'), - ('᠉', "Mongolian Manchu Full Stop", ':'), - ('⁚', "Two Dot Punctuation", ':'), - ('׃', "Hebrew Punctuation Sof Pasuq", ':'), - ('˸', "Modifier Letter Raised Colon", ':'), - ('꞉', "Modifier Letter Colon", ':'), - ('∶', "Ratio", ':'), - ('ː', "Modifier Letter Triangular Colon", ':'), - ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'), - ('︓', "Presentation Form For Vertical Colon", ':'), - - ('!', "Fullwidth Exclamation Mark", '!'), - ('ǃ', "Latin Letter Retroflex Click", '!'), - ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'), - ('︕', "Presentation Form For Vertical Exclamation Mark", '!'), - - ('ʔ', "Latin Letter Glottal Stop", '?'), - ('Ɂ', "Latin Capital Letter Glottal Stop", '?'), - ('ॽ', "Devanagari Letter Glottal Stop", '?'), - ('Ꭾ', "Cherokee Letter He", '?'), - ('ꛫ', "Bamum Letter Ntuu", '?'), - ('?', "Fullwidth Question Mark", '?'), - ('︖', "Presentation Form For Vertical Question Mark", '?'), - - ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'), - ('․', "One Dot Leader", '.'), - ('܁', "Syriac Supralinear Full Stop", '.'), - ('܂', "Syriac Sublinear Full Stop", '.'), - ('꘎', "Vai Full Stop", '.'), - ('𐩐', "Kharoshthi Punctuation Dot", '.'), - ('٠', "Arabic-Indic Digit Zero", '.'), - ('۰', "Extended Arabic-Indic Digit Zero", '.'), - ('ꓸ', 
"Lisu Letter Tone Mya Ti", '.'), - ('·', "Middle Dot", '.'), - ('・', "Katakana Middle Dot", '.'), - ('・', "Halfwidth Katakana Middle Dot", '.'), - ('᛫', "Runic Single Punctuation", '.'), - ('·', "Greek Ano Teleia", '.'), - ('⸱', "Word Separator Middle Dot", '.'), - ('𐄁', "Aegean Word Separator Dot", '.'), - ('•', "Bullet", '.'), - ('‧', "Hyphenation Point", '.'), - ('∙', "Bullet Operator", '.'), - ('⋅', "Dot Operator", '.'), - ('ꞏ', "Latin Letter Sinological Dot", '.'), - ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), - ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), - ('.', "Fullwidth Full Stop", '.'), - ('。', "Ideographic Full Stop", '.'), - ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'), - - ('՝', "Armenian Comma", '\''), - (''', "Fullwidth Apostrophe", '\''), - ('‘', "Left Single Quotation Mark", '\''), - ('’', "Right Single Quotation Mark", '\''), - ('‛', "Single High-Reversed-9 Quotation Mark", '\''), - ('′', "Prime", '\''), - ('‵', "Reversed Prime", '\''), - ('՚', "Armenian Apostrophe", '\''), - ('׳', "Hebrew Punctuation Geresh", '\''), - ('`', "Grave Accent", '\''), - ('`', "Greek Varia", '\''), - ('`', "Fullwidth Grave Accent", '\''), - ('´', "Acute Accent", '\''), - ('΄', "Greek Tonos", '\''), - ('´', "Greek Oxia", '\''), - ('᾽', "Greek Koronis", '\''), - ('᾿', "Greek Psili", '\''), - ('῾', "Greek Dasia", '\''), - ('ʹ', "Modifier Letter Prime", '\''), - ('ʹ', "Greek Numeral Sign", '\''), - ('ˈ', "Modifier Letter Vertical Line", '\''), - ('ˊ', "Modifier Letter Acute Accent", '\''), - ('ˋ', "Modifier Letter Grave Accent", '\''), - ('˴', "Modifier Letter Middle Grave Accent", '\''), - ('ʻ', "Modifier Letter Turned Comma", '\''), - ('ʽ', "Modifier Letter Reversed Comma", '\''), - ('ʼ', "Modifier Letter Apostrophe", '\''), - ('ʾ', "Modifier Letter Right Half Ring", '\''), - ('ꞌ', "Latin Small Letter Saltillo", '\''), - ('י', "Hebrew Letter Yod", '\''), - ('ߴ', "Nko High Tone Apostrophe", '\''), - ('ߵ', "Nko Low Tone Apostrophe", '\''), - ('ᑊ', "Canadian Syllabics West-Cree P", '\''), - ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''), - ('𖽑', "Miao Sign Aspiration", '\''), - ('𖽒', "Miao Sign Reformed Voicing", '\''), - - ('᳓', "Vedic Sign Nihshvasa", '"'), - ('"', "Fullwidth Quotation Mark", '"'), - ('“', "Left Double Quotation Mark", '"'), - ('”', "Right Double Quotation Mark", '"'), - ('‟', "Double High-Reversed-9 Quotation Mark", '"'), - ('″', "Double Prime", '"'), - ('‶', "Reversed Double Prime", '"'), - ('〃', "Ditto Mark", '"'), - ('״', "Hebrew Punctuation Gershayim", '"'), - ('˝', "Double Acute Accent", '"'), - ('ʺ', "Modifier Letter Double Prime", '"'), - ('˶', "Modifier Letter Middle Double Acute Accent", '"'), - ('˵', "Modifier Letter Middle Double Grave Accent", '"'), - ('ˮ', "Modifier Letter Double Apostrophe", '"'), - ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'), - ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'), - ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'), - - ('(', "Fullwidth Left Parenthesis", '('), - ('❨', "Medium Left Parenthesis Ornament", '('), - ('﴾', "Ornate Left Parenthesis", '('), - - (')', "Fullwidth Right Parenthesis", ')'), - ('❩', "Medium Right Parenthesis Ornament", ')'), - ('﴿', "Ornate Right Parenthesis", ')'), - - ('[', "Fullwidth Left Square Bracket", '['), - ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), - ('「', "Left Corner Bracket", '['), - ('『', "Left White Corner Bracket", '['), - ('【', "Left Black Lenticular Bracket", '['), - ('〔', "Left Tortoise Shell Bracket", '['), - ('〖', 
"Left White Lenticular Bracket", '['), - ('〘', "Left White Tortoise Shell Bracket", '['), - ('〚', "Left White Square Bracket", '['), - - (']', "Fullwidth Right Square Bracket", ']'), - ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), - ('」', "Right Corner Bracket", ']'), - ('』', "Right White Corner Bracket", ']'), - ('】', "Right Black Lenticular Bracket", ']'), - ('〕', "Right Tortoise Shell Bracket", ']'), - ('〗', "Right White Lenticular Bracket", ']'), - ('〙', "Right White Tortoise Shell Bracket", ']'), - ('〛', "Right White Square Bracket", ']'), - - ('❴', "Medium Left Curly Bracket Ornament", '{'), - ('𝄔', "Musical Symbol Brace", '{'), - ('{', "Fullwidth Left Curly Bracket", '{'), - - ('❵', "Medium Right Curly Bracket Ornament", '}'), - ('}', "Fullwidth Right Curly Bracket", '}'), - - ('⁎', "Low Asterisk", '*'), - ('٭', "Arabic Five Pointed Star", '*'), - ('∗', "Asterisk Operator", '*'), - ('𐌟', "Old Italic Letter Ess", '*'), - ('*', "Fullwidth Asterisk", '*'), - - ('᜵', "Philippine Single Punctuation", '/'), - ('⁁', "Caret Insertion Point", '/'), - ('∕', "Division Slash", '/'), - ('⁄', "Fraction Slash", '/'), - ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'), - ('⟋', "Mathematical Rising Diagonal", '/'), - ('⧸', "Big Solidus", '/'), - ('𝈺', "Greek Instrumental Notation Symbol-47", '/'), - ('㇓', "CJK Stroke Sp", '/'), - ('〳', "Vertical Kana Repeat Mark Upper Half", '/'), - ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), - ('ノ', "Katakana Letter No", '/'), - ('丿', "CJK Unified Ideograph-4E3F", '/'), - ('⼃', "Kangxi Radical Slash", '/'), - ('/', "Fullwidth Solidus", '/'), - - ('\', "Fullwidth Reverse Solidus", '\\'), - ('﹨', "Small Reverse Solidus", '\\'), - ('∖', "Set Minus", '\\'), - ('⟍', "Mathematical Falling Diagonal", '\\'), - ('⧵', "Reverse Solidus Operator", '\\'), - ('⧹', "Big Reverse Solidus", '\\'), - ('⧹', "Greek Vocal Notation Symbol-16", '\\'), - ('⧹', "Greek Instrumental Symbol-48", '\\'), - ('㇔', "CJK Stroke D", '\\'), - ('丶', "CJK Unified Ideograph-4E36", '\\'), - ('⼂', "Kangxi Radical Dot", '\\'), - ('、', "Ideographic Comma", '\\'), - ('ヽ', "Katakana Iteration Mark", '\\'), - - ('ꝸ', "Latin Small Letter Um", '&'), - ('&', "Fullwidth Ampersand", '&'), - - ('᛭', "Runic Cross Punctuation", '+'), - ('➕', "Heavy Plus Sign", '+'), - ('𐊛', "Lycian Letter H", '+'), - ('﬩', "Hebrew Letter Alternative Plus Sign", '+'), - ('+', "Fullwidth Plus Sign", '+'), - - ('‹', "Single Left-Pointing Angle Quotation Mark", '<'), - ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'), - ('˂', "Modifier Letter Left Arrowhead", '<'), - ('𝈶', "Greek Instrumental Symbol-40", '<'), - ('ᐸ', "Canadian Syllabics Pa", '<'), - ('ᚲ', "Runic Letter Kauna", '<'), - ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'), - ('⟨', "Mathematical Left Angle Bracket", '<'), - ('〈', "Left-Pointing Angle Bracket", '<'), - ('〈', "Left Angle Bracket", '<'), - ('㇛', "CJK Stroke Pd", '<'), - ('く', "Hiragana Letter Ku", '<'), - ('𡿨', "CJK Unified Ideograph-21FE8", '<'), - ('《', "Left Double Angle Bracket", '<'), - ('<', "Fullwidth Less-Than Sign", '<'), - - ('᐀', "Canadian Syllabics Hyphen", '='), - ('⹀', "Double Hyphen", '='), - ('゠', "Katakana-Hiragana Double Hyphen", '='), - ('꓿', "Lisu Punctuation Full Stop", '='), - ('=', "Fullwidth Equals Sign", '='), - - ('›', "Single Right-Pointing Angle Quotation Mark", '>'), - ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'), - ('˃', "Modifier Letter Right Arrowhead", '>'), - ('𝈷', "Greek Instrumental Symbol-42", 
'>'), - ('ᐳ', "Canadian Syllabics Po", '>'), - ('𖼿', "Miao Letter Archaic Zza", '>'), - ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'), - ('⟩', "Mathematical Right Angle Bracket", '>'), - ('〉', "Right-Pointing Angle Bracket", '>'), - ('〉', "Right Angle Bracket", '>'), - ('》', "Right Double Angle Bracket", '>'), - ('>', "Fullwidth Greater-Than Sign", '>'), +pub(crate) const UNICODE_ARRAY: &[(char, &str, &str)] = &[ + ('
', "Line Separator", " "), + ('
', "Paragraph Separator", " "), + (' ', "Ogham Space mark", " "), + (' ', "En Quad", " "), + (' ', "Em Quad", " "), + (' ', "En Space", " "), + (' ', "Em Space", " "), + (' ', "Three-Per-Em Space", " "), + (' ', "Four-Per-Em Space", " "), + (' ', "Six-Per-Em Space", " "), + (' ', "Punctuation Space", " "), + (' ', "Thin Space", " "), + (' ', "Hair Space", " "), + (' ', "Medium Mathematical Space", " "), + (' ', "No-Break Space", " "), + (' ', "Figure Space", " "), + (' ', "Narrow No-Break Space", " "), + (' ', "Ideographic Space", " "), + + ('ߺ', "Nko Lajanyalan", "_"), + ('﹍', "Dashed Low Line", "_"), + ('﹎', "Centreline Low Line", "_"), + ('﹏', "Wavy Low Line", "_"), + ('_', "Fullwidth Low Line", "_"), + + ('‐', "Hyphen", "-"), + ('‑', "Non-Breaking Hyphen", "-"), + ('‒', "Figure Dash", "-"), + ('–', "En Dash", "-"), + ('—', "Em Dash", "-"), + ('﹘', "Small Em Dash", "-"), + ('۔', "Arabic Full Stop", "-"), + ('⁃', "Hyphen Bullet", "-"), + ('˗', "Modifier Letter Minus Sign", "-"), + ('−', "Minus Sign", "-"), + ('➖', "Heavy Minus Sign", "-"), + ('Ⲻ', "Coptic Letter Dialect-P Ni", "-"), + ('ー', "Katakana-Hiragana Prolonged Sound Mark", "-"), + ('-', "Fullwidth Hyphen-Minus", "-"), + ('―', "Horizontal Bar", "-"), + ('─', "Box Drawings Light Horizontal", "-"), + ('━', "Box Drawings Heavy Horizontal", "-"), + ('㇐', "CJK Stroke H", "-"), + ('ꟷ', "Latin Epigraphic Letter Sideways I", "-"), + ('ᅳ', "Hangul Jungseong Eu", "-"), + ('ㅡ', "Hangul Letter Eu", "-"), + ('一', "CJK Unified Ideograph-4E00", "-"), + ('⼀', "Kangxi Radical One", "-"), + + ('؍', "Arabic Date Separator", ","), + ('٫', "Arabic Decimal Separator", ","), + ('‚', "Single Low-9 Quotation Mark", ","), + ('¸', "Cedilla", ","), + ('ꓹ', "Lisu Letter Tone Na Po", ","), + (',', "Fullwidth Comma", ","), + + (';', "Greek Question Mark", ";"), + (';', "Fullwidth Semicolon", ";"), + ('︔', "Presentation Form For Vertical Semicolon", ";"), + + ('ः', "Devanagari Sign Visarga", ":"), + ('ઃ', "Gujarati Sign Visarga", ":"), + (':', "Fullwidth Colon", ":"), + ('։', "Armenian Full Stop", ":"), + ('܃', "Syriac Supralinear Colon", ":"), + ('܄', "Syriac Sublinear Colon", ":"), + ('᛬', "Runic Multiple Punctuation", ":"), + ('︰', "Presentation Form For Vertical Two Dot Leader", ":"), + ('᠃', "Mongolian Full Stop", ":"), + ('᠉', "Mongolian Manchu Full Stop", ":"), + ('⁚', "Two Dot Punctuation", ":"), + ('׃', "Hebrew Punctuation Sof Pasuq", ":"), + ('˸', "Modifier Letter Raised Colon", ":"), + ('꞉', "Modifier Letter Colon", ":"), + ('∶', "Ratio", ":"), + ('ː', "Modifier Letter Triangular Colon", ":"), + ('ꓽ', "Lisu Letter Tone Mya Jeu", ":"), + ('︓', "Presentation Form For Vertical Colon", ":"), + + ('!', "Fullwidth Exclamation Mark", "!"), + ('ǃ', "Latin Letter Retroflex Click", "!"), + ('ⵑ', "Tifinagh Letter Tuareg Yang", "!"), + ('︕', "Presentation Form For Vertical Exclamation Mark", "!"), + + ('ʔ', "Latin Letter Glottal Stop", "?"), + ('Ɂ', "Latin Capital Letter Glottal Stop", "?"), + ('ॽ', "Devanagari Letter Glottal Stop", "?"), + ('Ꭾ', "Cherokee Letter He", "?"), + ('ꛫ', "Bamum Letter Ntuu", "?"), + ('?', "Fullwidth Question Mark", "?"), + ('︖', "Presentation Form For Vertical Question Mark", "?"), + + ('𝅭', "Musical Symbol Combining Augmentation Dot", "."), + ('․', "One Dot Leader", "."), + ('܁', "Syriac Supralinear Full Stop", "."), + ('܂', "Syriac Sublinear Full Stop", "."), + ('꘎', "Vai Full Stop", "."), + ('𐩐', "Kharoshthi Punctuation Dot", "."), + ('٠', "Arabic-Indic Digit Zero", "."), + ('۰', "Extended Arabic-Indic Digit Zero", "."), + ('ꓸ', 
"Lisu Letter Tone Mya Ti", "."), + ('·', "Middle Dot", "."), + ('・', "Katakana Middle Dot", "."), + ('・', "Halfwidth Katakana Middle Dot", "."), + ('᛫', "Runic Single Punctuation", "."), + ('·', "Greek Ano Teleia", "."), + ('⸱', "Word Separator Middle Dot", "."), + ('𐄁', "Aegean Word Separator Dot", "."), + ('•', "Bullet", "."), + ('‧', "Hyphenation Point", "."), + ('∙', "Bullet Operator", "."), + ('⋅', "Dot Operator", "."), + ('ꞏ', "Latin Letter Sinological Dot", "."), + ('ᐧ', "Canadian Syllabics Final Middle Dot", "."), + ('ᐧ', "Canadian Syllabics Final Middle Dot", "."), + ('.', "Fullwidth Full Stop", "."), + ('。', "Ideographic Full Stop", "."), + ('︒', "Presentation Form For Vertical Ideographic Full Stop", "."), + + ('՝', "Armenian Comma", "\'"), + (''', "Fullwidth Apostrophe", "\'"), + ('‘', "Left Single Quotation Mark", "\'"), + ('’', "Right Single Quotation Mark", "\'"), + ('‛', "Single High-Reversed-9 Quotation Mark", "\'"), + ('′', "Prime", "\'"), + ('‵', "Reversed Prime", "\'"), + ('՚', "Armenian Apostrophe", "\'"), + ('׳', "Hebrew Punctuation Geresh", "\'"), + ('`', "Grave Accent", "\'"), + ('`', "Greek Varia", "\'"), + ('`', "Fullwidth Grave Accent", "\'"), + ('´', "Acute Accent", "\'"), + ('΄', "Greek Tonos", "\'"), + ('´', "Greek Oxia", "\'"), + ('᾽', "Greek Koronis", "\'"), + ('᾿', "Greek Psili", "\'"), + ('῾', "Greek Dasia", "\'"), + ('ʹ', "Modifier Letter Prime", "\'"), + ('ʹ', "Greek Numeral Sign", "\'"), + ('ˈ', "Modifier Letter Vertical Line", "\'"), + ('ˊ', "Modifier Letter Acute Accent", "\'"), + ('ˋ', "Modifier Letter Grave Accent", "\'"), + ('˴', "Modifier Letter Middle Grave Accent", "\'"), + ('ʻ', "Modifier Letter Turned Comma", "\'"), + ('ʽ', "Modifier Letter Reversed Comma", "\'"), + ('ʼ', "Modifier Letter Apostrophe", "\'"), + ('ʾ', "Modifier Letter Right Half Ring", "\'"), + ('ꞌ', "Latin Small Letter Saltillo", "\'"), + ('י', "Hebrew Letter Yod", "\'"), + ('ߴ', "Nko High Tone Apostrophe", "\'"), + ('ߵ', "Nko Low Tone Apostrophe", "\'"), + ('ᑊ', "Canadian Syllabics West-Cree P", "\'"), + ('ᛌ', "Runic Letter Short-Twig-Sol S", "\'"), + ('𖽑', "Miao Sign Aspiration", "\'"), + ('𖽒', "Miao Sign Reformed Voicing", "\'"), + + ('᳓', "Vedic Sign Nihshvasa", "\""), + ('"', "Fullwidth Quotation Mark", "\""), + ('“', "Left Double Quotation Mark", "\""), + ('”', "Right Double Quotation Mark", "\""), + ('‟', "Double High-Reversed-9 Quotation Mark", "\""), + ('″', "Double Prime", "\""), + ('‶', "Reversed Double Prime", "\""), + ('〃', "Ditto Mark", "\""), + ('״', "Hebrew Punctuation Gershayim", "\""), + ('˝', "Double Acute Accent", "\""), + ('ʺ', "Modifier Letter Double Prime", "\""), + ('˶', "Modifier Letter Middle Double Acute Accent", "\""), + ('˵', "Modifier Letter Middle Double Grave Accent", "\""), + ('ˮ', "Modifier Letter Double Apostrophe", "\""), + ('ײ', "Hebrew Ligature Yiddish Double Yod", "\""), + ('❞', "Heavy Double Comma Quotation Mark Ornament", "\""), + ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", "\""), + + ('(', "Fullwidth Left Parenthesis", "("), + ('❨', "Medium Left Parenthesis Ornament", "("), + ('﴾', "Ornate Left Parenthesis", "("), + + (')', "Fullwidth Right Parenthesis", ")"), + ('❩', "Medium Right Parenthesis Ornament", ")"), + ('﴿', "Ornate Right Parenthesis", ")"), + + ('[', "Fullwidth Left Square Bracket", "["), + ('❲', "Light Left Tortoise Shell Bracket Ornament", "["), + ('「', "Left Corner Bracket", "["), + ('『', "Left White Corner Bracket", "["), + ('【', "Left Black Lenticular Bracket", "["), + ('〔', "Left Tortoise Shell Bracket", 
"["), + ('〖', "Left White Lenticular Bracket", "["), + ('〘', "Left White Tortoise Shell Bracket", "["), + ('〚', "Left White Square Bracket", "["), + + (']', "Fullwidth Right Square Bracket", "]"), + ('❳', "Light Right Tortoise Shell Bracket Ornament", "]"), + ('」', "Right Corner Bracket", "]"), + ('』', "Right White Corner Bracket", "]"), + ('】', "Right Black Lenticular Bracket", "]"), + ('〕', "Right Tortoise Shell Bracket", "]"), + ('〗', "Right White Lenticular Bracket", "]"), + ('〙', "Right White Tortoise Shell Bracket", "]"), + ('〛', "Right White Square Bracket", "]"), + + ('❴', "Medium Left Curly Bracket Ornament", "{"), + ('𝄔', "Musical Symbol Brace", "{"), + ('{', "Fullwidth Left Curly Bracket", "{"), + + ('❵', "Medium Right Curly Bracket Ornament", "}"), + ('}', "Fullwidth Right Curly Bracket", "}"), + + ('⁎', "Low Asterisk", "*"), + ('٭', "Arabic Five Pointed Star", "*"), + ('∗', "Asterisk Operator", "*"), + ('𐌟', "Old Italic Letter Ess", "*"), + ('*', "Fullwidth Asterisk", "*"), + + ('᜵', "Philippine Single Punctuation", "/"), + ('⁁', "Caret Insertion Point", "/"), + ('∕', "Division Slash", "/"), + ('⁄', "Fraction Slash", "/"), + ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", "/"), + ('⟋', "Mathematical Rising Diagonal", "/"), + ('⧸', "Big Solidus", "/"), + ('𝈺', "Greek Instrumental Notation Symbol-47", "/"), + ('㇓', "CJK Stroke Sp", "/"), + ('〳', "Vertical Kana Repeat Mark Upper Half", "/"), + ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", "/"), + ('ノ', "Katakana Letter No", "/"), + ('丿', "CJK Unified Ideograph-4E3F", "/"), + ('⼃', "Kangxi Radical Slash", "/"), + ('/', "Fullwidth Solidus", "/"), + + ('\', "Fullwidth Reverse Solidus", "\\"), + ('﹨', "Small Reverse Solidus", "\\"), + ('∖', "Set Minus", "\\"), + ('⟍', "Mathematical Falling Diagonal", "\\"), + ('⧵', "Reverse Solidus Operator", "\\"), + ('⧹', "Big Reverse Solidus", "\\"), + ('⧹', "Greek Vocal Notation Symbol-16", "\\"), + ('⧹', "Greek Instrumental Symbol-48", "\\"), + ('㇔', "CJK Stroke D", "\\"), + ('丶', "CJK Unified Ideograph-4E36", "\\"), + ('⼂', "Kangxi Radical Dot", "\\"), + ('、', "Ideographic Comma", "\\"), + ('ヽ', "Katakana Iteration Mark", "\\"), + + ('ꝸ', "Latin Small Letter Um", "&"), + ('&', "Fullwidth Ampersand", "&"), + + ('᛭', "Runic Cross Punctuation", "+"), + ('➕', "Heavy Plus Sign", "+"), + ('𐊛', "Lycian Letter H", "+"), + ('﬩', "Hebrew Letter Alternative Plus Sign", "+"), + ('+', "Fullwidth Plus Sign", "+"), + + ('‹', "Single Left-Pointing Angle Quotation Mark", "<"), + ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", "<"), + ('˂', "Modifier Letter Left Arrowhead", "<"), + ('𝈶', "Greek Instrumental Symbol-40", "<"), + ('ᐸ', "Canadian Syllabics Pa", "<"), + ('ᚲ', "Runic Letter Kauna", "<"), + ('❬', "Medium Left-Pointing Angle Bracket Ornament", "<"), + ('⟨', "Mathematical Left Angle Bracket", "<"), + ('〈', "Left-Pointing Angle Bracket", "<"), + ('〈', "Left Angle Bracket", "<"), + ('㇛', "CJK Stroke Pd", "<"), + ('く', "Hiragana Letter Ku", "<"), + ('𡿨', "CJK Unified Ideograph-21FE8", "<"), + ('《', "Left Double Angle Bracket", "<"), + ('<', "Fullwidth Less-Than Sign", "<"), + + ('᐀', "Canadian Syllabics Hyphen", "="), + ('⹀', "Double Hyphen", "="), + ('゠', "Katakana-Hiragana Double Hyphen", "="), + ('꓿', "Lisu Punctuation Full Stop", "="), + ('=', "Fullwidth Equals Sign", "="), + + ('›', "Single Right-Pointing Angle Quotation Mark", ">"), + ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", ">"), + ('˃', "Modifier Letter Right Arrowhead", ">"), + ('𝈷', "Greek Instrumental 
Symbol-42", ">"), + ('ᐳ', "Canadian Syllabics Po", ">"), + ('𖼿', "Miao Letter Archaic Zza", ">"), + ('❭', "Medium Right-Pointing Angle Bracket Ornament", ">"), + ('⟩', "Mathematical Right Angle Bracket", ">"), + ('〉', "Right-Pointing Angle Bracket", ">"), + ('〉', "Right Angle Bracket", ">"), + ('》', "Right Double Angle Bracket", ">"), + ('>', "Fullwidth Greater-Than Sign", ">"), + ('⩵', "Two Consecutive Equals Signs", "==") ]; // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of // keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`. // However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this way. -const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ - (' ', "Space", None), - ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), - ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), - (',', "Comma", Some(token::Comma)), - (';', "Semicolon", Some(token::Semi)), - (':', "Colon", Some(token::Colon)), - ('!', "Exclamation Mark", Some(token::Not)), - ('?', "Question Mark", Some(token::Question)), - ('.', "Period", Some(token::Dot)), - ('(', "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))), - (')', "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))), - ('[', "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))), - (']', "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))), - ('{', "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))), - ('}', "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))), - ('*', "Asterisk", Some(token::BinOp(token::Star))), - ('/', "Slash", Some(token::BinOp(token::Slash))), - ('\\', "Backslash", None), - ('&', "Ampersand", Some(token::BinOp(token::And))), - ('+', "Plus Sign", Some(token::BinOp(token::Plus))), - ('<', "Less-Than Sign", Some(token::Lt)), - ('=', "Equals Sign", Some(token::Eq)), - ('>', "Greater-Than Sign", Some(token::Gt)), +const ASCII_ARRAY: &[(&str, &str, Option<token::TokenKind>)] = &[ + (" ", "Space", None), + ("_", "Underscore", Some(token::Ident(kw::Underscore, false))), + ("-", "Minus/Hyphen", Some(token::BinOp(token::Minus))), + (",", "Comma", Some(token::Comma)), + (";", "Semicolon", Some(token::Semi)), + (":", "Colon", Some(token::Colon)), + ("!", "Exclamation Mark", Some(token::Not)), + ("?", "Question Mark", Some(token::Question)), + (".", "Period", Some(token::Dot)), + ("(", "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))), + (")", "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))), + ("[", "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))), + ("]", "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))), + ("{", "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))), + ("}", "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))), + ("*", "Asterisk", Some(token::BinOp(token::Star))), + ("/", "Slash", Some(token::BinOp(token::Slash))), + ("\\", "Backslash", None), + ("&", "Ampersand", Some(token::BinOp(token::And))), + ("+", "Plus Sign", Some(token::BinOp(token::Plus))), + ("<", "Less-Than Sign", Some(token::Lt)), + ("=", "Equals Sign", Some(token::Eq)), + ("==", "Double Equals Sign", Some(token::EqEq)), + (">", "Greater-Than Sign", Some(token::Gt)), // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by // spitting the 
correct token out. - ('\'', "Single Quote", None), - ('"', "Quotation Mark", None), + ("\'", "Single Quote", None), + ("\"", "Quotation Mark", None), ]; pub(super) fn check_for_substitution<'a>( @@ -337,12 +339,13 @@ pub(super) fn check_for_substitution<'a>( pos: BytePos, ch: char, err: &mut Diagnostic, + count: usize, ) -> Option<token::TokenKind> { - let &(_u_char, u_name, ascii_char) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?; + let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?; - let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8())); + let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count)); - let Some((_ascii_char, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) else { + let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else { let msg = format!("substitution character not found for '{}'", ch); reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); return None; @@ -353,7 +356,7 @@ pub(super) fn check_for_substitution<'a>( let msg = format!( "Unicode characters '“' (Left Double Quotation Mark) and \ '”' (Right Double Quotation Mark) look like '{}' ({}), but are not", - ascii_char, ascii_name + ascii_str, ascii_name ); err.span_suggestion( Span::with_root_ctxt( @@ -367,9 +370,14 @@ pub(super) fn check_for_substitution<'a>( } else { let msg = format!( "Unicode character '{}' ({}) looks like '{}' ({}), but it is not", - ch, u_name, ascii_char, ascii_name + ch, u_name, ascii_str, ascii_name + ); + err.span_suggestion( + span, + &msg, + ascii_str.to_string().repeat(count), + Applicability::MaybeIncorrect, ); - err.span_suggestion(span, &msg, ascii_char, Applicability::MaybeIncorrect); } token.clone() } diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index c7d239b64..686454a8f 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -6,7 +6,6 @@ use rustc_ast::attr; use rustc_ast::token::{self, Delimiter, Nonterminal}; use rustc_errors::{error_code, fluent, Diagnostic, IntoDiagnostic, PResult}; use rustc_span::{sym, BytePos, Span}; -use std::convert::TryInto; // Public for rustfmt usage #[derive(Debug)] diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index a084a7010..b97f22417 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -8,7 +8,6 @@ use rustc_errors::PResult; use rustc_session::parse::ParseSess; use rustc_span::{sym, Span, DUMMY_SP}; -use std::convert::TryInto; use std::ops::Range; /// A wrapper type to ensure that the parser handles outer attributes correctly. 
diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index c316a4dd6..4c918c670 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -12,9 +12,10 @@ use crate::errors::{ IncorrectAwait, IncorrectSemicolon, IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg, PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst, StructLiteralBodyWithoutPath, - StructLiteralBodyWithoutPathSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma, - UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration, - UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead, + StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens, StructLiteralNeedingParensSugg, + SuggEscapeToUseAsIdentifier, SuggRemoveComma, UnexpectedConstInGenericParam, + UnexpectedConstParamDeclaration, UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, + UseEqInstead, }; use crate::lexer::UnmatchedBrace; @@ -31,7 +32,8 @@ use rustc_ast::{ use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashSet; use rustc_errors::{ - fluent, Applicability, DiagnosticBuilder, DiagnosticMessage, Handler, MultiSpan, PResult, + fluent, Applicability, DiagnosticBuilder, DiagnosticMessage, FatalError, Handler, MultiSpan, + PResult, }; use rustc_errors::{pluralize, Diagnostic, ErrorGuaranteed, IntoDiagnostic}; use rustc_session::errors::ExprParenthesesNeeded; @@ -41,7 +43,6 @@ use rustc_span::{Span, SpanSnippetError, DUMMY_SP}; use std::mem::take; use std::ops::{Deref, DerefMut}; use thin_vec::{thin_vec, ThinVec}; -use tracing::{debug, trace}; /// Creates a placeholder argument. pub(super) fn dummy_arg(ident: Ident) -> Param { @@ -159,8 +160,6 @@ enum IsStandalone { Standalone, /// It's a subexpression, i.e., *not* standalone. Subexpr, - /// It's maybe standalone; we're not sure. - Maybe, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -213,14 +212,8 @@ impl MultiSugg { err.multipart_suggestion(&self.msg, self.patches, self.applicability); } - /// Overrides individual messages and applicabilities. 
- fn emit_many( - err: &mut Diagnostic, - msg: &str, - applicability: Applicability, - suggestions: impl Iterator<Item = Self>, - ) { - err.multipart_suggestions(msg, suggestions.map(|s| s.patches), applicability); + fn emit_verbose(self, err: &mut Diagnostic) { + err.multipart_suggestion_verbose(&self.msg, self.patches, self.applicability); } } @@ -631,12 +624,15 @@ impl<'a> Parser<'a> { &mut self, lo: Span, s: BlockCheckMode, + maybe_struct_name: token::Token, + can_be_struct_literal: bool, ) -> Option<PResult<'a, P<Block>>> { if self.token.is_ident() && self.look_ahead(1, |t| t == &token::Colon) { // We might be having a struct literal where people forgot to include the path: // fn foo() -> Foo { // field: value, // } + info!(?maybe_struct_name, ?self.token); let mut snapshot = self.create_snapshot_for_diagnostic(); let path = Path { segments: ThinVec::new(), @@ -656,13 +652,6 @@ impl<'a> Parser<'a> { // field: value, // } } err.delay_as_bug(); - self.sess.emit_err(StructLiteralBodyWithoutPath { - span: expr.span, - sugg: StructLiteralBodyWithoutPathSugg { - before: expr.span.shrink_to_lo(), - after: expr.span.shrink_to_hi(), - }, - }); self.restore_snapshot(snapshot); let mut tail = self.mk_block( vec![self.mk_stmt_err(expr.span)], @@ -670,7 +659,25 @@ impl<'a> Parser<'a> { lo.to(self.prev_token.span), ); tail.could_be_bare_literal = true; - Ok(tail) + if maybe_struct_name.is_ident() && can_be_struct_literal { + // Account for `if Example { a: one(), }.is_pos() {}`. + Err(self.sess.create_err(StructLiteralNeedingParens { + span: maybe_struct_name.span.to(expr.span), + sugg: StructLiteralNeedingParensSugg { + before: maybe_struct_name.span.shrink_to_lo(), + after: expr.span.shrink_to_hi(), + }, + })) + } else { + self.sess.emit_err(StructLiteralBodyWithoutPath { + span: expr.span, + sugg: StructLiteralBodyWithoutPathSugg { + before: expr.span.shrink_to_lo(), + after: expr.span.shrink_to_hi(), + }, + }); + Ok(tail) + } } (Err(err), Ok(tail)) => { // We have a block tail that contains a somehow valid type ascription expr. @@ -1112,7 +1119,11 @@ impl<'a> Parser<'a> { return if token::ModSep == self.token.kind { // We have some certainty that this was a bad turbofish at this point. // `foo< bar >::` - err.suggest_turbofish = Some(op.span.shrink_to_lo()); + if let ExprKind::Binary(o, ..) = inner_op.kind && o.node == BinOpKind::Lt { + err.suggest_turbofish = Some(op.span.shrink_to_lo()); + } else { + err.help_turbofish = Some(()); + } let snapshot = self.create_snapshot_for_diagnostic(); self.bump(); // `::` @@ -1138,7 +1149,11 @@ impl<'a> Parser<'a> { } else if token::OpenDelim(Delimiter::Parenthesis) == self.token.kind { // We have high certainty that this was a bad turbofish at this point. // `foo< bar >(` - err.suggest_turbofish = Some(op.span.shrink_to_lo()); + if let ExprKind::Binary(o, ..) = inner_op.kind && o.node == BinOpKind::Lt { + err.suggest_turbofish = Some(op.span.shrink_to_lo()); + } else { + err.help_turbofish = Some(()); + } // Consume the fn call arguments. 
match self.consume_fn_args() { Err(()) => Err(err.into_diagnostic(&self.sess.span_diagnostic)), @@ -1238,7 +1253,7 @@ impl<'a> Parser<'a> { let sum_span = ty.span.to(self.prev_token.span); let sub = match &ty.kind { - TyKind::Rptr(lifetime, mut_ty) => { + TyKind::Ref(lifetime, mut_ty) => { let sum_with_parens = pprust::to_string(|s| { s.s.word("&"); s.print_opt_lifetime(lifetime); @@ -1267,12 +1282,10 @@ impl<'a> Parser<'a> { &mut self, operand_expr: P<Expr>, op_span: Span, - prev_is_semi: bool, + start_stmt: bool, ) -> PResult<'a, P<Expr>> { - let standalone = - if prev_is_semi { IsStandalone::Standalone } else { IsStandalone::Subexpr }; + let standalone = if start_stmt { IsStandalone::Standalone } else { IsStandalone::Subexpr }; let kind = IncDecRecovery { standalone, op: IncOrDec::Inc, fixity: UnaryFixity::Pre }; - self.recover_from_inc_dec(operand_expr, kind, op_span) } @@ -1280,13 +1293,13 @@ impl<'a> Parser<'a> { &mut self, operand_expr: P<Expr>, op_span: Span, + start_stmt: bool, ) -> PResult<'a, P<Expr>> { let kind = IncDecRecovery { - standalone: IsStandalone::Maybe, + standalone: if start_stmt { IsStandalone::Standalone } else { IsStandalone::Subexpr }, op: IncOrDec::Inc, fixity: UnaryFixity::Post, }; - self.recover_from_inc_dec(operand_expr, kind, op_span) } @@ -1315,34 +1328,25 @@ impl<'a> Parser<'a> { }; match kind.standalone { - IsStandalone::Standalone => self.inc_dec_standalone_suggest(kind, spans).emit(&mut err), + IsStandalone::Standalone => { + self.inc_dec_standalone_suggest(kind, spans).emit_verbose(&mut err) + } IsStandalone::Subexpr => { let Ok(base_src) = self.span_to_snippet(base.span) - else { return help_base_case(err, base) }; + else { return help_base_case(err, base) }; match kind.fixity { UnaryFixity::Pre => { self.prefix_inc_dec_suggest(base_src, kind, spans).emit(&mut err) } UnaryFixity::Post => { - self.postfix_inc_dec_suggest(base_src, kind, spans).emit(&mut err) + // won't suggest since we can not handle the precedences + // for example: `a + b++` has been parsed (a + b)++ and we can not suggest here + if !matches!(base.kind, ExprKind::Binary(_, _, _)) { + self.postfix_inc_dec_suggest(base_src, kind, spans).emit(&mut err) + } } } } - IsStandalone::Maybe => { - let Ok(base_src) = self.span_to_snippet(base.span) - else { return help_base_case(err, base) }; - let sugg1 = match kind.fixity { - UnaryFixity::Pre => self.prefix_inc_dec_suggest(base_src, kind, spans), - UnaryFixity::Post => self.postfix_inc_dec_suggest(base_src, kind, spans), - }; - let sugg2 = self.inc_dec_standalone_suggest(kind, spans); - MultiSugg::emit_many( - &mut err, - "use `+= 1` instead", - Applicability::Unspecified, - [sugg1, sugg2].into_iter(), - ) - } } Err(err) } @@ -1392,7 +1396,6 @@ impl<'a> Parser<'a> { } patches.push((post_span, format!(" {}= 1", kind.op.chr()))); - MultiSugg { msg: format!("use `{}= 1` instead", kind.op.chr()), patches, @@ -2577,6 +2580,75 @@ impl<'a> Parser<'a> { Ok(()) } + pub fn is_diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> bool { + (0..3).all(|i| self.look_ahead(i, |tok| tok == long_kind)) + && self.look_ahead(3, |tok| tok == short_kind) + } + + fn diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> { + if self.is_diff_marker(long_kind, short_kind) { + let lo = self.token.span; + for _ in 0..4 { + self.bump(); + } + return Some(lo.to(self.prev_token.span)); + } + None + } + + pub fn recover_diff_marker(&mut self) { + let Some(start) = self.diff_marker(&TokenKind::BinOp(token::Shl), 
&TokenKind::Lt) else { + return; + }; + let mut spans = Vec::with_capacity(3); + spans.push(start); + let mut middlediff3 = None; + let mut middle = None; + let mut end = None; + loop { + if self.token.kind == TokenKind::Eof { + break; + } + if let Some(span) = self.diff_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or)) { + middlediff3 = Some(span); + } + if let Some(span) = self.diff_marker(&TokenKind::EqEq, &TokenKind::Eq) { + middle = Some(span); + } + if let Some(span) = self.diff_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt) { + spans.push(span); + end = Some(span); + break; + } + self.bump(); + } + let mut err = self.struct_span_err(spans, "encountered diff marker"); + err.span_label(start, "after this is the code before the merge"); + if let Some(middle) = middlediff3 { + err.span_label(middle, ""); + } + if let Some(middle) = middle { + err.span_label(middle, ""); + } + if let Some(end) = end { + err.span_label(end, "above this are the incoming code changes"); + } + err.help( + "if you're having merge conflicts after pulling new code, the top section is the code \ + you already had and the bottom section is the remote code", + ); + err.help( + "if you're in the middle of a rebase, the top section is the code being rebased onto \ + and the bottom section is the code coming from the current commit being rebased", + ); + err.note( + "for an explanation on these markers from the `git` documentation, visit \ + <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>", + ); + err.emit(); + FatalError.raise() + } + /// Parse and throw away a parenthesized comma separated /// sequence of patterns until `)` is reached. fn skip_pat_list(&mut self) -> PResult<'a, ()> { diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index f6a6ed379..bf93a89f0 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -83,7 +83,7 @@ macro_rules! maybe_whole_expr { pub(super) enum LhsExpr { NotYetParsed, AttributesParsed(AttrWrapper), - AlreadyParsed(P<Expr>), + AlreadyParsed { expr: P<Expr>, starts_statement: bool }, } impl From<Option<AttrWrapper>> for LhsExpr { @@ -97,11 +97,11 @@ impl From<Option<AttrWrapper>> for LhsExpr { } impl From<P<Expr>> for LhsExpr { - /// Converts the `expr: P<Expr>` into `LhsExpr::AlreadyParsed(expr)`. + /// Converts the `expr: P<Expr>` into `LhsExpr::AlreadyParsed { expr, starts_statement: false }`. /// /// This conversion does not allocate. fn from(expr: P<Expr>) -> Self { - LhsExpr::AlreadyParsed(expr) + LhsExpr::AlreadyParsed { expr, starts_statement: false } } } @@ -173,14 +173,16 @@ impl<'a> Parser<'a> { min_prec: usize, lhs: LhsExpr, ) -> PResult<'a, P<Expr>> { - let mut lhs = if let LhsExpr::AlreadyParsed(expr) = lhs { + let mut starts_stmt = false; + let mut lhs = if let LhsExpr::AlreadyParsed { expr, starts_statement } = lhs { + starts_stmt = starts_statement; expr } else { let attrs = match lhs { LhsExpr::AttributesParsed(attrs) => Some(attrs), _ => None, }; - if [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind) { + if self.token.is_range_separator() { return self.parse_prefix_range_expr(attrs); } else { self.parse_prefix_expr(attrs)? 
@@ -292,7 +294,7 @@ impl<'a> Parser<'a> { let op_span = self.prev_token.span.to(self.token.span); // Eat the second `+` self.bump(); - lhs = self.recover_from_postfix_increment(lhs, op_span)?; + lhs = self.recover_from_postfix_increment(lhs, op_span, starts_stmt)?; continue; } @@ -512,7 +514,7 @@ impl<'a> Parser<'a> { } debug_assert!( - [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind), + self.token.is_range_separator(), "parse_prefix_range_expr: token {:?} is not DotDot/DotDotEq", self.token ); @@ -560,17 +562,23 @@ impl<'a> Parser<'a> { // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr() match this.token.uninterpolate().kind { - token::Not => make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Not)), // `!expr` - token::Tilde => make_it!(this, attrs, |this, _| this.recover_tilde_expr(lo)), // `~expr` + // `!expr` + token::Not => make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Not)), + // `~expr` + token::Tilde => make_it!(this, attrs, |this, _| this.recover_tilde_expr(lo)), + // `-expr` token::BinOp(token::Minus) => { make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Neg)) - } // `-expr` + } + // `*expr` token::BinOp(token::Star) => { make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Deref)) - } // `*expr` + } + // `&expr` and `&&expr` token::BinOp(token::And) | token::AndAnd => { make_it!(this, attrs, |this, _| this.parse_borrow_expr(lo)) } + // `+lit` token::BinOp(token::Plus) if this.look_ahead(1, |tok| tok.is_numeric_lit()) => { let mut err = LeadingPlusNotSupported { span: lo, remove_plus: None, add_parentheses: None }; @@ -585,19 +593,20 @@ impl<'a> Parser<'a> { this.bump(); this.parse_prefix_expr(None) - } // `+expr` + } // Recover from `++x`: token::BinOp(token::Plus) if this.look_ahead(1, |t| *t == token::BinOp(token::Plus)) => { - let prev_is_semi = this.prev_token == token::Semi; + let starts_stmt = this.prev_token == token::Semi + || this.prev_token == token::CloseDelim(Delimiter::Brace); let pre_span = this.token.span.to(this.look_ahead(1, |t| t.span)); // Eat both `+`s. this.bump(); this.bump(); let operand_expr = this.parse_dot_or_call_expr(Default::default())?; - this.recover_from_prefix_increment(operand_expr, pre_span, prev_is_semi) + this.recover_from_prefix_increment(operand_expr, pre_span, starts_stmt) } token::Ident(..) if this.token.is_keyword(kw::Box) => { make_it!(this, attrs, |this, _| this.parse_box_expr(lo)) @@ -621,7 +630,7 @@ impl<'a> Parser<'a> { Ok((span, self.mk_unary(op, expr))) } - // Recover on `!` suggesting for bitwise negation instead. + /// Recover on `~expr` in favor of `!expr`. fn recover_tilde_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { self.sess.emit_err(TildeAsUnaryOperator(lo)); @@ -648,7 +657,6 @@ impl<'a> Parser<'a> { /// Recover on `not expr` in favor of `!expr`. fn recover_not_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { - // Emit the error... let negated_token = self.look_ahead(1, |t| t.clone()); let sub_diag = if negated_token.is_numeric_lit() { @@ -669,7 +677,6 @@ impl<'a> Parser<'a> { ), }); - // ...and recover! self.parse_unary_expr(lo, UnOp::Not) } @@ -896,7 +903,11 @@ impl<'a> Parser<'a> { let has_lifetime = self.token.is_lifetime() && self.look_ahead(1, |t| t != &token::Colon); let lifetime = has_lifetime.then(|| self.expect_lifetime()); // For recovery, see below. 
let (borrow_kind, mutbl) = self.parse_borrow_modifiers(lo); - let expr = self.parse_prefix_expr(None); + let expr = if self.token.is_range_separator() { + self.parse_prefix_range_expr(None) + } else { + self.parse_prefix_expr(None) + }; let (hi, expr) = self.interpolated_or_expr_span(expr)?; let span = lo.to(hi); if let Some(lt) = lifetime { @@ -1318,7 +1329,10 @@ impl<'a> Parser<'a> { self.parse_array_or_repeat_expr(Delimiter::Bracket) } else if self.check_path() { self.parse_path_start_expr() - } else if self.check_keyword(kw::Move) || self.check_keyword(kw::Static) { + } else if self.check_keyword(kw::Move) + || self.check_keyword(kw::Static) + || self.check_const_closure() + { self.parse_closure_expr() } else if self.eat_keyword(kw::If) { self.parse_if_expr() @@ -1339,9 +1353,6 @@ impl<'a> Parser<'a> { err.span_label(sp, "while parsing this `loop` expression"); err }) - } else if self.eat_keyword(kw::Continue) { - let kind = ExprKind::Continue(self.eat_label()); - Ok(self.mk_expr(lo.to(self.prev_token.span), kind)) } else if self.eat_keyword(kw::Match) { let match_sp = self.prev_token.span; self.parse_match_expr().map_err(|mut err| { @@ -1365,6 +1376,8 @@ impl<'a> Parser<'a> { self.parse_try_block(lo) } else if self.eat_keyword(kw::Return) { self.parse_return_expr() + } else if self.eat_keyword(kw::Continue) { + self.parse_continue_expr(lo) } else if self.eat_keyword(kw::Break) { self.parse_break_expr() } else if self.eat_keyword(kw::Yield) { @@ -1461,9 +1474,8 @@ impl<'a> Parser<'a> { } else if self.eat(&token::Comma) { // Vector with two or more elements. let sep = SeqSep::trailing_allowed(token::Comma); - let (remaining_exprs, _) = self.parse_seq_to_end(close, sep, |p| p.parse_expr())?; - let mut exprs = vec![first_expr]; - exprs.extend(remaining_exprs); + let (mut exprs, _) = self.parse_seq_to_end(close, sep, |p| p.parse_expr())?; + exprs.insert(0, first_expr); ExprKind::Array(exprs) } else { // Vector with one element @@ -1496,12 +1508,13 @@ impl<'a> Parser<'a> { prior_type_ascription: self.last_type_ascription, }); (lo.to(self.prev_token.span), ExprKind::MacCall(mac)) - } else if self.check(&token::OpenDelim(Delimiter::Brace)) && - let Some(expr) = self.maybe_parse_struct_expr(&qself, &path) { - if qself.is_some() { - self.sess.gated_spans.gate(sym::more_qualified_paths, path.span); - } - return expr; + } else if self.check(&token::OpenDelim(Delimiter::Brace)) + && let Some(expr) = self.maybe_parse_struct_expr(&qself, &path) + { + if qself.is_some() { + self.sess.gated_spans.gate(sym::more_qualified_paths, path.span); + } + return expr; } else { (path.span, ExprKind::Path(qself, path)) }; @@ -1534,15 +1547,16 @@ impl<'a> Parser<'a> { && (matches!(self.token.kind, token::CloseDelim(_) | token::Comma) || self.token.is_op()) { - let lit = self.recover_unclosed_char(label_.ident, |self_| { - self_.sess.create_err(UnexpectedTokenAfterLabel { - span: self_.token.span, - remove_label: None, - enclose_in_block: None, - }) - }); + let (lit, _) = + self.recover_unclosed_char(label_.ident, Parser::mk_token_lit_char, |self_| { + self_.sess.create_err(UnexpectedTokenAfterLabel { + span: self_.token.span, + remove_label: None, + enclose_in_block: None, + }) + }); consume_colon = false; - Ok(self.mk_expr(lo, ExprKind::Lit(lit.token_lit))) + Ok(self.mk_expr(lo, ExprKind::Lit(lit))) } else if !ate_colon && (self.check_noexpect(&TokenKind::Comma) || self.check_noexpect(&TokenKind::Gt)) { @@ -1581,7 +1595,7 @@ impl<'a> Parser<'a> { vis.0 }; - // Suggestion involves adding a (as of time of writing 
this, unstable) labeled block. + // Suggestion involves adding a labeled block. // // If there are no breaks that may use this label, suggest removing the label and // recover to the unmodified expression. @@ -1617,12 +1631,13 @@ impl<'a> Parser<'a> { Ok(expr) } - /// Emit an error when a char is parsed as a lifetime because of a missing quote - pub(super) fn recover_unclosed_char( + /// Emit an error when a char is parsed as a lifetime because of a missing quote. + pub(super) fn recover_unclosed_char<L>( &self, lifetime: Ident, + mk_lit_char: impl FnOnce(Symbol, Span) -> L, err: impl FnOnce(&Self) -> DiagnosticBuilder<'a, ErrorGuaranteed>, - ) -> ast::MetaItemLit { + ) -> L { if let Some(mut diag) = self.sess.span_diagnostic.steal_diagnostic(lifetime.span, StashKey::LifetimeIsChar) { @@ -1644,11 +1659,7 @@ impl<'a> Parser<'a> { .emit(); } let name = lifetime.without_first_quote().name; - ast::MetaItemLit { - token_lit: token::Lit::new(token::LitKind::Char, name, None), - kind: ast::LitKind::Char(name.as_str().chars().next().unwrap_or('_')), - span: lifetime.span, - } + mk_lit_char(name, lifetime.span) } /// Recover on the syntax `do catch { ... }` suggesting `try { ... }` instead. @@ -1703,10 +1714,10 @@ impl<'a> Parser<'a> { fn parse_break_expr(&mut self) -> PResult<'a, P<Expr>> { let lo = self.prev_token.span; let mut label = self.eat_label(); - let kind = if label.is_some() && self.token == token::Colon { + let kind = if self.token == token::Colon && let Some(label) = label.take() { // The value expression can be a labeled loop, see issue #86948, e.g.: // `loop { break 'label: loop { break 'label 42; }; }` - let lexpr = self.parse_labeled_expr(label.take().unwrap(), true)?; + let lexpr = self.parse_labeled_expr(label, true)?; self.sess.emit_err(LabeledLoopInBreak { span: lexpr.span, sub: WrapExpressionInParentheses { @@ -1718,8 +1729,8 @@ impl<'a> Parser<'a> { } else if self.token != token::OpenDelim(Delimiter::Brace) || !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) { - let expr = self.parse_expr_opt()?; - if let Some(expr) = &expr { + let mut expr = self.parse_expr_opt()?; + if let Some(expr) = &mut expr { if label.is_some() && matches!( expr.kind, @@ -1737,7 +1748,19 @@ impl<'a> Parser<'a> { BuiltinLintDiagnostics::BreakWithLabelAndLoop(expr.span), ); } + + // Recover `break label aaaaa` + if self.may_recover() + && let ExprKind::Path(None, p) = &expr.kind + && let [segment] = &*p.segments + && let &ast::PathSegment { ident, args: None, .. } = segment + && let Some(next) = self.parse_expr_opt()? + { + label = Some(self.recover_ident_into_label(ident)); + *expr = next; + } } + expr } else { None @@ -1746,6 +1769,23 @@ impl<'a> Parser<'a> { self.maybe_recover_from_bad_qpath(expr) } + /// Parse `"continue" label?`. + fn parse_continue_expr(&mut self, lo: Span) -> PResult<'a, P<Expr>> { + let mut label = self.eat_label(); + + // Recover `continue label` -> `continue 'label` + if self.may_recover() + && label.is_none() + && let Some((ident, _)) = self.token.ident() + { + self.bump(); + label = Some(self.recover_ident_into_label(ident)); + } + + let kind = ExprKind::Continue(label); + Ok(self.mk_expr(lo.to(self.prev_token.span), kind)) + } + /// Parse `"yield" expr?`. 
fn parse_yield_expr(&mut self) -> PResult<'a, P<Expr>> { let lo = self.prev_token.span; @@ -1764,8 +1804,8 @@ impl<'a> Parser<'a> { Some(lit) => match lit.kind { ast::LitKind::Str(symbol_unescaped, style) => Ok(ast::StrLit { style, - symbol: lit.token_lit.symbol, - suffix: lit.token_lit.suffix, + symbol: lit.symbol, + suffix: lit.suffix, span: lit.span, symbol_unescaped, }), @@ -1775,7 +1815,23 @@ impl<'a> Parser<'a> { } } - fn handle_missing_lit(&mut self) -> PResult<'a, MetaItemLit> { + pub(crate) fn mk_token_lit_char(name: Symbol, span: Span) -> (token::Lit, Span) { + (token::Lit { symbol: name, suffix: None, kind: token::Char }, span) + } + + fn mk_meta_item_lit_char(name: Symbol, span: Span) -> MetaItemLit { + ast::MetaItemLit { + symbol: name, + suffix: None, + kind: ast::LitKind::Char(name.as_str().chars().next().unwrap_or('_')), + span, + } + } + + fn handle_missing_lit<L>( + &mut self, + mk_lit_char: impl FnOnce(Symbol, Span) -> L, + ) -> PResult<'a, L> { if let token::Interpolated(inner) = &self.token.kind { let expr = match inner.as_ref() { token::NtExpr(expr) => Some(expr), @@ -1799,7 +1855,7 @@ impl<'a> Parser<'a> { // On an error path, eagerly consider a lifetime to be an unclosed character lit if self.token.is_lifetime() { let lt = self.expect_lifetime(); - Ok(self.recover_unclosed_char(lt.ident, err)) + Ok(self.recover_unclosed_char(lt.ident, mk_lit_char, err)) } else { Err(err(self)) } @@ -1808,11 +1864,13 @@ impl<'a> Parser<'a> { pub(super) fn parse_token_lit(&mut self) -> PResult<'a, (token::Lit, Span)> { self.parse_opt_token_lit() .ok_or(()) - .or_else(|()| self.handle_missing_lit().map(|lit| (lit.token_lit, lit.span))) + .or_else(|()| self.handle_missing_lit(Parser::mk_token_lit_char)) } pub(super) fn parse_meta_item_lit(&mut self) -> PResult<'a, MetaItemLit> { - self.parse_opt_meta_item_lit().ok_or(()).or_else(|()| self.handle_missing_lit()) + self.parse_opt_meta_item_lit() + .ok_or(()) + .or_else(|()| self.handle_missing_lit(Parser::mk_meta_item_lit_char)) } fn recover_after_dot(&mut self) -> Option<Token> { @@ -2015,7 +2073,7 @@ impl<'a> Parser<'a> { }); } - let (attrs, blk) = self.parse_block_common(lo, blk_mode)?; + let (attrs, blk) = self.parse_block_common(lo, blk_mode, true)?; Ok(self.mk_expr_with_attrs(blk.span, ExprKind::Block(blk, opt_label), attrs)) } @@ -2041,6 +2099,8 @@ impl<'a> Parser<'a> { ClosureBinder::NotPresent }; + let constness = self.parse_constness(Case::Sensitive); + let movability = if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable }; @@ -2087,6 +2147,7 @@ impl<'a> Parser<'a> { ExprKind::Closure(Box::new(ast::Closure { binder, capture_clause, + constness, asyncness, movability, fn_decl, @@ -3013,9 +3074,29 @@ impl<'a> Parser<'a> { false } + /// Converts an ident into 'label and emits an "expected a label, found an identifier" error. + fn recover_ident_into_label(&mut self, ident: Ident) -> Label { + // Convert `label` -> `'label`, + // so that nameres doesn't complain about non-existing label + let label = format!("'{}", ident.name); + let ident = Ident { name: Symbol::intern(&label), span: ident.span }; + + self.struct_span_err(ident.span, "expected a label, found an identifier") + .span_suggestion( + ident.span, + "labels start with a tick", + label, + Applicability::MachineApplicable, + ) + .emit(); + + Label { ident } + } + /// Parses `ident (COLON expr)?`. 
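// A minimal sketch of the closure syntax the new `constness` field records;
// `const` closures are assumed to sit behind the unstable `const_closures`
// feature, while the parser change itself only stores the parsed `const`:
//
//     let add_one = const |x: u32| x + 1;
//     let make = const move || 41 + 1;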
fn parse_expr_field(&mut self) -> PResult<'a, ExprField> { let attrs = self.parse_outer_attributes()?; + self.recover_diff_marker(); self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let lo = this.token.span; diff --git a/compiler/rustc_parse/src/parser/generics.rs b/compiler/rustc_parse/src/parser/generics.rs index fa75670b2..8ba811715 100644 --- a/compiler/rustc_parse/src/parser/generics.rs +++ b/compiler/rustc_parse/src/parser/generics.rs @@ -1,11 +1,20 @@ +use crate::errors::{WhereClauseBeforeTupleStructBody, WhereClauseBeforeTupleStructBodySugg}; + use super::{ForceCollect, Parser, TrailingToken}; +use ast::token::Delimiter; use rustc_ast::token; use rustc_ast::{ self as ast, AttrVec, GenericBounds, GenericParam, GenericParamKind, TyKind, WhereClause, }; use rustc_errors::{Applicability, PResult}; -use rustc_span::symbol::kw; +use rustc_span::symbol::{kw, Ident}; +use rustc_span::Span; + +enum PredicateOrStructBody { + Predicate(ast::WherePredicate), + StructBody(Vec<ast::FieldDef>), +} impl<'a> Parser<'a> { /// Parses bounds of a lifetime parameter `BOUND + BOUND + BOUND`, possibly with trailing `+`. @@ -240,23 +249,39 @@ impl<'a> Parser<'a> { }) } - /// Parses an optional where-clause and places it in `generics`. + /// Parses an optional where-clause. /// /// ```ignore (only-for-syntax-highlight) /// where T : Trait<U, V> + 'b, 'a : 'b /// ``` pub(super) fn parse_where_clause(&mut self) -> PResult<'a, WhereClause> { + self.parse_where_clause_common(None).map(|(clause, _)| clause) + } + + pub(super) fn parse_struct_where_clause( + &mut self, + struct_name: Ident, + body_insertion_point: Span, + ) -> PResult<'a, (WhereClause, Option<Vec<ast::FieldDef>>)> { + self.parse_where_clause_common(Some((struct_name, body_insertion_point))) + } + + fn parse_where_clause_common( + &mut self, + struct_: Option<(Ident, Span)>, + ) -> PResult<'a, (WhereClause, Option<Vec<ast::FieldDef>>)> { let mut where_clause = WhereClause { has_where_token: false, predicates: Vec::new(), span: self.prev_token.span.shrink_to_hi(), }; + let mut tuple_struct_body = None; if !self.eat_keyword(kw::Where) { - return Ok(where_clause); + return Ok((where_clause, None)); } where_clause.has_where_token = true; - let lo = self.prev_token.span; + let where_lo = self.prev_token.span; // We are considering adding generics to the `where` keyword as an alternative higher-rank // parameter syntax (as in `where<'a>` or `where<T>`. To avoid that being a breaking @@ -272,7 +297,8 @@ impl<'a> Parser<'a> { } loop { - let lo = self.token.span; + let where_sp = where_lo.to(self.prev_token.span); + let pred_lo = self.token.span; if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) { let lifetime = self.expect_lifetime(); // Bounds starting with a colon are mandatory, but possibly empty. @@ -280,13 +306,21 @@ impl<'a> Parser<'a> { let bounds = self.parse_lt_param_bounds(); where_clause.predicates.push(ast::WherePredicate::RegionPredicate( ast::WhereRegionPredicate { - span: lo.to(self.prev_token.span), + span: pred_lo.to(self.prev_token.span), lifetime, bounds, }, )); } else if self.check_type() { - where_clause.predicates.push(self.parse_ty_where_predicate()?); + match self.parse_ty_where_predicate_or_recover_tuple_struct_body( + struct_, pred_lo, where_sp, + )? 
{ + PredicateOrStructBody::Predicate(pred) => where_clause.predicates.push(pred), + PredicateOrStructBody::StructBody(body) => { + tuple_struct_body = Some(body); + break; + } + } } else { break; } @@ -297,7 +331,7 @@ impl<'a> Parser<'a> { if self.eat_keyword_noexpect(kw::Where) { let msg = "cannot define duplicate `where` clauses on an item"; let mut err = self.struct_span_err(self.token.span, msg); - err.span_label(lo, "previous `where` clause starts here"); + err.span_label(pred_lo, "previous `where` clause starts here"); err.span_suggestion_verbose( prev_token.shrink_to_hi().to(self.prev_token.span), "consider joining the two `where` clauses into one", @@ -310,8 +344,72 @@ impl<'a> Parser<'a> { } } - where_clause.span = lo.to(self.prev_token.span); - Ok(where_clause) + where_clause.span = where_lo.to(self.prev_token.span); + Ok((where_clause, tuple_struct_body)) + } + + fn parse_ty_where_predicate_or_recover_tuple_struct_body( + &mut self, + struct_: Option<(Ident, Span)>, + pred_lo: Span, + where_sp: Span, + ) -> PResult<'a, PredicateOrStructBody> { + let mut snapshot = None; + + if let Some(struct_) = struct_ + && self.may_recover() + && self.token.kind == token::OpenDelim(Delimiter::Parenthesis) + { + snapshot = Some((struct_, self.create_snapshot_for_diagnostic())); + }; + + match self.parse_ty_where_predicate() { + Ok(pred) => Ok(PredicateOrStructBody::Predicate(pred)), + Err(type_err) => { + let Some(((struct_name, body_insertion_point), mut snapshot)) = snapshot else { + return Err(type_err); + }; + + // Check if we might have encountered an out of place tuple struct body. + match snapshot.parse_tuple_struct_body() { + // Since we don't know the exact reason why we failed to parse the + // predicate (we might have stumbled upon something bogus like `(T): ?`), + // employ a simple heuristic to weed out some pathological cases: + // Look for a semicolon (strong indicator) or anything that might mark + // the end of the item (weak indicator) following the body. 
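// The misplaced body being recovered here matches the example used in the item
// parser below:
//
//     struct Foo<T> where T: Copy, (T);
//
// and the emitted suggestion rewrites it to
//
//     struct Foo<T>(T) where T: Copy;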
+ Ok(body) + if matches!(snapshot.token.kind, token::Semi | token::Eof) + || snapshot.token.can_begin_item() => + { + type_err.cancel(); + + let body_sp = pred_lo.to(snapshot.prev_token.span); + let map = self.sess.source_map(); + + self.sess.emit_err(WhereClauseBeforeTupleStructBody { + span: where_sp, + name: struct_name.span, + body: body_sp, + sugg: map.span_to_snippet(body_sp).ok().map(|body| { + WhereClauseBeforeTupleStructBodySugg { + left: body_insertion_point.shrink_to_hi(), + snippet: body, + right: map.end_point(where_sp).to(body_sp), + } + }), + }); + + self.restore_snapshot(snapshot); + Ok(PredicateOrStructBody::StructBody(body)) + } + Ok(_) => Err(type_err), + Err(body_err) => { + body_err.cancel(); + Err(type_err) + } + } + } + } } fn parse_ty_where_predicate(&mut self) -> PResult<'a, ast::WherePredicate> { diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 03f25392a..53680a82b 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -3,6 +3,7 @@ use crate::errors::{DocCommentDoesNotDocumentAnything, UseEmptyBlockNotSemi}; use super::diagnostics::{dummy_arg, ConsumeClosingDelim}; use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; use super::{AttrWrapper, FollowedByType, ForceCollect, Parser, PathStyle, TrailingToken}; +use crate::errors::FnTypoWithImpl; use rustc_ast::ast::*; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, TokenKind}; @@ -21,10 +22,8 @@ use rustc_span::lev_distance::lev_distance; use rustc_span::source_map::{self, Span}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; -use std::convert::TryFrom; use std::mem; use thin_vec::ThinVec; -use tracing::debug; impl<'a> Parser<'a> { /// Parses a source module as a crate. This is the main entry point for the parser. @@ -99,7 +98,9 @@ impl<'a> Parser<'a> { fn_parse_mode: FnParseMode, force_collect: ForceCollect, ) -> PResult<'a, Option<Item>> { + self.recover_diff_marker(); let attrs = self.parse_outer_attributes()?; + self.recover_diff_marker(); self.parse_item_common(attrs, true, false, fn_parse_mode, force_collect) } @@ -705,11 +706,12 @@ impl<'a> Parser<'a> { if self.recover_doc_comment_before_brace() { continue; } + self.recover_diff_marker(); match parse_item(self) { Ok(None) => { - let is_unnecessary_semicolon = !items.is_empty() + let mut is_unnecessary_semicolon = !items.is_empty() // When the close delim is `)` in a case like the following, `token.kind` is expected to be `token::CloseDelim(Delimiter::Parenthesis)`, - // but the actual `token.kind` is `token::CloseDelim(Delimiter::Bracket)`. + // but the actual `token.kind` is `token::CloseDelim(Delimiter::Brace)`. // This is because the `token.kind` of the close delim is treated as the same as // that of the open delim in `TokenTreesReader::parse_token_tree`, even if the delimiters of them are different. // Therefore, `token.kind` should not be compared here. @@ -728,7 +730,13 @@ impl<'a> Parser<'a> { .span_to_snippet(self.prev_token.span) .map_or(false, |snippet| snippet == "}") && self.token.kind == token::Semi; - let semicolon_span = self.token.span; + let mut semicolon_span = self.token.span; + if !is_unnecessary_semicolon { + // #105369, Detect spurious `;` before assoc fn body + is_unnecessary_semicolon = self.token == token::OpenDelim(Delimiter::Brace) + && self.prev_token.kind == token::Semi; + semicolon_span = self.prev_token.span; + } // We have to bail or we'll potentially never make progress. 
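// Example of the #105369 shape the new check catches (method name invented):
//
//     impl S {
//         fn broken(&self) -> u32; { 0 }
//     }
//
// The stray `;` ends the signature early; it is now reported as an unnecessary
// semicolon (using the previous token's span) instead of cascading into
// further errors.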
let non_item_span = self.token.span; let is_let = self.token.is_keyword(kw::Let); @@ -1034,8 +1042,11 @@ impl<'a> Parser<'a> { /// USE_TREE_LIST = Ø | (USE_TREE `,`)* USE_TREE [`,`] /// ``` fn parse_use_tree_list(&mut self) -> PResult<'a, Vec<(UseTree, ast::NodeId)>> { - self.parse_delim_comma_seq(Delimiter::Brace, |p| Ok((p.parse_use_tree()?, DUMMY_NODE_ID))) - .map(|(r, _)| r) + self.parse_delim_comma_seq(Delimiter::Brace, |p| { + p.recover_diff_marker(); + Ok((p.parse_use_tree()?, DUMMY_NODE_ID)) + }) + .map(|(r, _)| r) } fn parse_rename(&mut self) -> PResult<'a, Option<Ident>> { @@ -1374,7 +1385,9 @@ impl<'a> Parser<'a> { } fn parse_enum_variant(&mut self) -> PResult<'a, Option<Variant>> { + self.recover_diff_marker(); let variant_attrs = self.parse_outer_attributes()?; + self.recover_diff_marker(); self.collect_tokens_trailing_token( variant_attrs, ForceCollect::No, @@ -1441,8 +1454,16 @@ impl<'a> Parser<'a> { // struct. let vdata = if self.token.is_keyword(kw::Where) { - generics.where_clause = self.parse_where_clause()?; - if self.eat(&token::Semi) { + let tuple_struct_body; + (generics.where_clause, tuple_struct_body) = + self.parse_struct_where_clause(class_name, generics.span)?; + + if let Some(body) = tuple_struct_body { + // If we see a misplaced tuple struct body: `struct Foo<T> where T: Copy, (T);` + let body = VariantData::Tuple(body, DUMMY_NODE_ID); + self.expect_semi()?; + body + } else if self.eat(&token::Semi) { // If we see a: `struct Foo<T> where T: Copy;` style decl. VariantData::Unit(DUMMY_NODE_ID) } else { @@ -1562,15 +1583,38 @@ impl<'a> Parser<'a> { Ok((fields, recovered)) } - fn parse_tuple_struct_body(&mut self) -> PResult<'a, Vec<FieldDef>> { + pub(super) fn parse_tuple_struct_body(&mut self) -> PResult<'a, Vec<FieldDef>> { // This is the case where we find `struct Foo<T>(T) where T: Copy;` // Unit like structs are handled in parse_item_struct function self.parse_paren_comma_seq(|p| { let attrs = p.parse_outer_attributes()?; p.collect_tokens_trailing_token(attrs, ForceCollect::No, |p, attrs| { + let mut snapshot = None; + if p.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) { + // Account for `<<<<<<<` diff markers. We can't proactively error here because + // that can be a valid type start, so we snapshot and reparse only we've + // encountered another parse error. + snapshot = Some(p.create_snapshot_for_diagnostic()); + } let lo = p.token.span; - let vis = p.parse_visibility(FollowedByType::Yes)?; - let ty = p.parse_ty()?; + let vis = match p.parse_visibility(FollowedByType::Yes) { + Ok(vis) => vis, + Err(err) => { + if let Some(ref mut snapshot) = snapshot { + snapshot.recover_diff_marker(); + } + return Err(err); + } + }; + let ty = match p.parse_ty() { + Ok(ty) => ty, + Err(err) => { + if let Some(ref mut snapshot) = snapshot { + snapshot.recover_diff_marker(); + } + return Err(err); + } + }; Ok(( FieldDef { @@ -1591,7 +1635,9 @@ impl<'a> Parser<'a> { /// Parses an element of a struct declaration. 
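// The `<<<<<<<` markers handled by the snapshot above are ordinary merge-conflict
// residue left in a tuple struct body, e.g. (field types invented):
//
//     struct Pair(
//     <<<<<<< HEAD
//         u32,
//     =======
//         i64,
//     >>>>>>> other-branch
//     );
//
// Because `<<` can legitimately start a type, the marker is only diagnosed once
// the visibility or type parse actually fails.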
fn parse_field_def(&mut self, adt_ty: &str) -> PResult<'a, FieldDef> { + self.recover_diff_marker(); let attrs = self.parse_outer_attributes()?; + self.recover_diff_marker(); self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let lo = this.token.span; let vis = this.parse_visibility(FollowedByType::No)?; @@ -2126,11 +2172,26 @@ impl<'a> Parser<'a> { vis: &Visibility, case: Case, ) -> PResult<'a, (Ident, FnSig, Generics, Option<P<Block>>)> { + let fn_span = self.token.span; let header = self.parse_fn_front_matter(vis, case)?; // `const ... fn` let ident = self.parse_ident()?; // `foo` let mut generics = self.parse_generics()?; // `<'a, T, ...>` - let decl = - self.parse_fn_decl(fn_parse_mode.req_name, AllowPlus::Yes, RecoverReturnSign::Yes)?; // `(p: u8, ...)` + let decl = match self.parse_fn_decl( + fn_parse_mode.req_name, + AllowPlus::Yes, + RecoverReturnSign::Yes, + ) { + Ok(decl) => decl, + Err(old_err) => { + // If we see `for Ty ...` then user probably meant `impl` item. + if self.token.is_keyword(kw::For) { + old_err.cancel(); + return Err(self.sess.create_err(FnTypoWithImpl { fn_span })); + } else { + return Err(old_err); + } + } + }; generics.where_clause = self.parse_where_clause()?; // `where T: Ord` let mut sig_hi = self.prev_token.span; @@ -2161,7 +2222,8 @@ impl<'a> Parser<'a> { *sig_hi = self.prev_token.span; (AttrVec::new(), None) } else if self.check(&token::OpenDelim(Delimiter::Brace)) || self.token.is_whole_block() { - self.parse_inner_attrs_and_block().map(|(attrs, body)| (attrs, Some(body)))? + self.parse_block_common(self.token.span, BlockCheckMode::Default, false) + .map(|(attrs, body)| (attrs, Some(body)))? } else if self.token.kind == token::Eq { // Recover `fn foo() = $expr;`. self.bump(); // `=` @@ -2403,10 +2465,11 @@ impl<'a> Parser<'a> { } /// Parses the parameter list of a function, including the `(` and `)` delimiters. - fn parse_fn_params(&mut self, req_name: ReqName) -> PResult<'a, Vec<Param>> { + pub(super) fn parse_fn_params(&mut self, req_name: ReqName) -> PResult<'a, Vec<Param>> { let mut first_param = true; // Parse the arguments, starting out with `self` being allowed... let (mut params, _) = self.parse_paren_comma_seq(|p| { + p.recover_diff_marker(); let param = p.parse_param_general(req_name, first_param).or_else(|mut e| { e.emit(); let lo = p.prev_token.span; @@ -2444,7 +2507,6 @@ impl<'a> Parser<'a> { }; let (pat, ty) = if is_name_required || this.is_named_param() { debug!("parse_param_general parse_pat (is_name_required:{})", is_name_required); - let (pat, colon) = this.parse_fn_param_pat_colon()?; if !colon { let mut err = this.unexpected::<()>().unwrap_err(); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index bebb01266..ffb23b50a 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -25,7 +25,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree}; use rustc_ast::util::case::Case; use rustc_ast::AttrId; use rustc_ast::DUMMY_NODE_ID; -use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, DelimArgs, Extern}; +use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern}; use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit}; use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind}; use rustc_ast_pretty::pprust; @@ -317,7 +317,7 @@ impl TokenCursor { // required to wrap the text. E.g. 
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0) // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1) - // - `abc "##d##"` is wrapped as `r###"abc "d""###` (num_of_hashes = 3) + // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3) let mut num_of_hashes = 0; let mut count = 0; for ch in data.as_str().chars() { @@ -542,9 +542,9 @@ impl<'a> Parser<'a> { } } - /// Expect next token to be edible or inedible token. If edible, + /// Expect next token to be edible or inedible token. If edible, /// then consume it; if inedible, then return without consuming - /// anything. Signal a fatal error if next token is unexpected. + /// anything. Signal a fatal error if next token is unexpected. pub fn expect_one_of( &mut self, edible: &[TokenKind], @@ -736,6 +736,16 @@ impl<'a> Parser<'a> { self.check_or_expected(self.token.can_begin_const_arg(), TokenType::Const) } + fn check_const_closure(&self) -> bool { + self.is_keyword_ahead(0, &[kw::Const]) + && self.look_ahead(1, |t| match &t.kind { + token::Ident(kw::Move | kw::Static | kw::Async, _) + | token::OrOr + | token::BinOp(token::Or) => true, + _ => false, + }) + } + fn check_inline_const(&self, dist: usize) -> bool { self.is_keyword_ahead(dist, &[kw::Const]) && self.look_ahead(dist + 1, |t| match &t.kind { @@ -1217,11 +1227,7 @@ impl<'a> Parser<'a> { value: self.mk_expr(blk.span, ExprKind::Block(blk, None)), }; let blk_span = anon_const.value.span; - Ok(self.mk_expr_with_attrs( - span.to(blk_span), - ExprKind::ConstBlock(anon_const), - AttrVec::from(attrs), - )) + Ok(self.mk_expr_with_attrs(span.to(blk_span), ExprKind::ConstBlock(anon_const), attrs)) } /// Parses mutability (`mut` or nothing). @@ -1503,6 +1509,10 @@ impl<'a> Parser<'a> { pub fn clear_expected_tokens(&mut self) { self.expected_tokens.clear(); } + + pub fn approx_token_stream_pos(&self) -> usize { + self.token_cursor.num_next_calls + } } pub(crate) fn make_unclosed_delims_error( diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index cbeec951e..e73a17ced 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -411,16 +411,20 @@ impl<'a> Parser<'a> { { // Recover a `'a` as a `'a'` literal let lt = self.expect_lifetime(); - let lit = self.recover_unclosed_char(lt.ident, |self_| { - let expected = expected.unwrap_or("pattern"); - let msg = - format!("expected {}, found {}", expected, super::token_descr(&self_.token)); + let (lit, _) = + self.recover_unclosed_char(lt.ident, Parser::mk_token_lit_char, |self_| { + let expected = expected.unwrap_or("pattern"); + let msg = format!( + "expected {}, found {}", + expected, + super::token_descr(&self_.token) + ); - let mut err = self_.struct_span_err(self_.token.span, &msg); - err.span_label(self_.token.span, format!("expected {}", expected)); - err - }); - PatKind::Lit(self.mk_expr(lo, ExprKind::Lit(lit.token_lit))) + let mut err = self_.struct_span_err(self_.token.span, &msg); + err.span_label(self_.token.span, format!("expected {}", expected)); + err + }); + PatKind::Lit(self.mk_expr(lo, ExprKind::Lit(lit))) } else { // Try to parse everything else as literal with optional minus match self.parse_literal_maybe_minus() { @@ -465,7 +469,7 @@ impl<'a> Parser<'a> { /// Try to recover the more general form `intersect ::= $pat_lhs @ $pat_rhs`. /// /// Allowed binding patterns generated by `binding ::= ref? mut? 
$ident @ $pat_rhs` - /// should already have been parsed by now at this point, + /// should already have been parsed by now at this point, /// if the next token is `@` then we can try to parse the more general form. /// /// Consult `parse_pat_ident` for the `binding` grammar. @@ -487,17 +491,6 @@ impl<'a> Parser<'a> { if let PatKind::Ident(_, _, sub @ None) = &mut rhs.kind { // The user inverted the order, so help them fix that. - let mut applicability = Applicability::MachineApplicable; - // FIXME(bindings_after_at): Remove this code when stabilizing the feature. - lhs.walk(&mut |p| match p.kind { - // `check_match` is unhappy if the subpattern has a binding anywhere. - PatKind::Ident(..) => { - applicability = Applicability::MaybeIncorrect; - false // Short-circuit. - } - _ => true, - }); - let lhs_span = lhs.span; // Move the LHS into the RHS as a subpattern. // The RHS is now the full pattern. @@ -506,7 +499,12 @@ impl<'a> Parser<'a> { self.struct_span_err(sp, "pattern on wrong side of `@`") .span_label(lhs_span, "pattern on the left, should be on the right") .span_label(rhs.span, "binding on the right, should be on the left") - .span_suggestion(sp, "switch the order", pprust::pat_to_string(&rhs), applicability) + .span_suggestion( + sp, + "switch the order", + pprust::pat_to_string(&rhs), + Applicability::MachineApplicable, + ) .emit(); } else { // The special case above doesn't apply so we may have e.g. `A(x) @ B(y)`. diff --git a/compiler/rustc_parse/src/parser/path.rs b/compiler/rustc_parse/src/parser/path.rs index 2d432e3f5..5333d3b85 100644 --- a/compiler/rustc_parse/src/parser/path.rs +++ b/compiler/rustc_parse/src/parser/path.rs @@ -277,8 +277,7 @@ impl<'a> Parser<'a> { if let Some(arg) = args .iter() .rev() - .skip_while(|arg| matches!(arg, AngleBracketedArg::Constraint(_))) - .next() + .find(|arg| !matches!(arg, AngleBracketedArg::Constraint(_))) { err.span_suggestion_verbose( arg.span().shrink_to_hi(), diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 42197e637..4ff9927aa 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -164,7 +164,10 @@ impl<'a> Parser<'a> { // Perform this outside of the `collect_tokens_trailing_token` closure, // since our outer attributes do not apply to this part of the expression let expr = self.with_res(Restrictions::STMT_EXPR, |this| { - this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr)) + this.parse_assoc_expr_with( + 0, + LhsExpr::AlreadyParsed { expr, starts_statement: true }, + ) })?; Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr))) } else { @@ -198,7 +201,10 @@ impl<'a> Parser<'a> { let e = self.mk_expr(lo.to(hi), ExprKind::MacCall(mac)); let e = self.maybe_recover_from_bad_qpath(e)?; let e = self.parse_dot_or_call_expr_with(e, lo, attrs)?; - let e = self.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(e))?; + let e = self.parse_assoc_expr_with( + 0, + LhsExpr::AlreadyParsed { expr: e, starts_statement: false }, + )?; StmtKind::Expr(e) }; Ok(self.mk_stmt(lo.to(hi), kind)) @@ -498,7 +504,7 @@ impl<'a> Parser<'a> { /// Parses a block. Inner attributes are allowed. pub(super) fn parse_inner_attrs_and_block(&mut self) -> PResult<'a, (AttrVec, P<Block>)> { - self.parse_block_common(self.token.span, BlockCheckMode::Default) + self.parse_block_common(self.token.span, BlockCheckMode::Default, true) } /// Parses a block. Inner attributes are allowed. 
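// For reference, the `@`-binding recovery in the pattern-parser hunk above now
// always offers a machine-applicable "switch the order" fix, e.g. (binding name
// invented):
//
//     let Some(_) @ x = opt;   // pattern on the left, binding on the right
//     let x @ Some(_) = opt;   // suggested replacement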
@@ -506,16 +512,23 @@ impl<'a> Parser<'a> { &mut self, lo: Span, blk_mode: BlockCheckMode, + can_be_struct_literal: bool, ) -> PResult<'a, (AttrVec, P<Block>)> { maybe_whole!(self, NtBlock, |x| (AttrVec::new(), x)); + let maybe_ident = self.prev_token.clone(); self.maybe_recover_unexpected_block_label(); if !self.eat(&token::OpenDelim(Delimiter::Brace)) { return self.error_block_no_opening_brace(); } let attrs = self.parse_inner_attributes()?; - let tail = match self.maybe_suggest_struct_literal(lo, blk_mode) { + let tail = match self.maybe_suggest_struct_literal( + lo, + blk_mode, + maybe_ident, + can_be_struct_literal, + ) { Some(tail) => tail?, None => self.parse_block_tail(lo, blk_mode, AttemptLocalParseRecovery::Yes)?, }; @@ -531,13 +544,23 @@ impl<'a> Parser<'a> { recover: AttemptLocalParseRecovery, ) -> PResult<'a, P<Block>> { let mut stmts = vec![]; + let mut snapshot = None; while !self.eat(&token::CloseDelim(Delimiter::Brace)) { if self.token == token::Eof { break; } + if self.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) { + // Account for `<<<<<<<` diff markers. We can't proactively error here because + // that can be a valid path start, so we snapshot and reparse only we've + // encountered another parse error. + snapshot = Some(self.create_snapshot_for_diagnostic()); + } let stmt = match self.parse_full_stmt(recover) { Err(mut err) if recover.yes() => { self.maybe_annotate_with_ascription(&mut err, false); + if let Some(ref mut snapshot) = snapshot { + snapshot.recover_diff_marker(); + } err.emit(); self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore); Some(self.mk_stmt_err(self.token.span)) diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs index b7206b576..867974672 100644 --- a/compiler/rustc_parse/src/parser/ty.rs +++ b/compiler/rustc_parse/src/parser/ty.rs @@ -1,8 +1,9 @@ use super::{Parser, PathStyle, TokenType}; -use crate::errors::{FnPtrWithGenerics, FnPtrWithGenericsSugg}; +use crate::errors::{ExpectedFnPathFoundFnKeyword, FnPtrWithGenerics, FnPtrWithGenericsSugg}; use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; +use ast::DUMMY_NODE_ID; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::util::case::Case; @@ -12,7 +13,9 @@ use rustc_ast::{ }; use rustc_errors::{pluralize, struct_span_err, Applicability, PResult}; use rustc_span::source_map::Span; -use rustc_span::symbol::{kw, sym}; +use rustc_span::symbol::{kw, sym, Ident}; +use rustc_span::Symbol; +use thin_vec::thin_vec; /// Any `?` or `~const` modifiers that appear at the start of a bound. struct BoundModifiers { @@ -501,7 +504,7 @@ impl<'a> Parser<'a> { self.bump_with((dyn_tok, dyn_tok_sp)); } let ty = self.parse_ty_no_plus()?; - Ok(TyKind::Rptr(opt_lifetime, MutTy { ty, mutbl })) + Ok(TyKind::Ref(opt_lifetime, MutTy { ty, mutbl })) } // Parses the `typeof(EXPR)`. @@ -613,6 +616,25 @@ impl<'a> Parser<'a> { /// Parses an `impl B0 + ... + Bn` type. fn parse_impl_ty(&mut self, impl_dyn_multi: &mut bool) -> PResult<'a, TyKind> { // Always parse bounds greedily for better error recovery. 
+ if self.token.is_lifetime() { + self.look_ahead(1, |t| { + if let token::Ident(symname, _) = t.kind { + // parse pattern with "'a Sized" we're supposed to give suggestion like + // "'a + Sized" + self.struct_span_err( + self.token.span, + &format!("expected `+` between lifetime and {}", symname), + ) + .span_suggestion_verbose( + self.token.span.shrink_to_hi(), + "add `+`", + " +", + Applicability::MaybeIncorrect, + ) + .emit(); + } + }) + } let bounds = self.parse_generic_bounds(None)?; *impl_dyn_multi = bounds.len() > 1 || self.prev_token.kind == TokenKind::BinOp(token::Plus); Ok(TyKind::ImplTrait(ast::DUMMY_NODE_ID, bounds)) @@ -705,12 +727,15 @@ impl<'a> Parser<'a> { let mut bounds = Vec::new(); let mut negative_bounds = Vec::new(); + // In addition to looping while we find generic bounds: + // We continue even if we find a keyword. This is necessary for error recovery on, + // for example, `impl fn()`. The only keyword that can go after generic bounds is + // `where`, so stop if it's it. + // We also continue if we find types (not traits), again for error recovery. while self.can_begin_bound() - // Continue even if we find a keyword. - // This is necessary for error recover on, for example, `impl fn()`. - // - // The only keyword that can go after generic bounds is `where`, so stop if it's it. - || (self.token.is_reserved_ident() && !self.token.is_keyword(kw::Where)) + || (self.may_recover() + && (self.token.can_begin_type() + || (self.token.is_reserved_ident() && !self.token.is_keyword(kw::Where)))) { if self.token.is_keyword(kw::Dyn) { // Account for `&dyn Trait + dyn Other`. @@ -911,8 +936,50 @@ impl<'a> Parser<'a> { has_parens: bool, modifiers: BoundModifiers, ) -> PResult<'a, GenericBound> { - let lifetime_defs = self.parse_late_bound_lifetime_defs()?; - let path = self.parse_path(PathStyle::Type)?; + let mut lifetime_defs = self.parse_late_bound_lifetime_defs()?; + let mut path = if self.token.is_keyword(kw::Fn) + && self.look_ahead(1, |tok| tok.kind == TokenKind::OpenDelim(Delimiter::Parenthesis)) + && let Some(path) = self.recover_path_from_fn() + { + path + } else if !self.token.is_path_start() && self.token.can_begin_type() { + let ty = self.parse_ty_no_plus()?; + // Instead of finding a path (a trait), we found a type. + let mut err = self.struct_span_err(ty.span, "expected a trait, found type"); + + // If we can recover, try to extract a path from the type. Note + // that we do not use the try operator when parsing the type because + // if it fails then we get a parser error which we don't want (we're trying + // to recover from errors, not make more). + let path = if self.may_recover() + && matches!(ty.kind, TyKind::Ptr(..) | TyKind::Ref(..)) + && let TyKind::Path(_, path) = &ty.peel_refs().kind { + // Just get the indirection part of the type. + let span = ty.span.until(path.span); + + err.span_suggestion_verbose( + span, + "consider removing the indirection", + "", + Applicability::MaybeIncorrect, + ); + + path.clone() + } else { + return Err(err); + }; + + err.emit(); + + path + } else { + self.parse_path(PathStyle::Type)? + }; + + if self.may_recover() && self.token == TokenKind::OpenDelim(Delimiter::Parenthesis) { + self.recover_fn_trait_with_lifetime_params(&mut path, &mut lifetime_defs)?; + } + if has_parens { if self.token.is_like_plus() { // Someone has written something like `&dyn (Trait + Other)`. 
The correct code @@ -941,6 +1008,38 @@ impl<'a> Parser<'a> { Ok(GenericBound::Trait(poly_trait, modifier)) } + // recovers a `Fn(..)` parenthesized-style path from `fn(..)` + fn recover_path_from_fn(&mut self) -> Option<ast::Path> { + let fn_token_span = self.token.span; + self.bump(); + let args_lo = self.token.span; + let snapshot = self.create_snapshot_for_diagnostic(); + match self.parse_fn_decl(|_| false, AllowPlus::No, RecoverReturnSign::OnlyFatArrow) { + Ok(decl) => { + self.sess.emit_err(ExpectedFnPathFoundFnKeyword { fn_token_span }); + Some(ast::Path { + span: fn_token_span.to(self.prev_token.span), + segments: thin_vec![ast::PathSegment { + ident: Ident::new(Symbol::intern("Fn"), fn_token_span), + id: DUMMY_NODE_ID, + args: Some(P(ast::GenericArgs::Parenthesized(ast::ParenthesizedArgs { + span: args_lo.to(self.prev_token.span), + inputs: decl.inputs.iter().map(|a| a.ty.clone()).collect(), + inputs_span: args_lo.until(decl.output.span()), + output: decl.output.clone(), + }))), + }], + tokens: None, + }) + } + Err(diag) => { + diag.cancel(); + self.restore_snapshot(snapshot); + None + } + } + } + /// Optionally parses `for<$generic_params>`. pub(super) fn parse_late_bound_lifetime_defs(&mut self) -> PResult<'a, Vec<GenericParam>> { if self.eat_keyword(kw::For) { @@ -955,6 +1054,92 @@ impl<'a> Parser<'a> { } } + /// Recover from `Fn`-family traits (Fn, FnMut, FnOnce) with lifetime arguments + /// (e.g. `FnOnce<'a>(&'a str) -> bool`). Up to generic arguments have already + /// been eaten. + fn recover_fn_trait_with_lifetime_params( + &mut self, + fn_path: &mut ast::Path, + lifetime_defs: &mut Vec<GenericParam>, + ) -> PResult<'a, ()> { + let fn_path_segment = fn_path.segments.last_mut().unwrap(); + let generic_args = if let Some(p_args) = &fn_path_segment.args { + p_args.clone().into_inner() + } else { + // Normally it wouldn't come here because the upstream should have parsed + // generic parameters (otherwise it's impossible to call this function). + return Ok(()); + }; + let lifetimes = + if let ast::GenericArgs::AngleBracketed(ast::AngleBracketedArgs { span: _, args }) = + &generic_args + { + args.into_iter() + .filter_map(|arg| { + if let ast::AngleBracketedArg::Arg(generic_arg) = arg + && let ast::GenericArg::Lifetime(lifetime) = generic_arg { + Some(lifetime) + } else { + None + } + }) + .collect() + } else { + Vec::new() + }; + // Only try to recover if the trait has lifetime params. + if lifetimes.is_empty() { + return Ok(()); + } + + // Parse `(T, U) -> R`. + let inputs_lo = self.token.span; + let inputs: Vec<_> = + self.parse_fn_params(|_| false)?.into_iter().map(|input| input.ty).collect(); + let inputs_span = inputs_lo.to(self.prev_token.span); + let output = self.parse_ret_ty(AllowPlus::No, RecoverQPath::No, RecoverReturnSign::No)?; + let args = ast::ParenthesizedArgs { + span: fn_path_segment.span().to(self.prev_token.span), + inputs, + inputs_span, + output, + } + .into(); + *fn_path_segment = + ast::PathSegment { ident: fn_path_segment.ident, args, id: ast::DUMMY_NODE_ID }; + + // Convert parsed `<'a>` in `Fn<'a>` into `for<'a>`. 
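// The recovery rewrites bounds of the shape given in the doc comment above
// (the surrounding function is invented for illustration):
//
//     fn call<F>(f: F) where F: FnOnce<'a>(&'a str) -> bool {}
//
// into a higher-ranked trait bound:
//
//     fn call<F>(f: F) where F: for<'a> FnOnce(&'a str) -> bool {}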
+ let mut generic_params = lifetimes + .iter() + .map(|lt| GenericParam { + id: lt.id, + ident: lt.ident, + attrs: ast::AttrVec::new(), + bounds: Vec::new(), + is_placeholder: false, + kind: ast::GenericParamKind::Lifetime, + colon_span: None, + }) + .collect::<Vec<GenericParam>>(); + lifetime_defs.append(&mut generic_params); + + let generic_args_span = generic_args.span(); + let mut err = + self.struct_span_err(generic_args_span, "`Fn` traits cannot take lifetime parameters"); + let snippet = format!( + "for<{}> ", + lifetimes.iter().map(|lt| lt.ident.as_str()).intersperse(", ").collect::<String>(), + ); + let before_fn_path = fn_path.span.shrink_to_lo(); + err.multipart_suggestion( + "consider using a higher-ranked trait bound instead", + vec![(generic_args_span, "".to_owned()), (before_fn_path, snippet)], + Applicability::MaybeIncorrect, + ) + .emit(); + Ok(()) + } + pub(super) fn check_lifetime(&mut self) -> bool { self.expected_tokens.push(TokenType::Lifetime); self.token.is_lifetime() diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index fcc68b3a2..72da398d3 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] rustc_lexer = { path = "../rustc_lexer" } +rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 0113eb4e3..7b016cada 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -38,6 +38,31 @@ impl InnerSpan { } } +/// The location and before/after width of a character whose width has changed from its source code +/// representation +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct InnerWidthMapping { + /// Index of the character in the source + pub position: usize, + /// The inner width in characters + pub before: usize, + /// The transformed width in characters + pub after: usize, +} + +impl InnerWidthMapping { + pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping { + InnerWidthMapping { position, before, after } + } +} + +/// Whether the input string is a literal. If yes, it contains the inner width mappings. +#[derive(Clone, PartialEq, Eq)] +enum InputStringKind { + NotALiteral, + Literal { width_mappings: Vec<InnerWidthMapping> }, +} + /// The type of format string that we are parsing. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ParseMode { @@ -58,13 +83,13 @@ impl InnerOffset { /// A piece is a portion of the format string which represents the next part /// to emit. These are emitted as a stream by the `Parser` class. -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Piece<'a> { /// A literal string which should directly be emitted String(&'a str), /// This describes that formatting should process the next argument (as /// specified inside) for emission. - NextArgument(Argument<'a>), + NextArgument(Box<Argument<'a>>), } /// Representation of an argument specification. 
@@ -200,8 +225,8 @@ pub struct Parser<'a> { style: Option<usize>, /// Start and end byte offset of every successfully parsed argument pub arg_places: Vec<InnerSpan>, - /// Characters that need to be shifted - skips: Vec<usize>, + /// Characters whose length has been changed from their in-code representation + width_map: Vec<InnerWidthMapping>, /// Span of the last opening brace seen, used for error reporting last_opening_brace: Option<InnerSpan>, /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` @@ -224,7 +249,7 @@ impl<'a> Iterator for Parser<'a> { '{' => { let curr_last_brace = self.last_opening_brace; let byte_pos = self.to_span_index(pos); - let lbrace_end = self.to_span_index(pos + 1); + let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos)); self.last_opening_brace = Some(byte_pos.to(lbrace_end)); self.cur.next(); if self.consume('{') { @@ -233,18 +258,21 @@ impl<'a> Iterator for Parser<'a> { Some(String(self.string(pos + 1))) } else { let arg = self.argument(lbrace_end); - if let Some(rbrace_byte_idx) = self.must_consume('}') { - let lbrace_inner_offset = self.to_span_index(pos); - let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx); + if let Some(rbrace_pos) = self.must_consume('}') { if self.is_literal { + let lbrace_byte_pos = self.to_span_index(pos); + let rbrace_byte_pos = self.to_span_index(rbrace_pos); + + let width = self.to_span_width(rbrace_pos); + self.arg_places.push( - lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)), + lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)), ); } } else { self.suggest_positional_arg_instead_of_captured_arg(arg); } - Some(NextArgument(arg)) + Some(NextArgument(Box::new(arg))) } } '}' => { @@ -285,7 +313,12 @@ impl<'a> Parser<'a> { append_newline: bool, mode: ParseMode, ) -> Parser<'a> { - let (skips, is_literal) = find_skips_from_snippet(snippet, style); + let input_string_kind = find_width_map_from_snippet(snippet, style); + let (width_map, is_literal) = match input_string_kind { + InputStringKind::Literal { width_mappings } => (width_mappings, true), + InputStringKind::NotALiteral => (Vec::new(), false), + }; + Parser { mode, input: s, @@ -294,7 +327,7 @@ impl<'a> Parser<'a> { curarg: 0, style, arg_places: vec![], - skips, + width_map, last_opening_brace: None, append_newline, is_literal, @@ -367,21 +400,34 @@ impl<'a> Parser<'a> { None } + fn remap_pos(&self, mut pos: usize) -> InnerOffset { + for width in &self.width_map { + if pos > width.position { + pos += width.before - width.after; + } else if pos == width.position && width.after == 0 { + pos += width.before; + } else { + break; + } + } + + InnerOffset(pos) + } + fn to_span_index(&self, pos: usize) -> InnerOffset { - let mut pos = pos; // This handles the raw string case, the raw argument is the number of # // in r###"..."### (we need to add one because of the `r`). 
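// E.g. for r##"{foo}"## the stored style is 2 (the number of `#`s), so `raw`
// below becomes 3, covering `r##`; the trailing `+ 1` then accounts for the
// opening `"` when mapping a position inside the string back to an offset
// within the literal token.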
let raw = self.style.map_or(0, |raw| raw + 1); - for skip in &self.skips { - if pos > *skip { - pos += 1; - } else if pos == *skip && raw == 0 { - pos += 1; - } else { - break; - } + let pos = self.remap_pos(pos); + InnerOffset(raw + pos.0 + 1) + } + + fn to_span_width(&self, pos: usize) -> usize { + let pos = self.remap_pos(pos); + match self.width_map.iter().find(|w| w.position == pos.0) { + Some(w) => w.before, + None => 1, } - InnerOffset(raw + pos + 1) } fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { @@ -401,7 +447,7 @@ impl<'a> Parser<'a> { Some(pos) } else { let pos = self.to_span_index(pos); - let description = format!("expected `'}}'`, found `{:?}`", maybe); + let description = format!("expected `'}}'`, found `{maybe:?}`"); let label = "expected `}`".to_owned(); let (note, secondary_label) = if c == '}' { ( @@ -425,12 +471,12 @@ impl<'a> Parser<'a> { None } } else { - let description = format!("expected `{:?}` but string was terminated", c); + let description = format!("expected `{c:?}` but string was terminated"); // point at closing `"` let pos = self.input.len() - if self.append_newline { 1 } else { 0 }; let pos = self.to_span_index(pos); if c == '}' { - let label = format!("expected `{:?}`", c); + let label = format!("expected `{c:?}`"); let (note, secondary_label) = if c == '}' { ( Some( @@ -451,7 +497,7 @@ impl<'a> Parser<'a> { should_be_replaced_with_positional_argument: false, }); } else { - self.err(description, format!("expected `{:?}`", c), pos.to(pos)); + self.err(description, format!("expected `{c:?}`"), pos.to(pos)); } None } @@ -809,59 +855,55 @@ impl<'a> Parser<'a> { /// Finds the indices of all characters that have been processed and differ between the actual /// written code (code snippet) and the `InternedString` that gets processed in the `Parser` /// in order to properly synthesise the intra-string `Span`s for error diagnostics. 
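// Concretely, using the arms below: the two source characters of an escape like
// `\n` become one character after unescaping, recorded as
// `InnerWidthMapping::new(pos, 2, 1)`; a `\x41` escape is four source characters
// for one output character, `InnerWidthMapping::new(pos, 4, 1)`; and a
// line-continuation `\` followed by a newline plus leading whitespace maps to
// zero output characters (`after == 0`).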
-fn find_skips_from_snippet( +fn find_width_map_from_snippet( snippet: Option<string::String>, str_style: Option<usize>, -) -> (Vec<usize>, bool) { +) -> InputStringKind { let snippet = match snippet { Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s, - _ => return (vec![], false), + _ => return InputStringKind::NotALiteral, }; if str_style.is_some() { - return (vec![], true); + return InputStringKind::Literal { width_mappings: Vec::new() }; } let snippet = &snippet[1..snippet.len() - 1]; let mut s = snippet.char_indices(); - let mut skips = vec![]; + let mut width_mappings = vec![]; while let Some((pos, c)) = s.next() { match (c, s.clone().next()) { // skip whitespace and empty lines ending in '\\' - ('\\', Some((next_pos, '\n'))) => { - skips.push(pos); - skips.push(next_pos); + ('\\', Some((_, '\n'))) => { let _ = s.next(); + let mut width = 2; - while let Some((pos, c)) = s.clone().next() { + while let Some((_, c)) = s.clone().next() { if matches!(c, ' ' | '\n' | '\t') { - skips.push(pos); + width += 1; let _ = s.next(); } else { break; } } + + width_mappings.push(InnerWidthMapping::new(pos, width, 0)); } - ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { - skips.push(next_pos); + ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { + width_mappings.push(InnerWidthMapping::new(pos, 2, 1)); let _ = s.next(); } ('\\', Some((_, 'x'))) => { - for _ in 0..3 { - // consume `\xAB` literal - if let Some((pos, _)) = s.next() { - skips.push(pos); - } else { - break; - } - } + // consume `\xAB` literal + s.nth(2); + width_mappings.push(InnerWidthMapping::new(pos, 4, 1)); } ('\\', Some((_, 'u'))) => { - if let Some((pos, _)) = s.next() { - skips.push(pos); - } - if let Some((next_pos, next_c)) = s.next() { + let mut width = 2; + let _ = s.next(); + + if let Some((_, next_c)) = s.next() { if next_c == '{' { // consume up to 6 hexanumeric chars let digits_len = @@ -881,19 +923,18 @@ fn find_skips_from_snippet( let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1)); // skip '{' and '}' also - for pos in (next_pos..).take(required_skips + 2) { - skips.push(pos) - } + width += required_skips + 2; s.nth(digits_len); } else if next_c.is_digit(16) { - skips.push(next_pos); + width += 1; + // We suggest adding `{` and `}` when appropriate, accept it here as if // it were correct let mut i = 0; // consume up to 6 hexanumeric chars - while let (Some((next_pos, c)), _) = (s.next(), i < 6) { + while let (Some((_, c)), _) = (s.next(), i < 6) { if c.is_digit(16) { - skips.push(next_pos); + width += 1; } else { break; } @@ -901,12 +942,19 @@ fn find_skips_from_snippet( } } } + + width_mappings.push(InnerWidthMapping::new(pos, width, 1)); } _ => {} } } - (skips, true) + + InputStringKind::Literal { width_mappings } } +// Assert a reasonable size for `Piece` +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(Piece<'_>, 16); + #[cfg(test)] mod tests; diff --git a/compiler/rustc_parse_format/src/tests.rs b/compiler/rustc_parse_format/src/tests.rs index 3f9cb149b..2992ba845 100644 --- a/compiler/rustc_parse_format/src/tests.rs +++ b/compiler/rustc_parse_format/src/tests.rs @@ -76,51 +76,51 @@ fn invalid_precision() { fn format_nothing() { same( "{}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: fmtdflt(), - })], + }))], ); } #[test] fn format_position() { 
same( "{3}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 2, end: 3 }, format: fmtdflt(), - })], + }))], ); } #[test] fn format_position_nothing_else() { same( "{3:}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 2, end: 3 }, format: fmtdflt(), - })], + }))], ); } #[test] fn format_named() { same( "{name}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentNamed("name"), position_span: InnerSpan { start: 2, end: 6 }, format: fmtdflt(), - })], + }))], ) } #[test] fn format_type() { same( "{3:x}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 2, end: 3 }, format: FormatSpec { @@ -134,14 +134,14 @@ fn format_type() { ty: "x", ty_span: None, }, - })], + }))], ); } #[test] fn format_align_fill() { same( "{3:>}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 2, end: 3 }, format: FormatSpec { @@ -155,11 +155,11 @@ fn format_align_fill() { ty: "", ty_span: None, }, - })], + }))], ); same( "{3:0<}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 2, end: 3 }, format: FormatSpec { @@ -173,11 +173,11 @@ fn format_align_fill() { ty: "", ty_span: None, }, - })], + }))], ); same( "{3:*<abcd}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 2, end: 3 }, format: FormatSpec { @@ -191,14 +191,14 @@ fn format_align_fill() { ty: "abcd", ty_span: Some(InnerSpan::new(6, 10)), }, - })], + }))], ); } #[test] fn format_counts() { same( "{:10x}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -212,11 +212,11 @@ fn format_counts() { ty: "x", ty_span: None, }, - })], + }))], ); same( "{:10$.10x}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -230,11 +230,11 @@ fn format_counts() { ty: "x", ty_span: None, }, - })], + }))], ); same( "{1:0$.10x}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentIs(1), position_span: InnerSpan { start: 2, end: 3 }, format: FormatSpec { @@ -248,11 +248,11 @@ fn format_counts() { ty: "x", ty_span: None, }, - })], + }))], ); same( "{:.*x}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(1), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -266,11 +266,11 @@ fn format_counts() { ty: "x", ty_span: None, }, - })], + }))], ); same( "{:.10$x}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -284,11 +284,11 @@ fn format_counts() { ty: "x", ty_span: None, }, - })], + }))], ); same( "{:a$.b$?}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -302,11 +302,11 @@ fn format_counts() { ty: "?", ty_span: None, }, - })], + }))], ); same( "{:.4}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: 
ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -320,14 +320,14 @@ fn format_counts() { ty: "", ty_span: None, }, - })], + }))], ) } #[test] fn format_flags() { same( "{:-}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -341,11 +341,11 @@ fn format_flags() { ty: "", ty_span: None, }, - })], + }))], ); same( "{:+#}", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 2 }, format: FormatSpec { @@ -359,7 +359,7 @@ fn format_flags() { ty: "", ty_span: None, }, - })], + }))], ); } #[test] @@ -368,7 +368,7 @@ fn format_mixture() { "abcd {3:x} efg", &[ String("abcd "), - NextArgument(Argument { + NextArgument(Box::new(Argument { position: ArgumentIs(3), position_span: InnerSpan { start: 7, end: 8 }, format: FormatSpec { @@ -382,7 +382,7 @@ fn format_mixture() { ty: "x", ty_span: None, }, - }), + })), String(" efg"), ], ); @@ -391,18 +391,18 @@ fn format_mixture() { fn format_whitespace() { same( "{ }", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 3 }, format: fmtdflt(), - })], + }))], ); same( "{ }", - &[NextArgument(Argument { + &[NextArgument(Box::new(Argument { position: ArgumentImplicitlyIs(0), position_span: InnerSpan { start: 2, end: 4 }, format: fmtdflt(), - })], + }))], ); } |
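As the updated tests show, `Piece::NextArgument` now boxes its `Argument`, which keeps `Piece` small (16 bytes on x86_64, per the new size assertion). A minimal consumer sketch of the changed API follows; the exact `Parser::new` parameter order (style, snippet, append_newline) is assumed from the constructor shown in the diff and may differ:

    use rustc_parse_format::{ParseMode, Parser, Piece};

    fn count_args(fmt: &str) -> usize {
        // `None, None, false` stand in for the raw-string style, the source
        // snippet, and `append_newline`; assumed defaults for illustration.
        Parser::new(fmt, None, None, false, ParseMode::Format)
            .filter(|piece| matches!(piece, Piece::NextArgument(_)))
            .count()
    }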