diff options
Diffstat (limited to '')
-rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 126 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 42 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unicode_chars.rs | 4 |
3 files changed, 78 insertions, 94 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 462bce16a..f027843e6 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -175,20 +175,10 @@ impl<'a> StringReader<'a> { if string == "_" { self.sess .span_diagnostic - .struct_span_warn( + .struct_span_err( self.mk_sp(suffix_start, self.pos), "underscore literal suffix is not allowed", ) - .warn( - "this was previously accepted by the compiler but is \ - being phased out; it will become a hard error in \ - a future release!", - ) - .note( - "see issue #42326 \ - <https://github.com/rust-lang/rust/issues/42326> \ - for more information", - ) .emit(); None } else { @@ -363,55 +353,55 @@ impl<'a> StringReader<'a> { fn cook_lexer_literal( &self, start: BytePos, - suffix_start: BytePos, + end: BytePos, kind: rustc_lexer::LiteralKind, ) -> (token::LitKind, Symbol) { - // prefix means `"` or `br"` or `r###"`, ... - let (lit_kind, mode, prefix_len, postfix_len) = match kind { + match kind { rustc_lexer::LiteralKind::Char { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated character literal", error_code!(E0762), ) } - (token::Char, Mode::Char, 1, 1) // ' ' + self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated byte constant", error_code!(E0763), ) } - (token::Byte, Mode::Byte, 2, 1) // b' ' + self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated double quote string", error_code!(E0765), ) } - (token::Str, Mode::Str, 1, 1) // " " + self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated double quote byte string", error_code!(E0766), ) } - (token::ByteStr, Mode::ByteStr, 2, 1) // b" " + self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## + let kind = token::StrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -419,56 +409,59 @@ impl<'a> StringReader<'a> { rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## + let kind = token::ByteStrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } } rustc_lexer::LiteralKind::Int { base, empty_int } => { - return if empty_int { + if empty_int { self.sess .span_diagnostic .struct_span_err_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "no valid digits found for number", error_code!(E0768), ) .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, suffix_start); - (token::Integer, self.symbol_from_to(start, suffix_start)) - }; + if matches!(base, Base::Binary | Base::Octal) { + let base = base as u32; + let s = self.str_from_to(start + BytePos(2), end); + for (idx, c) in s.char_indices() { + if c != '_' && c.to_digit(base).is_none() { + self.err_span_( + start + BytePos::from_usize(2 + idx), + start + BytePos::from_usize(2 + idx + c.len_utf8()), + &format!("invalid digit for a base {} literal", base), + ); + } + } + } + (token::Integer, self.symbol_from_to(start, end)) + } } rustc_lexer::LiteralKind::Float { base, empty_exponent } => { if empty_exponent { self.err_span_(start, self.pos, "expected at least one digit in exponent"); } - match base { - Base::Hexadecimal => self.err_span_( - start, - suffix_start, - "hexadecimal float literal is not supported", - ), + Base::Hexadecimal => { + self.err_span_(start, end, "hexadecimal float literal is not supported") + } Base::Octal => { - self.err_span_(start, suffix_start, "octal float literal is not supported") + self.err_span_(start, end, "octal float literal is not supported") } Base::Binary => { - self.err_span_(start, suffix_start, "binary float literal is not supported") + self.err_span_(start, end, "binary float literal is not supported") } - _ => (), + _ => {} } - - let id = self.symbol_from_to(start, suffix_start); - return (token::Float, id); + (token::Float, self.symbol_from_to(start, end)) } - }; - let content_start = start + BytePos(prefix_len); - let content_end = suffix_start - BytePos(postfix_len); - let id = self.symbol_from_to(content_start, content_end); - self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len); - (lit_kind, id) + } } #[inline] @@ -659,24 +652,30 @@ impl<'a> StringReader<'a> { ) } - fn validate_literal_escape( + fn cook_quoted( &self, + kind: token::LitKind, mode: Mode, - content_start: BytePos, - content_end: BytePos, + start: BytePos, + end: BytePos, prefix_len: u32, postfix_len: u32, - ) { + ) -> (token::LitKind, Symbol) { + let mut has_fatal_err = false; + let content_start = start + BytePos(prefix_len); + let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); unescape::unescape_literal(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { - let span_with_quotes = self - .mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len)); + let span_with_quotes = self.mk_sp(start, end); let (start, end) = (range.start as u32, range.end as u32); let lo = content_start + BytePos(start); let hi = lo + BytePos(end - start); let span = self.mk_sp(lo, hi); + if err.is_fatal() { + has_fatal_err = true; + } emit_unescape_error( &self.sess.span_diagnostic, lit_content, @@ -688,22 +687,13 @@ impl<'a> StringReader<'a> { ); } }); - } - fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { - let base = match base { - Base::Binary => 2, - Base::Octal => 8, - _ => return, - }; - let s = self.str_from_to(content_start + BytePos(2), content_end); - for (idx, c) in s.char_indices() { - let idx = idx as u32; - if c != '_' && c.to_digit(base).is_none() { - let lo = content_start + BytePos(2 + idx); - let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); - self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); - } + // We normally exclude the quotes for the symbol, but for errors we + // include it because it results in clearer error messages. + if !has_fatal_err { + (kind, Symbol::intern(lit_content)) + } else { + (token::Err, self.symbol_from_to(start, end)) } } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index f075de714..6373f5b4f 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -108,7 +108,7 @@ pub(crate) fn emit_unescape_error( } if !has_help { - let (prefix, msg) = if mode.is_bytes() { + let (prefix, msg) = if mode.is_byte() { ("b", "if you meant to write a byte string literal, use double quotes") } else { ("", "if you meant to write a `str` literal, use double quotes") @@ -142,7 +142,7 @@ pub(crate) fn emit_unescape_error( EscapeError::EscapeOnlyChar => { let (c, char_span) = last_char(); - let msg = if mode.is_bytes() { + let msg = if mode.is_byte() { "byte constant must be escaped" } else { "character constant must be escaped" @@ -182,11 +182,11 @@ pub(crate) fn emit_unescape_error( let (c, span) = last_char(); let label = - if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; + if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; let ec = escaped_char(c); let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_bytes() { + if c == '{' || c == '}' && !mode.is_byte() { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -196,7 +196,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_bytes() { + if !mode.is_byte() { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", @@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_bytes()); let (c, span) = last_char(); - let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); + let desc = match mode { + Mode::Byte => "byte literal", + Mode::ByteStr => "byte string literal", + Mode::RawByteStr => "raw byte string literal", + _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"), + }; + let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc)); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) } else { String::new() }; - err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); - if (c as u32) <= 0xFF { + err.span_label(span, &format!("must be ASCII{}", postfix)); + // Note: the \\xHH suggestions are not given for raw byte string + // literals, because they are araw and so cannot use any escapes. + if (c as u32) <= 0xFF && mode != Mode::RawByteStr { err.span_suggestion( span, &format!( @@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error( format!("\\x{:X}", c as u32), Applicability::MaybeIncorrect, ); - } else if matches!(mode, Mode::Byte) { + } else if mode == Mode::Byte { err.span_label(span, "this multibyte character does not fit into a single byte"); - } else if matches!(mode, Mode::ByteStr) { + } else if mode != Mode::RawByteStr { let mut utf8 = String::new(); utf8.push(c); err.span_suggestion( @@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error( } err.emit(); } - EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_bytes()); - let (c, span) = last_char(); - let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { - format!(" but is {:?}", c) - } else { - String::new() - }; - handler - .struct_span_err(span, "raw byte string must be ASCII") - .span_label(span, &format!("must be ASCII{}", postfix)) - .emit(); - } EscapeError::OutOfRangeHexEscape => { handler .struct_span_err(span, "out of range hex escape") diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs index 2c68cc589..f1b50296e 100644 --- a/compiler/rustc_parse/src/lexer/unicode_chars.rs +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -1,5 +1,5 @@ -// Characters and their corresponding confusables were collected from -// https://www.unicode.org/Public/security/10.0.0/confusables.txt +//! Characters and their corresponding confusables were collected from +//! <https://www.unicode.org/Public/security/10.0.0/confusables.txt> use super::StringReader; use crate::token::{self, Delimiter}; |