diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
commit | 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch) | |
tree | 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /compiler/rustc_parse/src/lexer | |
parent | Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff) | |
download | rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 77 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/tokentrees.rs | 3 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 33 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unicode_chars.rs | 2 |
4 files changed, 85 insertions, 30 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 9e856c9f2..c6e6b46e4 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, Mode}; +use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_session::lint::builtin::{ @@ -67,7 +69,7 @@ pub(crate) fn parse_token_trees<'a>( match token_trees { Ok(stream) if unmatched_delims.is_empty() => Ok(stream), _ => { - // Return error if there are unmatched delimiters or unclosng delimiters. + // Return error if there are unmatched delimiters or unclosed delimiters. // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch // because the delimiter mismatch is more likely to be the root cause of error @@ -204,16 +206,15 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind { + self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos)); + } let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { self.sess .span_diagnostic - .struct_span_err( - self.mk_sp(suffix_start, self.pos), - "underscore literal suffix is not allowed", - ) - .emit(); + .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) }); None } else { Some(Symbol::intern(string)) @@ -325,7 +326,7 @@ impl<'a> StringReader<'a> { ) -> DiagnosticBuilder<'a, !> { self.sess .span_diagnostic - .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c))) + .struct_span_fatal(self.mk_sp(from_pos, to_pos), format!("{}: {}", m, escaped_char(c))) } /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly @@ -419,6 +420,16 @@ impl<'a> StringReader<'a> { } self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } + rustc_lexer::LiteralKind::CStr { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), end), + "unterminated C string", + error_code!(E0767), + ) + } + self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); @@ -437,6 +448,15 @@ impl<'a> StringReader<'a> { self.report_raw_str_error(start, 2); } } + rustc_lexer::LiteralKind::RawCStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + let kind = token::CStrRaw(n_hashes); + self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + } else { + self.report_raw_str_error(start, 2); + } + } rustc_lexer::LiteralKind::Int { base, empty_int } => { if empty_int { let span = self.mk_sp(start, end); @@ -546,7 +566,7 @@ impl<'a> StringReader<'a> { err.span_label(self.mk_sp(start, start), "unterminated raw string"); if n_hashes > 0 { - err.note(&format!( + err.note(format!( "this raw string should be terminated with `\"{}`", "#".repeat(n_hashes as usize) )); @@ -642,7 +662,7 @@ impl<'a> StringReader<'a> { &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, prefix_span, ast::CRATE_NODE_ID, - &format!("prefix `{prefix}` is unknown"), + format!("prefix `{prefix}` is unknown"), BuiltinLintDiagnostics::ReservedPrefix(prefix_span), ); } @@ -652,7 +672,7 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_quoted( + fn cook_common( &self, kind: token::LitKind, mode: Mode, @@ -660,12 +680,13 @@ impl<'a> StringReader<'a> { end: BytePos, prefix_len: u32, postfix_len: u32, + unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape::unescape_literal(lit_content, mode, &mut |range, result| { + unescape(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { let span_with_quotes = self.mk_sp(start, end); @@ -696,6 +717,38 @@ impl<'a> StringReader<'a> { (token::Err, self.symbol_from_to(start, end)) } } + + fn cook_quoted( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { + unescape::unescape_literal(src, mode, &mut |span, result| { + callback(span, result.map(drop)) + }) + }) + } + + fn cook_c_string( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { + unescape::unescape_c_string(src, mode, &mut |span, result| { + callback(span, result.map(drop)) + }) + }) + } } pub fn nfc_normalize(string: &str) -> Symbol { diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index 7c2c08951..318a29985 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -199,8 +199,7 @@ impl<'a> TokenTreesReader<'a> { // matching opening delimiter). let token_str = token_to_string(&self.token); let msg = format!("unexpected closing delimiter: `{}`", token_str); - let mut err = - self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg); + let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg); report_suspicious_mismatch_block( &mut err, diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 0d12ec608..eb9625f92 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error( } }; let sugg = sugg.unwrap_or_else(|| { - let is_byte = mode.is_byte(); - let prefix = if is_byte { "b" } else { "" }; + let prefix = mode.prefix_noraw(); let mut escaped = String::with_capacity(lit.len()); let mut chrs = lit.chars().peekable(); while let Some(first) = chrs.next() { @@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error( }; } let sugg = format!("{prefix}\"{escaped}\""); - MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg } + MoreThanOneCharSugg::Quotes { + span: span_with_quotes, + is_byte: mode == Mode::Byte, + sugg, + } }); handler.emit_err(UnescapeError::MoreThanOneChar { span: span_with_quotes, @@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error( char_span, escaped_sugg: c.escape_default().to_string(), escaped_msg: escaped_char(c), - byte: mode.is_byte(), + byte: mode == Mode::Byte, }); } EscapeError::BareCarriageReturn => { @@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error( EscapeError::InvalidEscape => { let (c, span) = last_char(); - let label = - if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; + let label = if mode == Mode::Byte || mode == Mode::ByteStr { + "unknown byte escape" + } else { + "unknown character escape" + }; let ec = escaped_char(c); - let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); + let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_byte() { + if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_byte() { + if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", @@ -180,13 +186,13 @@ pub(crate) fn emit_unescape_error( } else { String::new() }; - err.span_label(span, &format!("must be ASCII{}", postfix)); + err.span_label(span, format!("must be ASCII{}", postfix)); // Note: the \\xHH suggestions are not given for raw byte string // literals, because they are araw and so cannot use any escapes. if (c as u32) <= 0xFF && mode != Mode::RawByteStr { err.span_suggestion( span, - &format!( + format!( "if you meant to use the unicode code point for {:?}, use a \\xHH escape", c ), @@ -200,10 +206,7 @@ pub(crate) fn emit_unescape_error( utf8.push(c); err.span_suggestion( span, - &format!( - "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", - c - ), + format!("if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c), utf8.as_bytes() .iter() .map(|b: &u8| format!("\\x{:X}", *b)) diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs index 1f027c08f..829d9693e 100644 --- a/compiler/rustc_parse/src/lexer/unicode_chars.rs +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -350,7 +350,7 @@ pub(super) fn check_for_substitution( let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else { let msg = format!("substitution character not found for '{}'", ch); - reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); + reader.sess.span_diagnostic.span_bug_no_panic(span, msg); return (None, None); }; |