Merging upstream version 1.71.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
commit: 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree: 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /compiler/rustc_parse/src/lexer
parent: Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
download: rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz
rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip
4 files changed, 85 insertions, 30 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9e856c9f2..c6e6b46e4 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,3 +1,5 @@
+use std::ops::Range;
+
 use crate::errors;
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use crate::make_unclosed_delims_error;
@@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
-use rustc_lexer::unescape::{self, Mode};
+use rustc_lexer::unescape::{self, EscapeError, Mode};
 use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
 use rustc_session::lint::builtin::{
@@ -67,7 +69,7 @@ pub(crate) fn parse_token_trees<'a>(
     match token_trees {
         Ok(stream) if unmatched_delims.is_empty() => Ok(stream),
         _ => {
-            // Return error if there are unmatched delimiters or unclosng delimiters.
+            // Return error if there are unmatched delimiters or unclosed delimiters.
             // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
             // because the delimiter mismatch is more likely to be the root cause of error
 
@@ -204,16 +206,15 @@ impl<'a> StringReader<'a> {
                 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                     let suffix_start = start + BytePos(suffix_start);
                     let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
+                    if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind {
+                        self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos));
+                    }
                     let suffix = if suffix_start < self.pos {
                         let string = self.str_from(suffix_start);
                         if string == "_" {
                             self.sess
                                 .span_diagnostic
-                                .struct_span_err(
-                                    self.mk_sp(suffix_start, self.pos),
-                                    "underscore literal suffix is not allowed",
-                                )
-                                .emit();
+                                .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) });
                             None
                         } else {
                             Some(Symbol::intern(string))
@@ -325,7 +326,7 @@ impl<'a> StringReader<'a> {
     ) -> DiagnosticBuilder<'a, !> {
         self.sess
             .span_diagnostic
-            .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
+            .struct_span_fatal(self.mk_sp(from_pos, to_pos), format!("{}: {}", m, escaped_char(c)))
     }
 
     /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
@@ -419,6 +420,16 @@ impl<'a> StringReader<'a> {
                 }
                 self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
             }
+            rustc_lexer::LiteralKind::CStr { terminated } => {
+                if !terminated {
+                    self.sess.span_diagnostic.span_fatal_with_code(
+                        self.mk_sp(start + BytePos(1), end),
+                        "unterminated C string",
+                        error_code!(E0767),
+                    )
+                }
+                self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
+            }
             rustc_lexer::LiteralKind::RawStr { n_hashes } => {
                 if let Some(n_hashes) = n_hashes {
                     let n = u32::from(n_hashes);
@@ -437,6 +448,15 @@ impl<'a> StringReader<'a> {
                     self.report_raw_str_error(start, 2);
                 }
             }
+            rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    let kind = token::CStrRaw(n_hashes);
+                    self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
+            }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 if empty_int {
                     let span = self.mk_sp(start, end);
@@ -546,7 +566,7 @@ impl<'a> StringReader<'a> {
         err.span_label(self.mk_sp(start, start), "unterminated raw string");
 
         if n_hashes > 0 {
-            err.note(&format!(
+            err.note(format!(
                 "this raw string should be terminated with `\"{}`",
                 "#".repeat(n_hashes as usize)
             ));
@@ -642,7 +662,7 @@ impl<'a> StringReader<'a> {
                 &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
                 prefix_span,
                 ast::CRATE_NODE_ID,
-                &format!("prefix `{prefix}` is unknown"),
+                format!("prefix `{prefix}` is unknown"),
                 BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
             );
         }
@@ -652,7 +672,7 @@ impl<'a> StringReader<'a> {
         self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
 
-    fn cook_quoted(
+    fn cook_common(
         &self,
         kind: token::LitKind,
         mode: Mode,
@@ -660,12 +680,13 @@ impl<'a> StringReader<'a> {
         end: BytePos,
         prefix_len: u32,
         postfix_len: u32,
+        unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
     ) -> (token::LitKind, Symbol) {
         let mut has_fatal_err = false;
         let content_start = start + BytePos(prefix_len);
         let content_end = end - BytePos(postfix_len);
         let lit_content = self.str_from_to(content_start, content_end);
-        unescape::unescape_literal(lit_content, mode, &mut |range, result| {
+        unescape(lit_content, mode, &mut |range, result| {
             // Here we only check for errors. The actual unescaping is done later.
             if let Err(err) = result {
                 let span_with_quotes = self.mk_sp(start, end);
@@ -696,6 +717,38 @@ impl<'a> StringReader<'a> {
             (token::Err, self.symbol_from_to(start, end))
         }
     }
+
+    fn cook_quoted(
+        &self,
+        kind: token::LitKind,
+        mode: Mode,
+        start: BytePos,
+        end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) -> (token::LitKind, Symbol) {
+        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
+            unescape::unescape_literal(src, mode, &mut |span, result| {
+                callback(span, result.map(drop))
+            })
+        })
+    }
+
+    fn cook_c_string(
+        &self,
+        kind: token::LitKind,
+        mode: Mode,
+        start: BytePos,
+        end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) -> (token::LitKind, Symbol) {
+        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
+            unescape::unescape_c_string(src, mode, &mut |span, result| {
+                callback(span, result.map(drop))
+            })
+        })
+    }
 }
 
 pub fn nfc_normalize(string: &str) -> Symbol {
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 7c2c08951..318a29985 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -199,8 +199,7 @@ impl<'a> TokenTreesReader<'a> {
         // matching opening delimiter).
         let token_str = token_to_string(&self.token);
         let msg = format!("unexpected closing delimiter: `{}`", token_str);
-        let mut err =
-            self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
+        let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
 
         report_suspicious_mismatch_block(
             &mut err,
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 0d12ec608..eb9625f92 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error(
                 }
             };
             let sugg = sugg.unwrap_or_else(|| {
-                let is_byte = mode.is_byte();
-                let prefix = if is_byte { "b" } else { "" };
+                let prefix = mode.prefix_noraw();
                 let mut escaped = String::with_capacity(lit.len());
                 let mut chrs = lit.chars().peekable();
                 while let Some(first) = chrs.next() {
@@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error(
                     };
                 }
                 let sugg = format!("{prefix}\"{escaped}\"");
-                MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
+                MoreThanOneCharSugg::Quotes {
+                    span: span_with_quotes,
+                    is_byte: mode == Mode::Byte,
+                    sugg,
+                }
             });
             handler.emit_err(UnescapeError::MoreThanOneChar {
                 span: span_with_quotes,
@@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error(
                 char_span,
                 escaped_sugg: c.escape_default().to_string(),
                 escaped_msg: escaped_char(c),
-                byte: mode.is_byte(),
+                byte: mode == Mode::Byte,
             });
         }
         EscapeError::BareCarriageReturn => {
@@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error(
         EscapeError::InvalidEscape => {
             let (c, span) = last_char();
 
-            let label =
-                if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
+            let label = if mode == Mode::Byte || mode == Mode::ByteStr {
+                "unknown byte escape"
+            } else {
+                "unknown character escape"
+            };
             let ec = escaped_char(c);
-            let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
+            let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec));
             diag.span_label(span, label);
-            if c == '{' || c == '}' && !mode.is_byte() {
+            if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
                 diag.help(
                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
                 );
@@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error(
                      version control settings",
                 );
             } else {
-                if !mode.is_byte() {
+                if mode == Mode::Str || mode == Mode::Char {
                     diag.span_suggestion(
                         span_with_quotes,
                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
@@ -180,13 +186,13 @@ pub(crate) fn emit_unescape_error(
             } else {
                 String::new()
             };
-            err.span_label(span, &format!("must be ASCII{}", postfix));
+            err.span_label(span, format!("must be ASCII{}", postfix));
             // Note: the \\xHH suggestions are not given for raw byte string
             // literals, because they are araw and so cannot use any escapes.
             if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
                 err.span_suggestion(
                     span,
-                    &format!(
+                    format!(
                         "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
                         c
                     ),
@@ -200,10 +206,7 @@ pub(crate) fn emit_unescape_error(
                 utf8.push(c);
                 err.span_suggestion(
                     span,
-                    &format!(
-                        "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
-                        c
-                    ),
+                    format!("if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c),
                     utf8.as_bytes()
                         .iter()
                         .map(|b: &u8| format!("\\x{:X}", *b))
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 1f027c08f..829d9693e 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -350,7 +350,7 @@ pub(super) fn check_for_substitution(
 
     let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
         let msg = format!("substitution character not found for '{}'", ch);
-        reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
+        reader.sess.span_diagnostic.span_bug_no_panic(span, msg);
         return (None, None);
     };
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-18 02:49:50 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-18 02:49:50 +0000
commit	9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree	3fcebf40ed70e581d776a8a4c65923e8ec20e026 /compiler/rustc_parse/src/lexer
parent	Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
download	rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip