summary | refs | log | tree | commit | diff | stats
path: root/compiler/rustc_parse/src/lexer
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
commit 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /compiler/rustc_parse/src/lexer
parent Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
download rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz
download rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r-- compiler/rustc_parse/src/lexer/mod.rs 77
-rw-r--r-- compiler/rustc_parse/src/lexer/tokentrees.rs 3
-rw-r--r-- compiler/rustc_parse/src/lexer/unescape_error_reporting.rs 33
-rw-r--r-- compiler/rustc_parse/src/lexer/unicode_chars.rs 2
4 files changed, 85 insertions, 30 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9e856c9f2..c6e6b46e4 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,3 +1,5 @@
+use std::ops::Range;
+
use crate::errors;
use crate::lexer::unicode_chars::UNICODE_ARRAY;
use crate::make_unclosed_delims_error;
@@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
-use rustc_lexer::unescape::{self, Mode};
+use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::lint::builtin::{
@@ -67,7 +69,7 @@ pub(crate) fn parse_token_trees<'a>(
match token_trees {
Ok(stream) if unmatched_delims.is_empty() => Ok(stream),
_ => {
- // Return error if there are unmatched delimiters or unclosng delimiters.
+ // Return error if there are unmatched delimiters or unclosed delimiters.
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
// because the delimiter mismatch is more likely to be the root cause of error
@@ -204,16 +206,15 @@ impl<'a> StringReader<'a> {
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
+ if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind {
+ self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos));
+ }
let suffix = if suffix_start < self.pos {
let string = self.str_from(suffix_start);
if string == "_" {
self.sess
.span_diagnostic
- .struct_span_err(
- self.mk_sp(suffix_start, self.pos),
- "underscore literal suffix is not allowed",
- )
- .emit();
+ .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) });
None
} else {
Some(Symbol::intern(string))
@@ -325,7 +326,7 @@ impl<'a> StringReader<'a> {
) -> DiagnosticBuilder<'a, !> {
self.sess
.span_diagnostic
- .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
+ .struct_span_fatal(self.mk_sp(from_pos, to_pos), format!("{}: {}", m, escaped_char(c)))
}
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
@@ -419,6 +420,16 @@ impl<'a> StringReader<'a> {
}
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
}
+ rustc_lexer::LiteralKind::CStr { terminated } => {
+ if !terminated {
+ self.sess.span_diagnostic.span_fatal_with_code(
+ self.mk_sp(start + BytePos(1), end),
+ "unterminated C string",
+ error_code!(E0767),
+ )
+ }
+ self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
+ }
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
@@ -437,6 +448,15 @@ impl<'a> StringReader<'a> {
self.report_raw_str_error(start, 2);
}
}
+ rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
+ if let Some(n_hashes) = n_hashes {
+ let n = u32::from(n_hashes);
+ let kind = token::CStrRaw(n_hashes);
+ self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
+ } else {
+ self.report_raw_str_error(start, 2);
+ }
+ }
rustc_lexer::LiteralKind::Int { base, empty_int } => {
if empty_int {
let span = self.mk_sp(start, end);
@@ -546,7 +566,7 @@ impl<'a> StringReader<'a> {
err.span_label(self.mk_sp(start, start), "unterminated raw string");
if n_hashes > 0 {
- err.note(&format!(
+ err.note(format!(
"this raw string should be terminated with `\"{}`",
"#".repeat(n_hashes as usize)
));
@@ -642,7 +662,7 @@ impl<'a> StringReader<'a> {
&RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
prefix_span,
ast::CRATE_NODE_ID,
- &format!("prefix `{prefix}` is unknown"),
+ format!("prefix `{prefix}` is unknown"),
BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
);
}
@@ -652,7 +672,7 @@ impl<'a> StringReader<'a> {
self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}
- fn cook_quoted(
+ fn cook_common(
&self,
kind: token::LitKind,
mode: Mode,
@@ -660,12 +680,13 @@ impl<'a> StringReader<'a> {
end: BytePos,
prefix_len: u32,
postfix_len: u32,
+ unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
) -> (token::LitKind, Symbol) {
let mut has_fatal_err = false;
let content_start = start + BytePos(prefix_len);
let content_end = end - BytePos(postfix_len);
let lit_content = self.str_from_to(content_start, content_end);
- unescape::unescape_literal(lit_content, mode, &mut |range, result| {
+ unescape(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes = self.mk_sp(start, end);
@@ -696,6 +717,38 @@ impl<'a> StringReader<'a> {
(token::Err, self.symbol_from_to(start, end))
}
}
+
+ fn cook_quoted(
+ &self,
+ kind: token::LitKind,
+ mode: Mode,
+ start: BytePos,
+ end: BytePos,
+ prefix_len: u32,
+ postfix_len: u32,
+ ) -> (token::LitKind, Symbol) {
+ self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
+ unescape::unescape_literal(src, mode, &mut |span, result| {
+ callback(span, result.map(drop))
+ })
+ })
+ }
+
+ fn cook_c_string(
+ &self,
+ kind: token::LitKind,
+ mode: Mode,
+ start: BytePos,
+ end: BytePos,
+ prefix_len: u32,
+ postfix_len: u32,
+ ) -> (token::LitKind, Symbol) {
+ self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
+ unescape::unescape_c_string(src, mode, &mut |span, result| {
+ callback(span, result.map(drop))
+ })
+ })
+ }
}
pub fn nfc_normalize(string: &str) -> Symbol {
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 7c2c08951..318a29985 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -199,8 +199,7 @@ impl<'a> TokenTreesReader<'a> {
// matching opening delimiter).
let token_str = token_to_string(&self.token);
let msg = format!("unexpected closing delimiter: `{}`", token_str);
- let mut err =
- self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
+ let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
report_suspicious_mismatch_block(
&mut err,
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 0d12ec608..eb9625f92 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error(
}
};
let sugg = sugg.unwrap_or_else(|| {
- let is_byte = mode.is_byte();
- let prefix = if is_byte { "b" } else { "" };
+ let prefix = mode.prefix_noraw();
let mut escaped = String::with_capacity(lit.len());
let mut chrs = lit.chars().peekable();
while let Some(first) = chrs.next() {
@@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error(
};
}
let sugg = format!("{prefix}\"{escaped}\"");
- MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
+ MoreThanOneCharSugg::Quotes {
+ span: span_with_quotes,
+ is_byte: mode == Mode::Byte,
+ sugg,
+ }
});
handler.emit_err(UnescapeError::MoreThanOneChar {
span: span_with_quotes,
@@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error(
char_span,
escaped_sugg: c.escape_default().to_string(),
escaped_msg: escaped_char(c),
- byte: mode.is_byte(),
+ byte: mode == Mode::Byte,
});
}
EscapeError::BareCarriageReturn => {
@@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error(
EscapeError::InvalidEscape => {
let (c, span) = last_char();
- let label =
- if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
+ let label = if mode == Mode::Byte || mode == Mode::ByteStr {
+ "unknown byte escape"
+ } else {
+ "unknown character escape"
+ };
let ec = escaped_char(c);
- let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
+ let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec));
diag.span_label(span, label);
- if c == '{' || c == '}' && !mode.is_byte() {
+ if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
diag.help(
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
);
@@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error(
version control settings",
);
} else {
- if !mode.is_byte() {
+ if mode == Mode::Str || mode == Mode::Char {
diag.span_suggestion(
span_with_quotes,
"if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
@@ -180,13 +186,13 @@ pub(crate) fn emit_unescape_error(
} else {
String::new()
};
- err.span_label(span, &format!("must be ASCII{}", postfix));
+ err.span_label(span, format!("must be ASCII{}", postfix));
// Note: the \\xHH suggestions are not given for raw byte string
// literals, because they are araw and so cannot use any escapes.
if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
err.span_suggestion(
span,
- &format!(
+ format!(
"if you meant to use the unicode code point for {:?}, use a \\xHH escape",
c
),
@@ -200,10 +206,7 @@ pub(crate) fn emit_unescape_error(
utf8.push(c);
err.span_suggestion(
span,
- &format!(
- "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
- c
- ),
+ format!("if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c),
utf8.as_bytes()
.iter()
.map(|b: &u8| format!("\\x{:X}", *b))
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 1f027c08f..829d9693e 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -350,7 +350,7 @@ pub(super) fn check_for_substitution(
let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
let msg = format!("substitution character not found for '{}'", ch);
- reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
+ reader.sess.span_diagnostic.span_bug_no_panic(span, msg);
return (None, None);
};