summary refs log tree commit diff stats
path: root/compiler/rustc_parse/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r-- compiler/rustc_parse/src/lexer/diagnostics.rs 2
-rw-r--r-- compiler/rustc_parse/src/lexer/mod.rs 42
-rw-r--r-- compiler/rustc_parse/src/lexer/tokentrees.rs 2
-rw-r--r-- compiler/rustc_parse/src/lexer/unescape_error_reporting.rs 39
-rw-r--r-- compiler/rustc_parse/src/lexer/unicode_chars.rs 2
5 files changed, 51 insertions, 36 deletions
diff --git a/compiler/rustc_parse/src/lexer/diagnostics.rs b/compiler/rustc_parse/src/lexer/diagnostics.rs
index 9e6d27bf0..b50bb47f2 100644
--- a/compiler/rustc_parse/src/lexer/diagnostics.rs
+++ b/compiler/rustc_parse/src/lexer/diagnostics.rs
@@ -46,7 +46,7 @@ pub fn report_missing_open_delim(
};
err.span_label(
unmatch_brace.found_span.shrink_to_lo(),
- format!("missing open `{}` for this delimiter", missed_open),
+ format!("missing open `{missed_open}` for this delimiter"),
);
reported_missing_open = true;
}
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index c6e6b46e4..a375a1d69 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
-use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
+use rustc_lexer::{Cursor, LiteralKind};
use rustc_session::lint::builtin::{
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
};
@@ -74,7 +74,6 @@ pub(crate) fn parse_token_trees<'a>(
// because the delimiter mismatch is more likely to be the root cause of error
let mut buffer = Vec::with_capacity(1);
- // Not using `emit_unclosed_delims` to use `db.buffer`
for unmatched in unmatched_delims {
if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
err.buffer(&mut buffer);
@@ -118,6 +117,7 @@ impl<'a> StringReader<'a> {
let mut swallow_next_invalid = 0;
// Skip trivial (whitespace & comments) tokens
loop {
+ let str_before = self.cursor.as_str();
let token = self.cursor.advance_token();
let start = self.pos;
self.pos = self.pos + BytePos(token.len);
@@ -165,10 +165,7 @@ impl<'a> StringReader<'a> {
continue;
}
rustc_lexer::TokenKind::Ident => {
- let sym = nfc_normalize(self.str_from(start));
- let span = self.mk_sp(start, self.pos);
- self.sess.symbol_gallery.insert(sym, span);
- token::Ident(sym, false)
+ self.ident(start)
}
rustc_lexer::TokenKind::RawIdent => {
let sym = nfc_normalize(self.str_from(start + BytePos(2)));
@@ -182,10 +179,7 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::TokenKind::UnknownPrefix => {
self.report_unknown_prefix(start);
- let sym = nfc_normalize(self.str_from(start));
- let span = self.mk_sp(start, self.pos);
- self.sess.symbol_gallery.insert(sym, span);
- token::Ident(sym, false)
+ self.ident(start)
}
rustc_lexer::TokenKind::InvalidIdent
// Do not recover an identifier with emoji if the codepoint is a confusable
@@ -203,6 +197,27 @@ impl<'a> StringReader<'a> {
.push(span);
token::Ident(sym, false)
}
+ // split up (raw) c string literals to an ident and a string literal when edition < 2021.
+ rustc_lexer::TokenKind::Literal {
+ kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
+ suffix_start: _,
+ } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
+ let prefix_len = match kind {
+ LiteralKind::CStr { .. } => 1,
+ LiteralKind::RawCStr { .. } => 2,
+ _ => unreachable!(),
+ };
+
+ // reset the state so that only the prefix ("c" or "cr")
+ // was consumed.
+ let lit_start = start + BytePos(prefix_len);
+ self.pos = lit_start;
+ self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
+
+ self.report_unknown_prefix(start);
+ let prefix_span = self.mk_sp(start, lit_start);
+ return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
+ }
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -317,6 +332,13 @@ impl<'a> StringReader<'a> {
}
}
+ fn ident(&self, start: BytePos) -> TokenKind {
+ let sym = nfc_normalize(self.str_from(start));
+ let span = self.mk_sp(start, self.pos);
+ self.sess.symbol_gallery.insert(sym, span);
+ token::Ident(sym, false)
+ }
+
fn struct_fatal_span_char(
&self,
from_pos: BytePos,
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 318a29985..07910113d 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -198,7 +198,7 @@ impl<'a> TokenTreesReader<'a> {
// An unexpected closing delimiter (i.e., there is no
// matching opening delimiter).
let token_str = token_to_string(&self.token);
- let msg = format!("unexpected closing delimiter: `{}`", token_str);
+ let msg = format!("unexpected closing delimiter: `{token_str}`");
let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
report_suspicious_mismatch_block(
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 461a34b67..b659c40b2 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -27,7 +27,7 @@ pub(crate) fn emit_unescape_error(
lit, span_with_quotes, mode, range, error
);
let last_char = || {
- let c = lit[range.clone()].chars().rev().next().unwrap();
+ let c = lit[range.clone()].chars().next_back().unwrap();
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
(c, span)
};
@@ -80,20 +80,14 @@ pub(crate) fn emit_unescape_error(
let sugg = sugg.unwrap_or_else(|| {
let prefix = mode.prefix_noraw();
let mut escaped = String::with_capacity(lit.len());
- let mut chrs = lit.chars().peekable();
- while let Some(first) = chrs.next() {
- match (first, chrs.peek()) {
- ('\\', Some('"')) => {
- escaped.push('\\');
- escaped.push('"');
- chrs.next();
- }
- ('"', _) => {
- escaped.push('\\');
- escaped.push('"')
- }
- (c, _) => escaped.push(c),
- };
+ let mut in_escape = false;
+ for c in lit.chars() {
+ match c {
+ '\\' => in_escape = !in_escape,
+ '"' if !in_escape => escaped.push('\\'),
+ _ => in_escape = false,
+ }
+ escaped.push(c);
}
let sugg = format!("{prefix}\"{escaped}\"");
MoreThanOneCharSugg::Quotes {
@@ -135,7 +129,7 @@ pub(crate) fn emit_unescape_error(
"unknown character escape"
};
let ec = escaped_char(c);
- let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec));
+ let mut diag = handler.struct_span_err(span, format!("{label}: `{ec}`"));
diag.span_label(span, label);
if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
diag.help(
@@ -151,7 +145,7 @@ pub(crate) fn emit_unescape_error(
diag.span_suggestion(
span_with_quotes,
"if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
- format!("r\"{}\"", lit),
+ format!("r\"{lit}\""),
Applicability::MaybeIncorrect,
);
}
@@ -180,21 +174,20 @@ pub(crate) fn emit_unescape_error(
Mode::RawByteStr => "raw byte string literal",
_ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
};
- let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
+ let mut err = handler.struct_span_err(span, format!("non-ASCII character in {desc}"));
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
- format!(" but is {:?}", c)
+ format!(" but is {c:?}")
} else {
String::new()
};
- err.span_label(span, format!("must be ASCII{}", postfix));
+ err.span_label(span, format!("must be ASCII{postfix}"));
// Note: the \\xHH suggestions are not given for raw byte string
// literals, because they are raw and so cannot use any escapes.
if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
err.span_suggestion(
span,
format!(
- "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
- c
+ "if you meant to use the unicode code point for {c:?}, use a \\xHH escape"
),
format!("\\x{:X}", c as u32),
Applicability::MaybeIncorrect,
@@ -206,7 +199,7 @@ pub(crate) fn emit_unescape_error(
utf8.push(c);
err.span_suggestion(
span,
- format!("if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c),
+ format!("if you meant to use the UTF-8 encoding of {c:?}, use \\xHH escapes"),
utf8.as_bytes()
.iter()
.map(|b: &u8| format!("\\x{:X}", *b))
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 829d9693e..bbfb160eb 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -349,7 +349,7 @@ pub(super) fn check_for_substitution(
let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
- let msg = format!("substitution character not found for '{}'", ch);
+ let msg = format!("substitution character not found for '{ch}'");
reader.sess.span_diagnostic.span_bug_no_panic(span, msg);
return (None, None);
};