From 9918693037dce8aa4bb6f08741b6812923486c18 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 19 Jun 2024 11:26:03 +0200 Subject: Merging upstream version 1.76.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_ast/src/util/classify.rs | 35 ++++++++++++++-- compiler/rustc_ast/src/util/literal.rs | 74 +++++++++++++-------------------- 2 files changed, 62 insertions(+), 47 deletions(-) (limited to 'compiler/rustc_ast/src/util') diff --git a/compiler/rustc_ast/src/util/classify.rs b/compiler/rustc_ast/src/util/classify.rs index 821fca665..4dece0797 100644 --- a/compiler/rustc_ast/src/util/classify.rs +++ b/compiler/rustc_ast/src/util/classify.rs @@ -40,15 +40,44 @@ pub fn expr_trailing_brace(mut expr: &ast::Expr) -> Option<&ast::Expr> { | Range(_, Some(e), _) | Ret(Some(e)) | Unary(_, e) - | Yield(Some(e)) => { + | Yield(Some(e)) + | Yeet(Some(e)) + | Become(e) => { expr = e; } Closure(closure) => { expr = &closure.body; } Gen(..) | Block(..) | ForLoop(..) | If(..) | Loop(..) | Match(..) | Struct(..) - | TryBlock(..) | While(..) => break Some(expr), - _ => break None, + | TryBlock(..) | While(..) | ConstBlock(_) => break Some(expr), + + // FIXME: These can end in `}`, but changing these would break stable code. + InlineAsm(_) | OffsetOf(_, _) | MacCall(_) | IncludedBytes(_) | FormatArgs(_) => { + break None; + } + + Break(_, None) + | Range(_, None, _) + | Ret(None) + | Yield(None) + | Array(_) + | Call(_, _) + | MethodCall(_) + | Tup(_) + | Lit(_) + | Cast(_, _) + | Type(_, _) + | Await(_, _) + | Field(_, _) + | Index(_, _, _) + | Underscore + | Path(_, _) + | Continue(_) + | Repeat(_, _) + | Paren(_) + | Try(_) + | Yeet(None) + | Err => break None, } } } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 50eb92125..92b9adf1d 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -77,6 +77,8 @@ impl LitKind { // new symbol because the string in the LitKind is different to the // string in the token. let s = symbol.as_str(); + // Vanilla strings are so common we optimize for the common case where no chars + // requiring special behaviour are present. let symbol = if s.contains(['\\', '\r']) { let mut buf = String::with_capacity(s.len()); let mut error = Ok(()); @@ -104,27 +106,20 @@ impl LitKind { LitKind::Str(symbol, ast::StrStyle::Cooked) } token::StrRaw(n) => { - // Ditto. - let s = symbol.as_str(); - let symbol = - if s.contains('\r') { - let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } + // Raw strings have no escapes, so we only need to check for invalid chars, and we + // can reuse the symbol on success. + let mut error = Ok(()); + unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { + match unescaped_char { + Ok(_) => {} + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } - }); - error?; - Symbol::intern(&buf) - } else { - symbol - }; + } + } + }); + error?; LitKind::Str(symbol, ast::StrStyle::Raw(n)) } token::ByteStr => { @@ -143,25 +138,19 @@ impl LitKind { LitKind::ByteStr(buf.into(), StrStyle::Cooked) } token::ByteStrRaw(n) => { + // Raw strings have no escapes, so we only need to check for invalid chars, and we + // can convert the symbol directly to a `Lrc` on success. let s = symbol.as_str(); - let bytes = if s.contains('\r') { - let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { - Ok(c) => buf.push(byte_from_char(c)), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + let mut error = Ok(()); + unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { + Ok(_) => {} + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } - }); - error?; - buf - } else { - symbol.to_string().into_bytes() - }; - - LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) + } + }); + LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) } token::CStr => { let s = symbol.as_str(); @@ -172,7 +161,6 @@ impl LitKind { error = Err(LitError::NulInCStr(span)); } Ok(CStrUnit::Byte(b)) => buf.push(b), - Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } @@ -187,18 +175,15 @@ impl LitKind { LitKind::CStr(buf.into(), StrStyle::Cooked) } token::CStrRaw(n) => { + // Raw strings have no escapes, so we only need to check for invalid chars, and we + // can convert the symbol directly to a `Lrc` on success. let s = symbol.as_str(); - let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { error = Err(LitError::NulInCStr(span)); } - Ok(CStrUnit::Byte(b)) => buf.push(b), - Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), - Ok(CStrUnit::Char(c)) => { - buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) - } + Ok(_) => {} Err(err) => { if err.is_fatal() { error = Err(LitError::LexerError); @@ -206,6 +191,7 @@ impl LitKind { } }); error?; + let mut buf = s.to_owned().into_bytes(); buf.push(0); LitKind::CStr(buf.into(), StrStyle::Raw(n)) } -- cgit v1.2.3