Diffstat (limited to 'compiler/rustc_ast/src/util')
 -rw-r--r--  compiler/rustc_ast/src/util/classify.rs        |  52
 -rw-r--r--  compiler/rustc_ast/src/util/comments.rs        | 255
 -rw-r--r--  compiler/rustc_ast/src/util/comments/tests.rs  |  61
 -rw-r--r--  compiler/rustc_ast/src/util/literal.rs         | 336
 -rw-r--r--  compiler/rustc_ast/src/util/parser.rs          | 406
 -rw-r--r--  compiler/rustc_ast/src/util/unicode.rs         |  35
 6 files changed, 1145 insertions(+), 0 deletions(-)
diff --git a/compiler/rustc_ast/src/util/classify.rs b/compiler/rustc_ast/src/util/classify.rs
new file mode 100644
index 000000000..6ea3db6d3
--- /dev/null
+++ b/compiler/rustc_ast/src/util/classify.rs
@@ -0,0 +1,52 @@
+//! Routines the parser uses to classify AST nodes
+
+// Predicates on exprs and stmts that the pretty-printer and parser use
+
+use crate::ast;
+
+/// Does this expression require a semicolon to be treated
+/// as a statement? The negation of this: 'can this expression
+/// be used as a statement without a semicolon' -- is used
+/// as an early-bail-out in the parser so that, for instance,
+ ///     if true {...} else {...}
+ ///     |x| 5
+/// isn't parsed as (if true {...} else {...} | x) | 5
+pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool {
+ !matches!(
+ e.kind,
+ ast::ExprKind::If(..)
+ | ast::ExprKind::Match(..)
+ | ast::ExprKind::Block(..)
+ | ast::ExprKind::While(..)
+ | ast::ExprKind::Loop(..)
+ | ast::ExprKind::ForLoop(..)
+ | ast::ExprKind::TryBlock(..)
+ )
+}
+
+/// If an expression ends with `}`, returns the innermost expression ending in the `}`
+pub fn expr_trailing_brace(mut expr: &ast::Expr) -> Option<&ast::Expr> {
+ use ast::ExprKind::*;
+
+ loop {
+ match &expr.kind {
+ AddrOf(_, _, e)
+ | Assign(_, e, _)
+ | AssignOp(_, _, e)
+ | Binary(_, _, e)
+ | Box(e)
+ | Break(_, Some(e))
+ | Closure(.., e, _)
+ | Let(_, e, _)
+ | Range(_, Some(e), _)
+ | Ret(Some(e))
+ | Unary(_, e)
+ | Yield(Some(e)) => {
+ expr = e;
+ }
+ Async(..) | Block(..) | ForLoop(..) | If(..) | Loop(..) | Match(..) | Struct(..)
+ | TryBlock(..) | While(..) => break Some(expr),
+ _ => break None,
+ }
+ }
+}
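
classify.rs draws a small but important distinction: expressions that end in `}` (if, match, blocks, loops) may stand as statements without a trailing semicolon, while operand-like expressions may not. Below is a self-contained sketch of that idea on a toy AST; the `ToyExpr` type and its names are invented for illustration and are not rustc's `ast::Expr`.

    // Toy AST, invented for this sketch only.
    #[derive(Debug)]
    enum ToyExpr {
        If,                                 // `if cond { .. } else { .. }`
        Block,                              // `{ .. }`
        Closure,                            // `|x| 5`
        Binary(Box<ToyExpr>, Box<ToyExpr>), // `a | b`, `a + b`, ...
    }

    // Mirrors expr_requires_semi_to_be_stmt: block-ended forms are exempt.
    fn requires_semi_to_be_stmt(e: &ToyExpr) -> bool {
        !matches!(e, ToyExpr::If | ToyExpr::Block)
    }

    fn main() {
        // `if .. { .. } else { .. }` can be a statement on its own, so a following
        // `|x| 5` starts a new statement instead of continuing a `|` expression.
        assert!(!requires_semi_to_be_stmt(&ToyExpr::If));
        assert!(requires_semi_to_be_stmt(&ToyExpr::Closure));
        let or = ToyExpr::Binary(Box::new(ToyExpr::Block), Box::new(ToyExpr::Closure));
        assert!(requires_semi_to_be_stmt(&or));
    }
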
diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
new file mode 100644
index 000000000..c96474ccb
--- /dev/null
+++ b/compiler/rustc_ast/src/util/comments.rs
@@ -0,0 +1,255 @@
+use crate::token::CommentKind;
+use rustc_span::source_map::SourceMap;
+use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
+
+#[cfg(test)]
+mod tests;
+
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub enum CommentStyle {
+ /// No code on either side of each line of the comment
+ Isolated,
+ /// Code exists to the left of the comment
+ Trailing,
+ /// Code before /* foo */ and after the comment
+ Mixed,
+ /// Just a manual blank line "\n\n", for layout
+ BlankLine,
+}
+
+#[derive(Clone)]
+pub struct Comment {
+ pub style: CommentStyle,
+ pub lines: Vec<String>,
+ pub pos: BytePos,
+}
+
+/// A fast conservative estimate on whether the string can contain documentation links.
+/// A pair of square brackets `[]` must exist in the string, but we only search for the
+/// opening bracket because brackets always go in pairs in practice.
+#[inline]
+pub fn may_have_doc_links(s: &str) -> bool {
+ s.contains('[')
+}
+
+/// Makes a doc string more presentable to users.
+/// Used by rustdoc and perhaps other tools, but not by rustc.
+pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol {
+ fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
+ let mut i = 0;
+ let mut j = lines.len();
+ // first line of all-stars should be omitted
+ if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
+ i += 1;
+ }
+
+ // like the first, a last line of all stars should be omitted
+ if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') {
+ j -= 1;
+ }
+
+ if i != 0 || j != lines.len() { Some((i, j)) } else { None }
+ }
+
+ fn get_horizontal_trim<'a>(lines: &'a [&str], kind: CommentKind) -> Option<String> {
+ let mut i = usize::MAX;
+ let mut first = true;
+
+ // In case we have doc comments like `/**` or `/*!`, we want to remove stars if they are
+ // present. However, we first need to strip the empty lines so they don't get in the middle
+ // when we try to compute the "horizontal trim".
+ let lines = if kind == CommentKind::Block {
+ // Whatever happens, we skip the first line.
+ let mut i = lines
+ .get(0)
+ .map(|l| if l.trim_start().starts_with('*') { 0 } else { 1 })
+ .unwrap_or(0);
+ let mut j = lines.len();
+
+ while i < j && lines[i].trim().is_empty() {
+ i += 1;
+ }
+ while j > i && lines[j - 1].trim().is_empty() {
+ j -= 1;
+ }
+ &lines[i..j]
+ } else {
+ lines
+ };
+
+ for line in lines {
+ for (j, c) in line.chars().enumerate() {
+ if j > i || !"* \t".contains(c) {
+ return None;
+ }
+ if c == '*' {
+ if first {
+ i = j;
+ first = false;
+ } else if i != j {
+ return None;
+ }
+ break;
+ }
+ }
+ if i >= line.len() {
+ return None;
+ }
+ }
+ if lines.is_empty() { None } else { Some(lines[0][..i].into()) }
+ }
+
+ let data_s = data.as_str();
+ if data_s.contains('\n') {
+ let mut lines = data_s.lines().collect::<Vec<&str>>();
+ let mut changes = false;
+ let lines = if let Some((i, j)) = get_vertical_trim(&lines) {
+ changes = true;
+ // remove whitespace-only lines from the start/end of lines
+ &mut lines[i..j]
+ } else {
+ &mut lines
+ };
+ if let Some(horizontal) = get_horizontal_trim(&lines, kind) {
+ changes = true;
+ // remove a "[ \t]*\*" block from each line, if possible
+ for line in lines.iter_mut() {
+ if let Some(tmp) = line.strip_prefix(&horizontal) {
+ *line = tmp;
+ if kind == CommentKind::Block
+ && (*line == "*" || line.starts_with("* ") || line.starts_with("**"))
+ {
+ *line = &line[1..];
+ }
+ }
+ }
+ }
+ if changes {
+ return Symbol::intern(&lines.join("\n"));
+ }
+ }
+ data
+}
+
+/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
+ /// Otherwise returns `Some(k)` where `k` is the first char offset after that leading
+/// whitespace. Note that `k` may be outside bounds of `s`.
+fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
+ let mut idx = 0;
+ for (i, ch) in s.char_indices().take(col.to_usize()) {
+ if !ch.is_whitespace() {
+ return None;
+ }
+ idx = i + ch.len_utf8();
+ }
+ Some(idx)
+}
+
+fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
+ let len = s.len();
+ match all_whitespace(&s, col) {
+ Some(col) => {
+ if col < len {
+ &s[col..]
+ } else {
+ ""
+ }
+ }
+ None => s,
+ }
+}
+
+fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
+ let mut res: Vec<String> = vec![];
+ let mut lines = text.lines();
+ // just push the first line
+ res.extend(lines.next().map(|it| it.to_string()));
+ // for other lines, strip common whitespace prefix
+ for line in lines {
+ res.push(trim_whitespace_prefix(line, col).to_string())
+ }
+ res
+}
+
+// it appears this function is called only from pprust... that's
+// probably not a good thing.
+pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
+ let sm = SourceMap::new(sm.path_mapping().clone());
+ let source_file = sm.new_source_file(path, src);
+ let text = (*source_file.src.as_ref().unwrap()).clone();
+
+ let text: &str = text.as_str();
+ let start_bpos = source_file.start_pos;
+ let mut pos = 0;
+ let mut comments: Vec<Comment> = Vec::new();
+ let mut code_to_the_left = false;
+
+ if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
+ comments.push(Comment {
+ style: CommentStyle::Isolated,
+ lines: vec![text[..shebang_len].to_string()],
+ pos: start_bpos,
+ });
+ pos += shebang_len;
+ }
+
+ for token in rustc_lexer::tokenize(&text[pos..]) {
+ let token_text = &text[pos..pos + token.len as usize];
+ match token.kind {
+ rustc_lexer::TokenKind::Whitespace => {
+ if let Some(mut idx) = token_text.find('\n') {
+ code_to_the_left = false;
+ while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
+ idx += 1 + next_newline;
+ comments.push(Comment {
+ style: CommentStyle::BlankLine,
+ lines: vec![],
+ pos: start_bpos + BytePos((pos + idx) as u32),
+ });
+ }
+ }
+ }
+ rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
+ if doc_style.is_none() {
+ let code_to_the_right = !matches!(
+ text[pos + token.len as usize..].chars().next(),
+ Some('\r' | '\n')
+ );
+ let style = match (code_to_the_left, code_to_the_right) {
+ (_, true) => CommentStyle::Mixed,
+ (false, false) => CommentStyle::Isolated,
+ (true, false) => CommentStyle::Trailing,
+ };
+
+ // Count the number of chars since the start of the line by rescanning.
+ let pos_in_file = start_bpos + BytePos(pos as u32);
+ let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
+ let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
+ let col = CharPos(text[line_begin_pos..pos].chars().count());
+
+ let lines = split_block_comment_into_lines(token_text, col);
+ comments.push(Comment { style, lines, pos: pos_in_file })
+ }
+ }
+ rustc_lexer::TokenKind::LineComment { doc_style } => {
+ if doc_style.is_none() {
+ comments.push(Comment {
+ style: if code_to_the_left {
+ CommentStyle::Trailing
+ } else {
+ CommentStyle::Isolated
+ },
+ lines: vec![token_text.to_string()],
+ pos: start_bpos + BytePos(pos as u32),
+ })
+ }
+ }
+ _ => {
+ code_to_the_left = true;
+ }
+ }
+ pos += token.len as usize;
+ }
+
+ comments
+}
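
The trickiest step in beautify_doc_string above is the "horizontal trim": detecting a `*` column shared by every line of a block doc comment and stripping it. The following is a simplified standalone sketch of that step on plain string slices; it ignores vertical trimming and CommentKind and is illustrative only, not the compiler's code.

    // Strip a common whitespace-then-`*` column, if every line has one at the same
    // position; otherwise return the lines unchanged. Simplified for illustration.
    fn trim_star_column(lines: &[&str]) -> Vec<String> {
        let unchanged = || lines.iter().map(|s| s.to_string()).collect::<Vec<_>>();
        let col = match lines.iter().map(|l| l.find('*')).collect::<Option<Vec<_>>>() {
            Some(cols) if !cols.is_empty() && cols.iter().all(|&c| c == cols[0]) => cols[0],
            _ => return unchanged(),
        };
        if lines.iter().any(|l| !l[..col].chars().all(|c| c == ' ' || c == '\t')) {
            return unchanged();
        }
        lines.iter().map(|l| l[col + 1..].to_string()).collect()
    }

    fn main() {
        let lines = [" * Test", " * Another"];
        assert_eq!(trim_star_column(&lines), [" Test".to_string(), " Another".to_string()]);
    }
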
diff --git a/compiler/rustc_ast/src/util/comments/tests.rs b/compiler/rustc_ast/src/util/comments/tests.rs
new file mode 100644
index 000000000..11d50603a
--- /dev/null
+++ b/compiler/rustc_ast/src/util/comments/tests.rs
@@ -0,0 +1,61 @@
+use super::*;
+use rustc_span::create_default_session_globals_then;
+
+#[test]
+fn test_block_doc_comment_1() {
+ create_default_session_globals_then(|| {
+ let comment = "\n * Test \n ** Test\n * Test\n";
+ let stripped = beautify_doc_string(Symbol::intern(comment), CommentKind::Block);
+ assert_eq!(stripped.as_str(), " Test \n* Test\n Test");
+ })
+}
+
+#[test]
+fn test_block_doc_comment_2() {
+ create_default_session_globals_then(|| {
+ let comment = "\n * Test\n * Test\n";
+ let stripped = beautify_doc_string(Symbol::intern(comment), CommentKind::Block);
+ assert_eq!(stripped.as_str(), " Test\n Test");
+ })
+}
+
+#[test]
+fn test_block_doc_comment_3() {
+ create_default_session_globals_then(|| {
+ let comment = "\n let a: *i32;\n *a = 5;\n";
+ let stripped = beautify_doc_string(Symbol::intern(comment), CommentKind::Block);
+ assert_eq!(stripped.as_str(), "let a: *i32;\n*a = 5;");
+ })
+}
+
+#[test]
+fn test_line_doc_comment() {
+ create_default_session_globals_then(|| {
+ let stripped = beautify_doc_string(Symbol::intern(" test"), CommentKind::Line);
+ assert_eq!(stripped.as_str(), " test");
+ let stripped = beautify_doc_string(Symbol::intern("! test"), CommentKind::Line);
+ assert_eq!(stripped.as_str(), "! test");
+ let stripped = beautify_doc_string(Symbol::intern("test"), CommentKind::Line);
+ assert_eq!(stripped.as_str(), "test");
+ let stripped = beautify_doc_string(Symbol::intern("!test"), CommentKind::Line);
+ assert_eq!(stripped.as_str(), "!test");
+ })
+}
+
+#[test]
+fn test_doc_blocks() {
+ create_default_session_globals_then(|| {
+ let stripped =
+ beautify_doc_string(Symbol::intern(" # Returns\n *\n "), CommentKind::Block);
+ assert_eq!(stripped.as_str(), " # Returns\n\n");
+
+ let stripped = beautify_doc_string(
+ Symbol::intern("\n * # Returns\n *\n "),
+ CommentKind::Block,
+ );
+ assert_eq!(stripped.as_str(), " # Returns\n\n");
+
+ let stripped = beautify_doc_string(Symbol::intern("\n * a\n "), CommentKind::Block);
+ assert_eq!(stripped.as_str(), " a\n");
+ })
+}
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
new file mode 100644
index 000000000..9c18f55c0
--- /dev/null
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -0,0 +1,336 @@
+//! Code related to parsing literals.
+
+use crate::ast::{self, Lit, LitKind};
+use crate::token::{self, Token};
+
+use rustc_lexer::unescape::{unescape_byte, unescape_char};
+use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
+use rustc_span::symbol::{kw, sym, Symbol};
+use rustc_span::Span;
+
+use std::ascii;
+use tracing::debug;
+
+pub enum LitError {
+ NotLiteral,
+ LexerError,
+ InvalidSuffix,
+ InvalidIntSuffix,
+ InvalidFloatSuffix,
+ NonDecimalFloat(u32),
+ IntTooLarge,
+}
+
+impl LitKind {
+ /// Converts literal token into a semantic literal.
+ pub fn from_lit_token(lit: token::Lit) -> Result<LitKind, LitError> {
+ let token::Lit { kind, symbol, suffix } = lit;
+ if suffix.is_some() && !kind.may_have_suffix() {
+ return Err(LitError::InvalidSuffix);
+ }
+
+ Ok(match kind {
+ token::Bool => {
+ assert!(symbol.is_bool_lit());
+ LitKind::Bool(symbol == kw::True)
+ }
+ token::Byte => {
+ return unescape_byte(symbol.as_str())
+ .map(LitKind::Byte)
+ .map_err(|_| LitError::LexerError);
+ }
+ token::Char => {
+ return unescape_char(symbol.as_str())
+ .map(LitKind::Char)
+ .map_err(|_| LitError::LexerError);
+ }
+
+ // There are some valid suffixes for integer and float literals,
+ // so all the handling is done internally.
+ token::Integer => return integer_lit(symbol, suffix),
+ token::Float => return float_lit(symbol, suffix),
+
+ token::Str => {
+ // If there are no characters requiring special treatment we can
+ // reuse the symbol from the token. Otherwise, we must generate a
+ // new symbol because the string in the LitKind is different to the
+ // string in the token.
+ let s = symbol.as_str();
+ let symbol = if s.contains(&['\\', '\r']) {
+ let mut buf = String::with_capacity(s.len());
+ let mut error = Ok(());
+ // Force-inlining here is aggressive but the closure is
+ // called on every char in the string, so it can be
+ // hot in programs with many long strings.
+ unescape_literal(
+ &s,
+ Mode::Str,
+ &mut #[inline(always)]
+ |_, unescaped_char| match unescaped_char {
+ Ok(c) => buf.push(c),
+ Err(err) => {
+ if err.is_fatal() {
+ error = Err(LitError::LexerError);
+ }
+ }
+ },
+ );
+ error?;
+ Symbol::intern(&buf)
+ } else {
+ symbol
+ };
+ LitKind::Str(symbol, ast::StrStyle::Cooked)
+ }
+ token::StrRaw(n) => {
+ // Ditto.
+ let s = symbol.as_str();
+ let symbol =
+ if s.contains('\r') {
+ let mut buf = String::with_capacity(s.len());
+ let mut error = Ok(());
+ unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| {
+ match unescaped_char {
+ Ok(c) => buf.push(c),
+ Err(err) => {
+ if err.is_fatal() {
+ error = Err(LitError::LexerError);
+ }
+ }
+ }
+ });
+ error?;
+ Symbol::intern(&buf)
+ } else {
+ symbol
+ };
+ LitKind::Str(symbol, ast::StrStyle::Raw(n))
+ }
+ token::ByteStr => {
+ let s = symbol.as_str();
+ let mut buf = Vec::with_capacity(s.len());
+ let mut error = Ok(());
+ unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
+ match unescaped_byte {
+ Ok(c) => buf.push(c),
+ Err(err) => {
+ if err.is_fatal() {
+ error = Err(LitError::LexerError);
+ }
+ }
+ }
+ });
+ error?;
+ LitKind::ByteStr(buf.into())
+ }
+ token::ByteStrRaw(_) => {
+ let s = symbol.as_str();
+ let bytes = if s.contains('\r') {
+ let mut buf = Vec::with_capacity(s.len());
+ let mut error = Ok(());
+ unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
+ match unescaped_byte {
+ Ok(c) => buf.push(c),
+ Err(err) => {
+ if err.is_fatal() {
+ error = Err(LitError::LexerError);
+ }
+ }
+ }
+ });
+ error?;
+ buf
+ } else {
+ symbol.to_string().into_bytes()
+ };
+
+ LitKind::ByteStr(bytes.into())
+ }
+ token::Err => LitKind::Err(symbol),
+ })
+ }
+
+ /// Attempts to recover a token from a semantic literal.
+ /// This function is used when the original token doesn't exist (e.g. the literal is created
+ /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
+ pub fn to_lit_token(&self) -> token::Lit {
+ let (kind, symbol, suffix) = match *self {
+ LitKind::Str(symbol, ast::StrStyle::Cooked) => {
+ // Don't re-intern unless the escaped string is different.
+ let s = symbol.as_str();
+ let escaped = s.escape_default().to_string();
+ let symbol = if s == escaped { symbol } else { Symbol::intern(&escaped) };
+ (token::Str, symbol, None)
+ }
+ LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None),
+ LitKind::ByteStr(ref bytes) => {
+ let string = bytes
+ .iter()
+ .cloned()
+ .flat_map(ascii::escape_default)
+ .map(Into::<char>::into)
+ .collect::<String>();
+ (token::ByteStr, Symbol::intern(&string), None)
+ }
+ LitKind::Byte(byte) => {
+ let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect();
+ (token::Byte, Symbol::intern(&string), None)
+ }
+ LitKind::Char(ch) => {
+ let string: String = ch.escape_default().map(Into::<char>::into).collect();
+ (token::Char, Symbol::intern(&string), None)
+ }
+ LitKind::Int(n, ty) => {
+ let suffix = match ty {
+ ast::LitIntType::Unsigned(ty) => Some(ty.name()),
+ ast::LitIntType::Signed(ty) => Some(ty.name()),
+ ast::LitIntType::Unsuffixed => None,
+ };
+ (token::Integer, sym::integer(n), suffix)
+ }
+ LitKind::Float(symbol, ty) => {
+ let suffix = match ty {
+ ast::LitFloatType::Suffixed(ty) => Some(ty.name()),
+ ast::LitFloatType::Unsuffixed => None,
+ };
+ (token::Float, symbol, suffix)
+ }
+ LitKind::Bool(value) => {
+ let symbol = if value { kw::True } else { kw::False };
+ (token::Bool, symbol, None)
+ }
+ LitKind::Err(symbol) => (token::Err, symbol, None),
+ };
+
+ token::Lit::new(kind, symbol, suffix)
+ }
+}
+
+impl Lit {
+ /// Converts literal token into an AST literal.
+ pub fn from_lit_token(token: token::Lit, span: Span) -> Result<Lit, LitError> {
+ Ok(Lit { token, kind: LitKind::from_lit_token(token)?, span })
+ }
+
+ /// Converts arbitrary token into an AST literal.
+ ///
+ /// Keep this in sync with `Token::can_begin_literal_or_bool` excluding unary negation.
+ pub fn from_token(token: &Token) -> Result<Lit, LitError> {
+ let lit = match token.uninterpolate().kind {
+ token::Ident(name, false) if name.is_bool_lit() => {
+ token::Lit::new(token::Bool, name, None)
+ }
+ token::Literal(lit) => lit,
+ token::Interpolated(ref nt) => {
+ if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt
+ && let ast::ExprKind::Lit(lit) = &expr.kind
+ {
+ return Ok(lit.clone());
+ }
+ return Err(LitError::NotLiteral);
+ }
+ _ => return Err(LitError::NotLiteral),
+ };
+
+ Lit::from_lit_token(lit, token.span)
+ }
+
+ /// Attempts to recover an AST literal from a semantic literal.
+ /// This function is used when the original token doesn't exist (e.g. the literal is created
+ /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
+ pub fn from_lit_kind(kind: LitKind, span: Span) -> Lit {
+ Lit { token: kind.to_lit_token(), kind, span }
+ }
+
+ /// Losslessly convert an AST literal into a token.
+ pub fn to_token(&self) -> Token {
+ let kind = match self.token.kind {
+ token::Bool => token::Ident(self.token.symbol, false),
+ _ => token::Literal(self.token),
+ };
+ Token::new(kind, self.span)
+ }
+}
+
+fn strip_underscores(symbol: Symbol) -> Symbol {
+ // Do not allocate a new string unless necessary.
+ let s = symbol.as_str();
+ if s.contains('_') {
+ let mut s = s.to_string();
+ s.retain(|c| c != '_');
+ return Symbol::intern(&s);
+ }
+ symbol
+}
+
+fn filtered_float_lit(
+ symbol: Symbol,
+ suffix: Option<Symbol>,
+ base: u32,
+) -> Result<LitKind, LitError> {
+ debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
+ if base != 10 {
+ return Err(LitError::NonDecimalFloat(base));
+ }
+ Ok(match suffix {
+ Some(suf) => LitKind::Float(
+ symbol,
+ ast::LitFloatType::Suffixed(match suf {
+ sym::f32 => ast::FloatTy::F32,
+ sym::f64 => ast::FloatTy::F64,
+ _ => return Err(LitError::InvalidFloatSuffix),
+ }),
+ ),
+ None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
+ })
+}
+
+fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
+ debug!("float_lit: {:?}, {:?}", symbol, suffix);
+ filtered_float_lit(strip_underscores(symbol), suffix, 10)
+}
+
+fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
+ debug!("integer_lit: {:?}, {:?}", symbol, suffix);
+ let symbol = strip_underscores(symbol);
+ let s = symbol.as_str();
+
+ let base = match s.as_bytes() {
+ [b'0', b'x', ..] => 16,
+ [b'0', b'o', ..] => 8,
+ [b'0', b'b', ..] => 2,
+ _ => 10,
+ };
+
+ let ty = match suffix {
+ Some(suf) => match suf {
+ sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
+ sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
+ sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
+ sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
+ sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
+ sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
+ sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
+ sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
+ sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
+ sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
+ sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
+ sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
+ // `1f64` and `2f32` etc. are valid float literals, and
+ // `fxxx` looks more like an invalid float literal than an invalid integer literal.
+ _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
+ _ => return Err(LitError::InvalidIntSuffix),
+ },
+ _ => ast::LitIntType::Unsuffixed,
+ };
+
+ let s = &s[if base != 10 { 2 } else { 0 }..];
+ u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
+ // Small bases are lexed as if they were base 10, e.g., the string
+ // might be `0b10201`. This will cause the conversion above to fail,
+ // but these kinds of errors are already reported by the lexer.
+ let from_lexer =
+ base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
+ if from_lexer { LitError::LexerError } else { LitError::IntTooLarge }
+ })
+}
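
integer_lit and float_lit above operate on interned Symbols and the ast suffix types. As a rough mental model, the same base-prefix and suffix handling on plain strings might look like the sketch below; the function name and the single hard-coded u8 suffix are invented for this illustration.

    // Simplified: strip one known suffix, drop `_` separators, pick the base from a
    // `0x`/`0o`/`0b` prefix, then parse the remaining digits.
    fn parse_int_literal(s: &str) -> Result<(u128, Option<&str>), std::num::ParseIntError> {
        let (body, suffix) = match s.strip_suffix("u8") {
            Some(body) => (body, Some("u8")),
            None => (s, None),
        };
        let body: String = body.chars().filter(|&c| c != '_').collect();
        let (digits, base) = match body.as_bytes() {
            [b'0', b'x', ..] => (&body[2..], 16),
            [b'0', b'o', ..] => (&body[2..], 8),
            [b'0', b'b', ..] => (&body[2..], 2),
            _ => (&body[..], 10),
        };
        u128::from_str_radix(digits, base).map(|n| (n, suffix))
    }

    fn main() {
        assert_eq!(parse_int_literal("0xff_u8").unwrap(), (255, Some("u8")));
        assert_eq!(parse_int_literal("1_000").unwrap(), (1000, None));
        assert!(parse_int_literal("0b10201").is_err()); // invalid digit for base 2
    }
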
diff --git a/compiler/rustc_ast/src/util/parser.rs b/compiler/rustc_ast/src/util/parser.rs
new file mode 100644
index 000000000..74b7fe9e2
--- /dev/null
+++ b/compiler/rustc_ast/src/util/parser.rs
@@ -0,0 +1,406 @@
+use crate::ast::{self, BinOpKind};
+use crate::token::{self, BinOpToken, Token};
+use rustc_span::symbol::kw;
+
+/// Associative operator with precedence.
+///
+/// This is the enum which specifies operator precedence and fixity to the parser.
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub enum AssocOp {
+ /// `+`
+ Add,
+ /// `-`
+ Subtract,
+ /// `*`
+ Multiply,
+ /// `/`
+ Divide,
+ /// `%`
+ Modulus,
+ /// `&&`
+ LAnd,
+ /// `||`
+ LOr,
+ /// `^`
+ BitXor,
+ /// `&`
+ BitAnd,
+ /// `|`
+ BitOr,
+ /// `<<`
+ ShiftLeft,
+ /// `>>`
+ ShiftRight,
+ /// `==`
+ Equal,
+ /// `<`
+ Less,
+ /// `<=`
+ LessEqual,
+ /// `!=`
+ NotEqual,
+ /// `>`
+ Greater,
+ /// `>=`
+ GreaterEqual,
+ /// `=`
+ Assign,
+ /// `?=` where ? is one of the BinOpToken
+ AssignOp(BinOpToken),
+ /// `as`
+ As,
+ /// `..` range
+ DotDot,
+ /// `..=` range
+ DotDotEq,
+ /// `:`
+ Colon,
+}
+
+#[derive(PartialEq, Debug)]
+pub enum Fixity {
+ /// The operator is left-associative
+ Left,
+ /// The operator is right-associative
+ Right,
+ /// The operator is not associative
+ None,
+}
+
+impl AssocOp {
+ /// Creates a new AssocOp from a token
+ pub fn from_token(t: &Token) -> Option<AssocOp> {
+ use AssocOp::*;
+ match t.kind {
+ token::BinOpEq(k) => Some(AssignOp(k)),
+ token::Eq => Some(Assign),
+ token::BinOp(BinOpToken::Star) => Some(Multiply),
+ token::BinOp(BinOpToken::Slash) => Some(Divide),
+ token::BinOp(BinOpToken::Percent) => Some(Modulus),
+ token::BinOp(BinOpToken::Plus) => Some(Add),
+ token::BinOp(BinOpToken::Minus) => Some(Subtract),
+ token::BinOp(BinOpToken::Shl) => Some(ShiftLeft),
+ token::BinOp(BinOpToken::Shr) => Some(ShiftRight),
+ token::BinOp(BinOpToken::And) => Some(BitAnd),
+ token::BinOp(BinOpToken::Caret) => Some(BitXor),
+ token::BinOp(BinOpToken::Or) => Some(BitOr),
+ token::Lt => Some(Less),
+ token::Le => Some(LessEqual),
+ token::Ge => Some(GreaterEqual),
+ token::Gt => Some(Greater),
+ token::EqEq => Some(Equal),
+ token::Ne => Some(NotEqual),
+ token::AndAnd => Some(LAnd),
+ token::OrOr => Some(LOr),
+ token::DotDot => Some(DotDot),
+ token::DotDotEq => Some(DotDotEq),
+ // DotDotDot is no longer supported, but we need some way to display the error
+ token::DotDotDot => Some(DotDotEq),
+ token::Colon => Some(Colon),
+ // `<-` should probably be `< -`
+ token::LArrow => Some(Less),
+ _ if t.is_keyword(kw::As) => Some(As),
+ _ => None,
+ }
+ }
+
+ /// Creates a new AssocOp from ast::BinOpKind.
+ pub fn from_ast_binop(op: BinOpKind) -> Self {
+ use AssocOp::*;
+ match op {
+ BinOpKind::Lt => Less,
+ BinOpKind::Gt => Greater,
+ BinOpKind::Le => LessEqual,
+ BinOpKind::Ge => GreaterEqual,
+ BinOpKind::Eq => Equal,
+ BinOpKind::Ne => NotEqual,
+ BinOpKind::Mul => Multiply,
+ BinOpKind::Div => Divide,
+ BinOpKind::Rem => Modulus,
+ BinOpKind::Add => Add,
+ BinOpKind::Sub => Subtract,
+ BinOpKind::Shl => ShiftLeft,
+ BinOpKind::Shr => ShiftRight,
+ BinOpKind::BitAnd => BitAnd,
+ BinOpKind::BitXor => BitXor,
+ BinOpKind::BitOr => BitOr,
+ BinOpKind::And => LAnd,
+ BinOpKind::Or => LOr,
+ }
+ }
+
+ /// Gets the precedence of this operator
+ pub fn precedence(&self) -> usize {
+ use AssocOp::*;
+ match *self {
+ As | Colon => 14,
+ Multiply | Divide | Modulus => 13,
+ Add | Subtract => 12,
+ ShiftLeft | ShiftRight => 11,
+ BitAnd => 10,
+ BitXor => 9,
+ BitOr => 8,
+ Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual => 7,
+ LAnd => 6,
+ LOr => 5,
+ DotDot | DotDotEq => 4,
+ Assign | AssignOp(_) => 2,
+ }
+ }
+
+ /// Gets the fixity of this operator
+ pub fn fixity(&self) -> Fixity {
+ use AssocOp::*;
+ // NOTE: it is a bug to have operators with the same precedence but different fixities!
+ match *self {
+ Assign | AssignOp(_) => Fixity::Right,
+ As | Multiply | Divide | Modulus | Add | Subtract | ShiftLeft | ShiftRight | BitAnd
+ | BitXor | BitOr | Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual
+ | LAnd | LOr | Colon => Fixity::Left,
+ DotDot | DotDotEq => Fixity::None,
+ }
+ }
+
+ pub fn is_comparison(&self) -> bool {
+ use AssocOp::*;
+ match *self {
+ Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual => true,
+ Assign | AssignOp(_) | As | Multiply | Divide | Modulus | Add | Subtract
+ | ShiftLeft | ShiftRight | BitAnd | BitXor | BitOr | LAnd | LOr | DotDot | DotDotEq
+ | Colon => false,
+ }
+ }
+
+ pub fn is_assign_like(&self) -> bool {
+ use AssocOp::*;
+ match *self {
+ Assign | AssignOp(_) => true,
+ Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual | As | Multiply
+ | Divide | Modulus | Add | Subtract | ShiftLeft | ShiftRight | BitAnd | BitXor
+ | BitOr | LAnd | LOr | DotDot | DotDotEq | Colon => false,
+ }
+ }
+
+ pub fn to_ast_binop(&self) -> Option<BinOpKind> {
+ use AssocOp::*;
+ match *self {
+ Less => Some(BinOpKind::Lt),
+ Greater => Some(BinOpKind::Gt),
+ LessEqual => Some(BinOpKind::Le),
+ GreaterEqual => Some(BinOpKind::Ge),
+ Equal => Some(BinOpKind::Eq),
+ NotEqual => Some(BinOpKind::Ne),
+ Multiply => Some(BinOpKind::Mul),
+ Divide => Some(BinOpKind::Div),
+ Modulus => Some(BinOpKind::Rem),
+ Add => Some(BinOpKind::Add),
+ Subtract => Some(BinOpKind::Sub),
+ ShiftLeft => Some(BinOpKind::Shl),
+ ShiftRight => Some(BinOpKind::Shr),
+ BitAnd => Some(BinOpKind::BitAnd),
+ BitXor => Some(BinOpKind::BitXor),
+ BitOr => Some(BinOpKind::BitOr),
+ LAnd => Some(BinOpKind::And),
+ LOr => Some(BinOpKind::Or),
+ Assign | AssignOp(_) | As | DotDot | DotDotEq | Colon => None,
+ }
+ }
+
+ /// This operator could be used to follow a block unambiguously.
+ ///
+ /// This is used for error recovery at the moment, providing a suggestion to wrap blocks with
+ /// parentheses while having a high degree of confidence on the correctness of the suggestion.
+ pub fn can_continue_expr_unambiguously(&self) -> bool {
+ use AssocOp::*;
+ matches!(
+ self,
+ BitXor | // `{ 42 } ^ 3`
+ Assign | // `{ 42 } = { 42 }`
+ Divide | // `{ 42 } / 42`
+ Modulus | // `{ 42 } % 2`
+ ShiftRight | // `{ 42 } >> 2`
+ LessEqual | // `{ 42 } <= 3`
+ Greater | // `{ 42 } > 3`
+ GreaterEqual | // `{ 42 } >= 3`
+ AssignOp(_) | // `{ 42 } +=`
+ As | // `{ 42 } as usize`
+ // Equal | // `{ 42 } == { 42 }` Accepting these here would regress incorrect
+ // NotEqual | // `{ 42 } != { 42 } struct literals parser recovery.
+ Colon, // `{ 42 }: usize`
+ )
+ }
+}
+
+pub const PREC_CLOSURE: i8 = -40;
+pub const PREC_JUMP: i8 = -30;
+pub const PREC_RANGE: i8 = -10;
+// The range 2..=14 is reserved for AssocOp binary operator precedences.
+pub const PREC_PREFIX: i8 = 50;
+pub const PREC_POSTFIX: i8 = 60;
+pub const PREC_PAREN: i8 = 99;
+pub const PREC_FORCE_PAREN: i8 = 100;
+
+#[derive(Debug, Clone, Copy)]
+pub enum ExprPrecedence {
+ Closure,
+ Break,
+ Continue,
+ Ret,
+ Yield,
+ Yeet,
+
+ Range,
+
+ Binary(BinOpKind),
+
+ Cast,
+ Type,
+
+ Assign,
+ AssignOp,
+
+ Box,
+ AddrOf,
+ Let,
+ Unary,
+
+ Call,
+ MethodCall,
+ Field,
+ Index,
+ Try,
+ InlineAsm,
+ Mac,
+
+ Array,
+ Repeat,
+ Tup,
+ Lit,
+ Path,
+ Paren,
+ If,
+ While,
+ ForLoop,
+ Loop,
+ Match,
+ ConstBlock,
+ Block,
+ TryBlock,
+ Struct,
+ Async,
+ Await,
+ Err,
+}
+
+impl ExprPrecedence {
+ pub fn order(self) -> i8 {
+ match self {
+ ExprPrecedence::Closure => PREC_CLOSURE,
+
+ ExprPrecedence::Break |
+ ExprPrecedence::Continue |
+ ExprPrecedence::Ret |
+ ExprPrecedence::Yield |
+ ExprPrecedence::Yeet => PREC_JUMP,
+
+ // `Range` claims to have higher precedence than `Assign`, but `x .. x = x` fails to
+ // parse, instead of parsing as `(x .. x) = x`. Giving `Range` a lower precedence
+ // ensures that `pprust` will add parentheses in the right places to get the desired
+ // parse.
+ ExprPrecedence::Range => PREC_RANGE,
+
+ // Binop-like expr kinds, handled by `AssocOp`.
+ ExprPrecedence::Binary(op) => AssocOp::from_ast_binop(op).precedence() as i8,
+ ExprPrecedence::Cast => AssocOp::As.precedence() as i8,
+ ExprPrecedence::Type => AssocOp::Colon.precedence() as i8,
+
+ ExprPrecedence::Assign |
+ ExprPrecedence::AssignOp => AssocOp::Assign.precedence() as i8,
+
+ // Unary, prefix
+ ExprPrecedence::Box |
+ ExprPrecedence::AddrOf |
+ // Here `let pats = expr` has `let pats =` as a "unary" prefix of `expr`.
+ // However, this is not exactly right. When `let _ = a` is the LHS of a binop we
+ // need parens sometimes. E.g. we can print `(let _ = a) && b` as `let _ = a && b`
+ // but we need to print `(let _ = a) < b` as-is with parens.
+ ExprPrecedence::Let |
+ ExprPrecedence::Unary => PREC_PREFIX,
+
+ // Unary, postfix
+ ExprPrecedence::Await |
+ ExprPrecedence::Call |
+ ExprPrecedence::MethodCall |
+ ExprPrecedence::Field |
+ ExprPrecedence::Index |
+ ExprPrecedence::Try |
+ ExprPrecedence::InlineAsm |
+ ExprPrecedence::Mac => PREC_POSTFIX,
+
+ // Never need parens
+ ExprPrecedence::Array |
+ ExprPrecedence::Repeat |
+ ExprPrecedence::Tup |
+ ExprPrecedence::Lit |
+ ExprPrecedence::Path |
+ ExprPrecedence::Paren |
+ ExprPrecedence::If |
+ ExprPrecedence::While |
+ ExprPrecedence::ForLoop |
+ ExprPrecedence::Loop |
+ ExprPrecedence::Match |
+ ExprPrecedence::ConstBlock |
+ ExprPrecedence::Block |
+ ExprPrecedence::TryBlock |
+ ExprPrecedence::Async |
+ ExprPrecedence::Struct |
+ ExprPrecedence::Err => PREC_PAREN,
+ }
+ }
+}
+
+ /// In `let p = e`, operators with precedence `<=` this one require parentheses in `e`.
+pub fn prec_let_scrutinee_needs_par() -> usize {
+ AssocOp::LAnd.precedence()
+}
+
+/// Suppose we have `let _ = e` and the `order` of `e`.
+/// Is the `order` such that `e` in `let _ = e` needs parentheses when it is on the RHS?
+///
+/// Conversely, suppose that we have `(let _ = a) OP b` and `order` is that of `OP`.
+/// Can we print this as `let _ = a OP b`?
+pub fn needs_par_as_let_scrutinee(order: i8) -> bool {
+ order <= prec_let_scrutinee_needs_par() as i8
+}
+
+ /// Expressions that syntactically contain an "exterior" struct literal, i.e., not surrounded by any
+/// parens or other delimiters, e.g., `X { y: 1 }`, `X { y: 1 }.method()`, `foo == X { y: 1 }` and
+/// `X { y: 1 } == foo` all do, but `(X { y: 1 }) == foo` does not.
+pub fn contains_exterior_struct_lit(value: &ast::Expr) -> bool {
+ match value.kind {
+ ast::ExprKind::Struct(..) => true,
+
+ ast::ExprKind::Assign(ref lhs, ref rhs, _)
+ | ast::ExprKind::AssignOp(_, ref lhs, ref rhs)
+ | ast::ExprKind::Binary(_, ref lhs, ref rhs) => {
+ // X { y: 1 } + X { y: 2 }
+ contains_exterior_struct_lit(&lhs) || contains_exterior_struct_lit(&rhs)
+ }
+ ast::ExprKind::Await(ref x)
+ | ast::ExprKind::Unary(_, ref x)
+ | ast::ExprKind::Cast(ref x, _)
+ | ast::ExprKind::Type(ref x, _)
+ | ast::ExprKind::Field(ref x, _)
+ | ast::ExprKind::Index(ref x, _) => {
+ // &X { y: 1 }, X { y: 1 }.y
+ contains_exterior_struct_lit(&x)
+ }
+
+ ast::ExprKind::MethodCall(.., ref exprs, _) => {
+ // X { y: 1 }.bar(...)
+ contains_exterior_struct_lit(&exprs[0])
+ }
+
+ _ => false,
+ }
+}
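
A brief usage sketch for AssocOp: comparing precedence and querying fixity, as the parser and pretty-printer do. This assumes the rustc_ast crate is reachable (it is from inside the compiler; externally it needs the rustc_private setup), so treat it as a sketch rather than a drop-in snippet.

    use rustc_ast::ast::BinOpKind;
    use rustc_ast::util::parser::{AssocOp, Fixity};

    // In `a OP1 b OP2 c`: does OP1 bind more tightly than OP2?
    fn binds_tighter(op1: BinOpKind, op2: BinOpKind) -> bool {
        AssocOp::from_ast_binop(op1).precedence() > AssocOp::from_ast_binop(op2).precedence()
    }

    fn main() {
        // `*` binds tighter than `+`, `+` is left-associative, and `..` has no
        // associativity, so `a .. b .. c` is rejected.
        assert!(binds_tighter(BinOpKind::Mul, BinOpKind::Add));
        assert_eq!(AssocOp::from_ast_binop(BinOpKind::Add).fixity(), Fixity::Left);
        assert_eq!(AssocOp::DotDot.fixity(), Fixity::None);
    }
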
diff --git a/compiler/rustc_ast/src/util/unicode.rs b/compiler/rustc_ast/src/util/unicode.rs
new file mode 100644
index 000000000..f009f7b30
--- /dev/null
+++ b/compiler/rustc_ast/src/util/unicode.rs
@@ -0,0 +1,35 @@
+pub const TEXT_FLOW_CONTROL_CHARS: &[char] = &[
+ '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
+ '\u{2069}',
+];
+
+#[inline]
+pub fn contains_text_flow_control_chars(s: &str) -> bool {
+ // Char - UTF-8
+ // U+202A - E2 80 AA
+ // U+202B - E2 80 AB
+ // U+202C - E2 80 AC
+ // U+202D - E2 80 AD
+ // U+202E - E2 80 AE
+ // U+2066 - E2 81 A6
+ // U+2067 - E2 81 A7
+ // U+2068 - E2 81 A8
+ // U+2069 - E2 81 A9
+ let mut bytes = s.as_bytes();
+ loop {
+ match core::slice::memchr::memchr(0xE2, &bytes) {
+ Some(idx) => {
+ // bytes are valid UTF-8 -> E2 must be followed by two bytes
+ let ch = &bytes[idx..idx + 3];
+ match ch {
+ [_, 0x80, 0xAA..=0xAE] | [_, 0x81, 0xA6..=0xA9] => break true,
+ _ => {}
+ }
+ bytes = &bytes[idx + 3..];
+ }
+ None => {
+ break false;
+ }
+ }
+ }
+}
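
The byte-level scan above is a fast path over the UTF-8 encodings listed in its comment. A naive char-by-char equivalent, useful as a mental model or as a cross-check in tests, could look like this sketch (the `_naive` name is ours):

    const TEXT_FLOW_CONTROL_CHARS: &[char] = &[
        '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
        '\u{202C}', '\u{2069}',
    ];

    // Same answer as the memchr-based version, just slower: scan decoded chars.
    fn contains_text_flow_control_chars_naive(s: &str) -> bool {
        s.chars().any(|c| TEXT_FLOW_CONTROL_CHARS.contains(&c))
    }

    fn main() {
        // U+202E (RIGHT-TO-LEFT OVERRIDE) is one of the bidi controls flagged by the
        // "Trojan Source" lints.
        assert!(contains_text_flow_control_chars_naive("user\u{202E}name"));
        assert!(!contains_text_flow_control_chars_naive("plain ascii"));
    }
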