Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit: 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree: 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_ast/src/util
parent: Initial commit. (diff)
download: rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
6 files changed, 1145 insertions, 0 deletions
diff --git a/compiler/rustc_ast/src/util/classify.rs b/compiler/rustc_ast/src/util/classify.rs
new file mode 100644
index 000000000..6ea3db6d3
--- /dev/null
+++ b/compiler/rustc_ast/src/util/classify.rs
@@ -0,0 +1,52 @@
+//! Routines the parser uses to classify AST nodes
+
+// Predicates on exprs and stmts that the pretty-printer and parser use
+
+use crate::ast;
+
+/// Does this expression require a semicolon to be treated
+/// as a statement? The negation of this: 'can this expression
+/// be used as a statement without a semicolon' -- is used
+/// as an early-bail-out in the parser so that, for instance,
+///     if true {...} else {...}
+///      |x| 5
+/// isn't parsed as (if true {...} else {...} | x) | 5
+pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool {
+    !matches!(
+        e.kind,
+        ast::ExprKind::If(..)
+            | ast::ExprKind::Match(..)
+            | ast::ExprKind::Block(..)
+            | ast::ExprKind::While(..)
+            | ast::ExprKind::Loop(..)
+            | ast::ExprKind::ForLoop(..)
+            | ast::ExprKind::TryBlock(..)
+    )
+}
+
+/// If `expr` ends with `}`, returns the innermost expression ending in that `}`; otherwise `None`.
+pub fn expr_trailing_brace(mut expr: &ast::Expr) -> Option<&ast::Expr> {
+    use ast::ExprKind::*;
+
+    loop {
+        match &expr.kind {
+            AddrOf(_, _, e)
+            | Assign(_, e, _)
+            | AssignOp(_, _, e)
+            | Binary(_, _, e)
+            | Box(e)
+            | Break(_, Some(e))
+            | Closure(.., e, _)
+            | Let(_, e, _)
+            | Range(_, Some(e), _)
+            | Ret(Some(e))
+            | Unary(_, e)
+            | Yield(Some(e)) => {
+                expr = e; // keep searching inside the sub-expression bound to `e`
+            }
+            Async(..) | Block(..) | ForLoop(..) | If(..) | Loop(..) | Match(..) | Struct(..)
+            | TryBlock(..) | While(..) => break Some(expr), // innermost brace-terminated expression
+            _ => break None, // every other kind is treated as not ending in `}`
+        }
+    }
+}
diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
new file mode 100644
index 000000000..c96474ccb
--- /dev/null
+++ b/compiler/rustc_ast/src/util/comments.rs
@@ -0,0 +1,255 @@
+use crate::token::CommentKind;
+use rustc_span::source_map::SourceMap;
+use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
+
+#[cfg(test)]
+mod tests;
+
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub enum CommentStyle {
+    /// No code on either side of each line of the comment
+    Isolated,
+    /// Code exists to the left of the comment
+    Trailing,
+    /// Code before /* foo */ and after the comment
+    Mixed,
+    /// Just a manual blank line "\n\n", for layout
+    BlankLine,
+}
+
+#[derive(Clone)]
+pub struct Comment {
+    pub style: CommentStyle,
+    pub lines: Vec<String>,
+    pub pos: BytePos,
+}
+
+/// A fast conservative estimate on whether the string can contain documentation links.
+/// A pair of square brackets `[]` must exist in the string, but we only search for the
+/// opening bracket because brackets always go in pairs in practice.
+#[inline]
+pub fn may_have_doc_links(s: &str) -> bool {
+    s.contains('[')
+}
+
+/// Makes a doc string more presentable to users.
+/// Used by rustdoc and perhaps other tools, but not by rustc.
+pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol {
+    fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
+        let mut i = 0;
+        let mut j = lines.len();
+        // a first line that is empty or all stars should be omitted
+        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
+            i += 1;
+        }
+
+        // likewise, a non-empty last line of all stars should be omitted
+        if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') {
+            j -= 1;
+        }
+
+        if i != 0 || j != lines.len() { Some((i, j)) } else { None }
+    }
+
+    fn get_horizontal_trim<'a>(lines: &'a [&str], kind: CommentKind) -> Option<String> {
+        let mut i = usize::MAX;
+        let mut first = true;
+
+        // In case we have doc comments like `/**` or `/*!`, we want to remove stars if they are
+        // present. However, we first need to strip the empty lines so they don't get in the middle
+        // when we try to compute the "horizontal trim".
+        let lines = if kind == CommentKind::Block {
+            // Skip the first line unless it already starts with a star.
+            let mut i = lines
+                .get(0)
+                .map(|l| if l.trim_start().starts_with('*') { 0 } else { 1 })
+                .unwrap_or(0);
+            let mut j = lines.len();
+
+            while i < j && lines[i].trim().is_empty() {
+                i += 1;
+            }
+            while j > i && lines[j - 1].trim().is_empty() {
+                j -= 1;
+            }
+            &lines[i..j]
+        } else {
+            lines
+        };
+
+        for line in lines {
+            for (j, c) in line.chars().enumerate() {
+                if j > i || !"* \t".contains(c) {
+                    return None;
+                }
+                if c == '*' {
+                    if first {
+                        i = j;
+                        first = false;
+                    } else if i != j {
+                        return None;
+                    }
+                    break;
+                }
+            }
+            if i >= line.len() {
+                return None;
+            }
+        }
+        if lines.is_empty() { None } else { Some(lines[0][..i].into()) }
+    }
+
+    let data_s = data.as_str();
+    if data_s.contains('\n') {
+        let mut lines = data_s.lines().collect::<Vec<&str>>();
+        let mut changes = false;
+        let lines = if let Some((i, j)) = get_vertical_trim(&lines) {
+            changes = true;
+            // drop the empty-or-all-stars first line and/or the all-stars last line
+            &mut lines[i..j]
+        } else {
+            &mut lines
+        };
+        if let Some(horizontal) = get_horizontal_trim(&lines, kind) {
+            changes = true;
+            // remove a "[ \t]*\*" block from each line, if possible
+            for line in lines.iter_mut() {
+                if let Some(tmp) = line.strip_prefix(&horizontal) {
+                    *line = tmp;
+                    if kind == CommentKind::Block
+                        && (*line == "*" || line.starts_with("* ") || line.starts_with("**"))
+                    {
+                        *line = &line[1..];
+                    }
+                }
+            }
+        }
+        if changes {
+            return Symbol::intern(&lines.join("\n"));
+        }
+    }
+    data
+}
+
+/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
+/// Otherwise returns `Some(k)` where `k` is the byte offset just past those (at most `col`)
+/// leading whitespace chars. Note that `k` can equal `s.len()`, i.e. no char may start at `k`.
+fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
+    let mut idx = 0;
+    for (i, ch) in s.char_indices().take(col.to_usize()) {
+        if !ch.is_whitespace() {
+            return None;
+        }
+        idx = i + ch.len_utf8();
+    }
+    Some(idx)
+}
+
+fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
+    let len = s.len();
+    match all_whitespace(&s, col) {
+        Some(col) => {
+            if col < len {
+                &s[col..] // `col` here is the byte offset from `all_whitespace`, not the char column
+            } else {
+                "" // every char of `s` was part of the whitespace prefix
+            }
+        }
+        None => s, // non-whitespace within the first `col` chars: leave the line untouched
+    }
+}
+
+fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
+    let mut res: Vec<String> = vec![];
+    let mut lines = text.lines();
+    // just push the first line
+    res.extend(lines.next().map(|it| it.to_string()));
+    // for other lines, strip common whitespace prefix
+    for line in lines {
+        res.push(trim_whitespace_prefix(line, col).to_string())
+    }
+    res
+}
+
+// it appears this function is called only from pprust... that's
+// probably not a good thing.
+pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
+    let sm = SourceMap::new(sm.path_mapping().clone());
+    let source_file = sm.new_source_file(path, src);
+    let text = (*source_file.src.as_ref().unwrap()).clone();
+
+    let text: &str = text.as_str();
+    let start_bpos = source_file.start_pos;
+    let mut pos = 0;
+    let mut comments: Vec<Comment> = Vec::new();
+    let mut code_to_the_left = false;
+
+    if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
+        comments.push(Comment {
+            style: CommentStyle::Isolated,
+            lines: vec![text[..shebang_len].to_string()],
+            pos: start_bpos,
+        });
+        pos += shebang_len;
+    }
+
+    for token in rustc_lexer::tokenize(&text[pos..]) {
+        let token_text = &text[pos..pos + token.len as usize];
+        match token.kind {
+            rustc_lexer::TokenKind::Whitespace => {
+                if let Some(mut idx) = token_text.find('\n') {
+                    code_to_the_left = false;
+                    while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
+                        idx += 1 + next_newline;
+                        comments.push(Comment {
+                            style: CommentStyle::BlankLine,
+                            lines: vec![],
+                            pos: start_bpos + BytePos((pos + idx) as u32),
+                        });
+                    }
+                }
+            }
+            rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
+                if doc_style.is_none() {
+                    let code_to_the_right = !matches!(
+                        text[pos + token.len as usize..].chars().next(),
+                        Some('\r' | '\n')
+                    );
+                    let style = match (code_to_the_left, code_to_the_right) {
+                        (_, true) => CommentStyle::Mixed,
+                        (false, false) => CommentStyle::Isolated,
+                        (true, false) => CommentStyle::Trailing,
+                    };
+
+                    // Count the number of chars since the start of the line by rescanning.
+                    let pos_in_file = start_bpos + BytePos(pos as u32);
+                    let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
+                    let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
+                    let col = CharPos(text[line_begin_pos..pos].chars().count());
+
+                    let lines = split_block_comment_into_lines(token_text, col);
+                    comments.push(Comment { style, lines, pos: pos_in_file })
+                }
+            }
+            rustc_lexer::TokenKind::LineComment { doc_style } => {
+                if doc_style.is_none() {
+                    comments.push(Comment {
+                        style: if code_to_the_left {
+                            CommentStyle::Trailing
+                        } else {
+                            CommentStyle::Isolated
+                        },
+                        lines: vec![token_text.to_string()],
+                        pos: start_bpos + BytePos(pos as u32),
+                    })
+                }
+            }
+            _ => {
+                code_to_the_left = true;
+            }
+        }
+        pos += token.len as usize;
+    }
+
+    comments
+}
diff --git a/compiler/rustc_ast/src/util/comments/tests.rs b/compiler/rustc_ast/src/util/comments/tests.rs
new file mode 100644
index 000000000..11d50603a
--- /dev/null
+++ b/compiler/rustc_ast/src/util/comments/tests.rs
@@ -0,0 +1,61 @@
+use super::*;
+use rustc_span::create_default_session_globals_then;
+
+#[test]
+fn test_block_doc_comment_1() {
+    create_default_session_globals_then(|| {
+        let comment = "\n * Test \n **  Test\n *   Test\n";
+        let stripped = beautify_doc_string(Symbol::intern(comment), CommentKind::Block);
+        assert_eq!(stripped.as_str(), " Test \n*  Test\n   Test");
+    })
+}
+
+#[test]
+fn test_block_doc_comment_2() {
+    create_default_session_globals_then(|| {
+        let comment = "\n * Test\n *  Test\n";
+        let stripped = beautify_doc_string(Symbol::intern(comment), CommentKind::Block);
+        assert_eq!(stripped.as_str(), " Test\n  Test");
+    })
+}
+
+#[test]
+fn test_block_doc_comment_3() {
+    create_default_session_globals_then(|| {
+        let comment = "\n let a: *i32;\n *a = 5;\n";
+        let stripped = beautify_doc_string(Symbol::intern(comment), CommentKind::Block);
+        assert_eq!(stripped.as_str(), "let a: *i32;\n*a = 5;");
+    })
+}
+
+#[test]
+fn test_line_doc_comment() {
+    create_default_session_globals_then(|| {
+        let stripped = beautify_doc_string(Symbol::intern(" test"), CommentKind::Line);
+        assert_eq!(stripped.as_str(), " test");
+        let stripped = beautify_doc_string(Symbol::intern("! test"), CommentKind::Line);
+        assert_eq!(stripped.as_str(), "! test");
+        let stripped = beautify_doc_string(Symbol::intern("test"), CommentKind::Line);
+        assert_eq!(stripped.as_str(), "test");
+        let stripped = beautify_doc_string(Symbol::intern("!test"), CommentKind::Line);
+        assert_eq!(stripped.as_str(), "!test");
+    })
+}
+
+#[test]
+fn test_doc_blocks() {
+    create_default_session_globals_then(|| {
+        let stripped =
+            beautify_doc_string(Symbol::intern(" # Returns\n     *\n     "), CommentKind::Block);
+        assert_eq!(stripped.as_str(), " # Returns\n\n");
+
+        let stripped = beautify_doc_string(
+            Symbol::intern("\n     * # Returns\n     *\n     "),
+            CommentKind::Block,
+        );
+        assert_eq!(stripped.as_str(), " # Returns\n\n");
+
+        let stripped = beautify_doc_string(Symbol::intern("\n *     a\n "), CommentKind::Block);
+        assert_eq!(stripped.as_str(), "     a\n");
+    })
+}
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
new file mode 100644
index 000000000..9c18f55c0
--- /dev/null
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -0,0 +1,336 @@
+//! Code related to parsing literals.
+
+use crate::ast::{self, Lit, LitKind};
+use crate::token::{self, Token};
+
+use rustc_lexer::unescape::{unescape_byte, unescape_char};
+use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
+use rustc_span::symbol::{kw, sym, Symbol};
+use rustc_span::Span;
+
+use std::ascii;
+use tracing::debug;
+
+pub enum LitError {
+    NotLiteral, // the token is not a literal (returned by `Lit::from_token`)
+    LexerError, // unescaping failed fatally, or a digit is invalid for the literal's base
+    InvalidSuffix, // a suffix is present on a literal kind for which `may_have_suffix()` is false
+    InvalidIntSuffix, // integer literal with an unknown suffix that does not start with `f`
+    InvalidFloatSuffix, // float literal with a suffix other than `f32`/`f64`
+    NonDecimalFloat(u32), // float literal written in a base other than 10; payload is that base
+    IntTooLarge, // the integer value could not be parsed into a `u128`
+}
+
+impl LitKind {
+    /// Converts literal token into a semantic literal.
+    pub fn from_lit_token(lit: token::Lit) -> Result<LitKind, LitError> {
+        let token::Lit { kind, symbol, suffix } = lit;
+        if suffix.is_some() && !kind.may_have_suffix() {
+            return Err(LitError::InvalidSuffix);
+        }
+
+        Ok(match kind {
+            token::Bool => {
+                assert!(symbol.is_bool_lit());
+                LitKind::Bool(symbol == kw::True)
+            }
+            token::Byte => {
+                return unescape_byte(symbol.as_str())
+                    .map(LitKind::Byte)
+                    .map_err(|_| LitError::LexerError);
+            }
+            token::Char => {
+                return unescape_char(symbol.as_str())
+                    .map(LitKind::Char)
+                    .map_err(|_| LitError::LexerError);
+            }
+
+            // There are some valid suffixes for integer and float literals,
+            // so all the handling is done internally.
+            token::Integer => return integer_lit(symbol, suffix),
+            token::Float => return float_lit(symbol, suffix),
+
+            token::Str => {
+                // If there are no characters requiring special treatment we can
+                // reuse the symbol from the token. Otherwise, we must generate a
+                // new symbol because the string in the LitKind is different to the
+                // string in the token.
+                let s = symbol.as_str();
+                let symbol = if s.contains(&['\\', '\r']) {
+                    let mut buf = String::with_capacity(s.len());
+                    let mut error = Ok(());
+                    // Force-inlining here is aggressive but the closure is
+                    // called on every char in the string, so it can be
+                    // hot in programs with many long strings.
+                    unescape_literal(
+                        &s,
+                        Mode::Str,
+                        &mut #[inline(always)]
+                        |_, unescaped_char| match unescaped_char {
+                            Ok(c) => buf.push(c),
+                            Err(err) => {
+                                if err.is_fatal() {
+                                    error = Err(LitError::LexerError);
+                                }
+                            }
+                        },
+                    );
+                    error?;
+                    Symbol::intern(&buf)
+                } else {
+                    symbol
+                };
+                LitKind::Str(symbol, ast::StrStyle::Cooked)
+            }
+            token::StrRaw(n) => {
+                // Ditto.
+                let s = symbol.as_str();
+                let symbol =
+                    if s.contains('\r') {
+                        let mut buf = String::with_capacity(s.len());
+                        let mut error = Ok(());
+                        unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| {
+                            match unescaped_char {
+                                Ok(c) => buf.push(c),
+                                Err(err) => {
+                                    if err.is_fatal() {
+                                        error = Err(LitError::LexerError);
+                                    }
+                                }
+                            }
+                        });
+                        error?;
+                        Symbol::intern(&buf)
+                    } else {
+                        symbol
+                    };
+                LitKind::Str(symbol, ast::StrStyle::Raw(n))
+            }
+            token::ByteStr => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
+                    match unescaped_byte {
+                        Ok(c) => buf.push(c),
+                        Err(err) => {
+                            if err.is_fatal() {
+                                error = Err(LitError::LexerError);
+                            }
+                        }
+                    }
+                });
+                error?;
+                LitKind::ByteStr(buf.into())
+            }
+            token::ByteStrRaw(_) => {
+                let s = symbol.as_str();
+                let bytes = if s.contains('\r') {
+                    let mut buf = Vec::with_capacity(s.len());
+                    let mut error = Ok(());
+                    unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
+                        match unescaped_byte {
+                            Ok(c) => buf.push(c),
+                            Err(err) => {
+                                if err.is_fatal() {
+                                    error = Err(LitError::LexerError);
+                                }
+                            }
+                        }
+                    });
+                    error?;
+                    buf
+                } else {
+                    symbol.to_string().into_bytes()
+                };
+
+                LitKind::ByteStr(bytes.into())
+            }
+            token::Err => LitKind::Err(symbol),
+        })
+    }
+
+    /// Attempts to recover a token from semantic literal.
+    /// This function is used when the original token doesn't exist (e.g. the literal is created
+    /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
+    pub fn to_lit_token(&self) -> token::Lit {
+        let (kind, symbol, suffix) = match *self {
+            LitKind::Str(symbol, ast::StrStyle::Cooked) => {
+                // Don't re-intern unless the escaped string is different.
+                let s = symbol.as_str();
+                let escaped = s.escape_default().to_string();
+                let symbol = if s == escaped { symbol } else { Symbol::intern(&escaped) };
+                (token::Str, symbol, None)
+            }
+            LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None),
+            LitKind::ByteStr(ref bytes) => {
+                let string = bytes
+                    .iter()
+                    .cloned()
+                    .flat_map(ascii::escape_default)
+                    .map(Into::<char>::into)
+                    .collect::<String>();
+                (token::ByteStr, Symbol::intern(&string), None)
+            }
+            LitKind::Byte(byte) => {
+                let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect();
+                (token::Byte, Symbol::intern(&string), None)
+            }
+            LitKind::Char(ch) => {
+                let string: String = ch.escape_default().map(Into::<char>::into).collect();
+                (token::Char, Symbol::intern(&string), None)
+            }
+            LitKind::Int(n, ty) => {
+                let suffix = match ty {
+                    ast::LitIntType::Unsigned(ty) => Some(ty.name()),
+                    ast::LitIntType::Signed(ty) => Some(ty.name()),
+                    ast::LitIntType::Unsuffixed => None,
+                };
+                (token::Integer, sym::integer(n), suffix)
+            }
+            LitKind::Float(symbol, ty) => {
+                let suffix = match ty {
+                    ast::LitFloatType::Suffixed(ty) => Some(ty.name()),
+                    ast::LitFloatType::Unsuffixed => None,
+                };
+                (token::Float, symbol, suffix)
+            }
+            LitKind::Bool(value) => {
+                let symbol = if value { kw::True } else { kw::False };
+                (token::Bool, symbol, None)
+            }
+            LitKind::Err(symbol) => (token::Err, symbol, None),
+        };
+
+        token::Lit::new(kind, symbol, suffix)
+    }
+}
+
+impl Lit {
+    /// Converts literal token into an AST literal.
+    pub fn from_lit_token(token: token::Lit, span: Span) -> Result<Lit, LitError> {
+        Ok(Lit { token, kind: LitKind::from_lit_token(token)?, span })
+    }
+
+    /// Converts arbitrary token into an AST literal.
+    ///
+    /// Keep this in sync with `Token::can_begin_literal_or_bool` excluding unary negation.
+    pub fn from_token(token: &Token) -> Result<Lit, LitError> {
+        let lit = match token.uninterpolate().kind {
+            token::Ident(name, false) if name.is_bool_lit() => {
+                token::Lit::new(token::Bool, name, None)
+            }
+            token::Literal(lit) => lit,
+            token::Interpolated(ref nt) => {
+                if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt
+                    && let ast::ExprKind::Lit(lit) = &expr.kind
+                {
+                    return Ok(lit.clone());
+                }
+                return Err(LitError::NotLiteral);
+            }
+            _ => return Err(LitError::NotLiteral),
+        };
+
+        Lit::from_lit_token(lit, token.span)
+    }
+
+    /// Attempts to recover an AST literal from semantic literal.
+    /// This function is used when the original token doesn't exist (e.g. the literal is created
+    /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
+    pub fn from_lit_kind(kind: LitKind, span: Span) -> Lit {
+        Lit { token: kind.to_lit_token(), kind, span }
+    }
+
+    /// Losslessly convert an AST literal into a token.
+    pub fn to_token(&self) -> Token {
+        let kind = match self.token.kind {
+            token::Bool => token::Ident(self.token.symbol, false),
+            _ => token::Literal(self.token),
+        };
+        Token::new(kind, self.span)
+    }
+}
+
+fn strip_underscores(symbol: Symbol) -> Symbol {
+    // Do not allocate a new string unless necessary.
+    let s = symbol.as_str();
+    if s.contains('_') {
+        let mut s = s.to_string();
+        s.retain(|c| c != '_');
+        return Symbol::intern(&s);
+    }
+    symbol
+}
+
+fn filtered_float_lit(
+    symbol: Symbol,
+    suffix: Option<Symbol>,
+    base: u32,
+) -> Result<LitKind, LitError> {
+    debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
+    if base != 10 {
+        return Err(LitError::NonDecimalFloat(base));
+    }
+    Ok(match suffix {
+        Some(suf) => LitKind::Float(
+            symbol,
+            ast::LitFloatType::Suffixed(match suf {
+                sym::f32 => ast::FloatTy::F32,
+                sym::f64 => ast::FloatTy::F64,
+                _ => return Err(LitError::InvalidFloatSuffix),
+            }),
+        ),
+        None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
+    })
+}
+
+fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
+    debug!("float_lit: {:?}, {:?}", symbol, suffix);
+    filtered_float_lit(strip_underscores(symbol), suffix, 10)
+}
+
+fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
+    debug!("integer_lit: {:?}, {:?}", symbol, suffix);
+    let symbol = strip_underscores(symbol);
+    let s = symbol.as_str();
+
+    let base = match s.as_bytes() {
+        [b'0', b'x', ..] => 16,
+        [b'0', b'o', ..] => 8,
+        [b'0', b'b', ..] => 2,
+        _ => 10,
+    };
+
+    let ty = match suffix {
+        Some(suf) => match suf {
+            sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
+            sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
+            sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
+            sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
+            sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
+            sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
+            sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
+            sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
+            sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
+            sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
+            sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
+            sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
+            // `1f64` and `2f32` etc. are valid float literals, and
+            // `fxxx` looks more like an invalid float literal than invalid integer literal.
+            _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
+            _ => return Err(LitError::InvalidIntSuffix),
+        },
+        _ => ast::LitIntType::Unsuffixed,
+    };
+
+    let s = &s[if base != 10 { 2 } else { 0 }..]; // skip the two-byte `0x`/`0o`/`0b` prefix
+    u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
+        // Small bases are lexed as if they were base 10, e.g., the string
+        // might be `0b10201`. This will cause the conversion above to fail,
+        // but these kinds of errors are already reported by the lexer.
+        let from_lexer =
+            base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
+        if from_lexer { LitError::LexerError } else { LitError::IntTooLarge }
+    })
+}
diff --git a/compiler/rustc_ast/src/util/parser.rs b/compiler/rustc_ast/src/util/parser.rs
new file mode 100644
index 000000000..74b7fe9e2
--- /dev/null
+++ b/compiler/rustc_ast/src/util/parser.rs
@@ -0,0 +1,406 @@
+use crate::ast::{self, BinOpKind};
+use crate::token::{self, BinOpToken, Token};
+use rustc_span::symbol::kw;
+
+/// Associative operator with precedence.
+///
+/// The parser takes operator precedence (`precedence`) and fixity (`fixity`) from this enum.
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub enum AssocOp {
+    /// `+`
+    Add,
+    /// `-`
+    Subtract,
+    /// `*`
+    Multiply,
+    /// `/`
+    Divide,
+    /// `%` (corresponds to `BinOpKind::Rem`)
+    Modulus,
+    /// `&&` (corresponds to `BinOpKind::And`)
+    LAnd,
+    /// `||` (corresponds to `BinOpKind::Or`)
+    LOr,
+    /// `^`
+    BitXor,
+    /// `&`
+    BitAnd,
+    /// `|`
+    BitOr,
+    /// `<<`
+    ShiftLeft,
+    /// `>>`
+    ShiftRight,
+    /// `==`
+    Equal,
+    /// `<` (`from_token` also produces this for the `<-` token)
+    Less,
+    /// `<=`
+    LessEqual,
+    /// `!=`
+    NotEqual,
+    /// `>`
+    Greater,
+    /// `>=`
+    GreaterEqual,
+    /// `=`
+    Assign,
+    /// `?=`, where `?` is one of the `BinOpToken` operators (e.g. `+=`)
+    AssignOp(BinOpToken),
+    /// `as`, e.g. `{ 42 } as usize`
+    As,
+    /// `..` range
+    DotDot,
+    /// `..=` range (`from_token` also produces this for the unsupported `...` token)
+    DotDotEq,
+    /// `:`, e.g. `{ 42 }: usize`
+    Colon,
+}
+
+#[derive(PartialEq, Debug)]
+pub enum Fixity {
+    /// The operator is left-associative (`AssocOp::fixity` reports this for e.g. `+` and `&&`)
+    Left,
+    /// The operator is right-associative (reported for `=` and the `?=` assignment operators)
+    Right,
+    /// The operator is not associative (reported for the range operators `..` and `..=`)
+    None,
+}
+
+impl AssocOp {
+    /// Maps a token to the operator it stands for; `None` if the token is not an operator.
+    pub fn from_token(t: &Token) -> Option<AssocOp> {
+        use AssocOp::*;
+        Some(match t.kind {
+            token::Eq => Assign,
+            token::BinOpEq(k) => AssignOp(k),
+            token::BinOp(BinOpToken::Plus) => Add,
+            token::BinOp(BinOpToken::Minus) => Subtract,
+            token::BinOp(BinOpToken::Star) => Multiply,
+            token::BinOp(BinOpToken::Slash) => Divide,
+            token::BinOp(BinOpToken::Percent) => Modulus,
+            token::BinOp(BinOpToken::Shl) => ShiftLeft,
+            token::BinOp(BinOpToken::Shr) => ShiftRight,
+            token::BinOp(BinOpToken::And) => BitAnd,
+            token::BinOp(BinOpToken::Caret) => BitXor,
+            token::BinOp(BinOpToken::Or) => BitOr,
+            token::AndAnd => LAnd,
+            token::OrOr => LOr,
+            token::EqEq => Equal,
+            token::Ne => NotEqual,
+            // A `<-` token is treated like `<`: it should probably have been written `< -`.
+            token::Lt | token::LArrow => Less,
+            token::Le => LessEqual,
+            token::Gt => Greater,
+            token::Ge => GreaterEqual,
+            token::DotDot => DotDot,
+            // `...` is no longer supported, but it still has to map to some operator so that
+            // the error can be displayed.
+            token::DotDotEq | token::DotDotDot => DotDotEq,
+            token::Colon => Colon,
+            _ if t.is_keyword(kw::As) => As,
+            _ => return None,
+        })
+    }
+
+    /// Converts an `ast::BinOpKind` into its `AssocOp`; every `BinOpKind` has one.
+    pub fn from_ast_binop(op: BinOpKind) -> Self {
+        match op {
+            BinOpKind::Add => Self::Add,
+            BinOpKind::Sub => Self::Subtract,
+            BinOpKind::Mul => Self::Multiply,
+            BinOpKind::Div => Self::Divide,
+            BinOpKind::Rem => Self::Modulus,
+            BinOpKind::And => Self::LAnd,
+            BinOpKind::Or => Self::LOr,
+            BinOpKind::BitXor => Self::BitXor,
+            BinOpKind::BitAnd => Self::BitAnd,
+            BinOpKind::BitOr => Self::BitOr,
+            BinOpKind::Shl => Self::ShiftLeft,
+            BinOpKind::Shr => Self::ShiftRight,
+            BinOpKind::Eq => Self::Equal,
+            BinOpKind::Ne => Self::NotEqual,
+            BinOpKind::Lt => Self::Less,
+            BinOpKind::Le => Self::LessEqual,
+            BinOpKind::Gt => Self::Greater,
+            BinOpKind::Ge => Self::GreaterEqual,
+        }
+    }
+
+    /// Gets the precedence of this operator, from 2 (`=`) up to 14 (`as` and `:`).
+    pub fn precedence(&self) -> usize {
+        use AssocOp::*;
+        match self {
+            Assign | AssignOp(_) => 2,
+            DotDot | DotDotEq => 4,
+            LOr => 5,
+            LAnd => 6,
+            Equal | NotEqual | Less | LessEqual | Greater | GreaterEqual => 7,
+            BitOr => 8,
+            BitXor => 9,
+            BitAnd => 10,
+            ShiftLeft | ShiftRight => 11,
+            Add | Subtract => 12,
+            Multiply | Divide | Modulus => 13,
+            As | Colon => 14,
+        }
+    }
+
+    /// Gets the fixity (associativity) of this operator.
+    pub fn fixity(&self) -> Fixity {
+        use AssocOp::*;
+        // NOTE: operators with equal precedence must share a fixity; anything else is a bug!
+        match self {
+            DotDot | DotDotEq => Fixity::None,
+            Assign | AssignOp(_) => Fixity::Right,
+            LOr | LAnd | Equal | NotEqual | Less | LessEqual | Greater | GreaterEqual | BitOr
+            | BitXor | BitAnd | ShiftLeft | ShiftRight | Add | Subtract | Multiply | Divide
+            | Modulus | As | Colon => Fixity::Left,
+        }
+    }
+
+    /// Returns `true` exactly for `<`, `>`, `<=`, `>=`, `==` and `!=`.
+    pub fn is_comparison(&self) -> bool {
+        use AssocOp::*;
+        match self {
+            Equal | NotEqual | Less | LessEqual | Greater | GreaterEqual => true,
+            Assign | AssignOp(_) | Colon | DotDot | DotDotEq | LAnd | LOr | BitAnd | BitXor | BitOr
+            | As | ShiftLeft | ShiftRight | Add | Subtract | Multiply | Divide | Modulus => false,
+        }
+    }
+
+    /// Returns `true` exactly for plain assignment (`=`) and the `?=` assignment operators.
+    pub fn is_assign_like(&self) -> bool {
+        use AssocOp::*;
+        match self {
+            Assign | AssignOp(_) => true,
+            Equal | NotEqual | Less | LessEqual | Greater | GreaterEqual | LAnd | LOr | BitAnd
+            | BitXor | BitOr | ShiftLeft | ShiftRight | Add | Subtract | Multiply | Divide | Modulus
+            | As | Colon | DotDot | DotDotEq => false,
+        }
+    }
+
+    /// Inverse of `from_ast_binop`; `None` for operators that have no `BinOpKind` counterpart.
+    pub fn to_ast_binop(&self) -> Option<BinOpKind> {
+        use AssocOp::*;
+        Some(match self {
+            Add => BinOpKind::Add,
+            Subtract => BinOpKind::Sub,
+            Multiply => BinOpKind::Mul,
+            Divide => BinOpKind::Div,
+            Modulus => BinOpKind::Rem,
+            LAnd => BinOpKind::And,
+            LOr => BinOpKind::Or,
+            BitXor => BinOpKind::BitXor,
+            BitAnd => BinOpKind::BitAnd,
+            BitOr => BinOpKind::BitOr,
+            ShiftLeft => BinOpKind::Shl,
+            ShiftRight => BinOpKind::Shr,
+            Equal => BinOpKind::Eq,
+            NotEqual => BinOpKind::Ne,
+            Less => BinOpKind::Lt,
+            LessEqual => BinOpKind::Le,
+            Greater => BinOpKind::Gt,
+            GreaterEqual => BinOpKind::Ge,
+            Assign | AssignOp(_) | As | Colon | DotDot | DotDotEq => return None,
+        })
+    }
+
+    /// Whether this operator can follow a block expression without ambiguity.
+    ///
+    /// Error recovery currently relies on this to suggest wrapping a block in parentheses,
+    /// and only wants to do so when that suggestion is very likely to be correct.
+    pub fn can_continue_expr_unambiguously(&self) -> bool {
+        use AssocOp::*;
+        matches!(
+            self,
+            Assign | // `{ 42 } = { 42 }`
+            AssignOp(_) | // `{ 42 } +=`
+            As | // `{ 42 } as usize`
+            Colon | // `{ 42 }: usize`
+            Divide | // `{ 42 } / 42`
+            Modulus | // `{ 42 } % 2`
+            BitXor | // `{ 42 } ^ 3`
+            ShiftRight | // `{ 42 } >> 2`
+            LessEqual | // `{ 42 } <= 3`
+            Greater | // `{ 42 } > 3`
+            // `Equal` (`{ 42 } == { 42 }`) and `NotEqual` (`{ 42 } != { 42 }`) are left out on
+            // purpose: accepting them here would regress incorrect struct literals parser recovery.
+            GreaterEqual, // `{ 42 } >= 3`
+        )
+    }
+}
+
+pub const PREC_CLOSURE: i8 = -40; // used by `ExprPrecedence::order` for `Closure`
+pub const PREC_JUMP: i8 = -30; // for `Break`, `Continue`, `Ret`, `Yield` and `Yeet`
+pub const PREC_RANGE: i8 = -10; // for `Range`
+// The range 2..=14 is reserved for the values returned by `AssocOp::precedence`.
+pub const PREC_PREFIX: i8 = 50; // for the unary prefix kinds (`Box`, `AddrOf`, `Let`, `Unary`)
+pub const PREC_POSTFIX: i8 = 60; // for the unary postfix kinds (`Call`, `Field`, `Try`, ...)
+pub const PREC_PAREN: i8 = 99; // for the kinds that never need parentheses
+pub const PREC_FORCE_PAREN: i8 = 100; // not used in this file; presumably forces parens -- confirm
+
+#[derive(Debug, Clone, Copy)]
+pub enum ExprPrecedence { // precedence class of an expression; `order()` gives its number
+    Closure, // ordered at `PREC_CLOSURE`
+    Break, // `Break` through `Yeet` are ordered at `PREC_JUMP`
+    Continue,
+    Ret,
+    Yield,
+    Yeet,
+
+    Range, // ordered at `PREC_RANGE`
+
+    Binary(BinOpKind), // ordered by the `AssocOp` precedence of the wrapped operator
+
+    Cast, // same precedence as `AssocOp::As`
+    Type, // same precedence as `AssocOp::Colon`
+
+    Assign, // both assignment kinds use the precedence of `AssocOp::Assign`
+    AssignOp,
+
+    Box, // this group (unary prefix kinds) is ordered at `PREC_PREFIX`
+    AddrOf,
+    Let,
+    Unary,
+
+    Call, // this group (unary postfix kinds) is ordered at `PREC_POSTFIX`
+    MethodCall,
+    Field,
+    Index,
+    Try,
+    InlineAsm,
+    Mac,
+
+    Array, // this group, except `Await`, is ordered at `PREC_PAREN` (never needs parens)
+    Repeat,
+    Tup,
+    Lit,
+    Path,
+    Paren,
+    If,
+    While,
+    ForLoop,
+    Loop,
+    Match,
+    ConstBlock,
+    Block,
+    TryBlock,
+    Struct,
+    Async,
+    Await, // ordered at `PREC_POSTFIX`, together with the postfix group
+    Err,
+}
+
+impl ExprPrecedence {
+    /// Returns the numeric precedence of this expression kind.
+    ///
+    /// The result is one of the `PREC_*` constants, except for the binop-like kinds
+    /// (`Binary`, `Cast`, `Type`, `Assign`, `AssignOp`), which take their value from
+    /// [`AssocOp::precedence`].
+    pub fn order(self) -> i8 {
+        match self {
+            Self::Closure => PREC_CLOSURE,
+
+            Self::Break
+            | Self::Continue
+            | Self::Ret
+            | Self::Yield
+            | Self::Yeet => PREC_JUMP,
+
+            // `Range` claims a higher precedence than `Assign`, yet `x .. x = x` fails to parse
+            // rather than parsing as `(x .. x) = x`. Ranking `Range` lower makes sure that
+            // `pprust` inserts parentheses in the right places to obtain the desired
+            // parse.
+            Self::Range => PREC_RANGE,
+
+            // Binop-like expr kinds; their values come from `AssocOp`.
+            Self::Binary(op) => AssocOp::from_ast_binop(op).precedence() as i8,
+            Self::Cast => AssocOp::As.precedence() as i8,
+            Self::Type => AssocOp::Colon.precedence() as i8,
+            Self::Assign | Self::AssignOp => AssocOp::Assign.precedence() as i8,
+
+            // Unary, prefix.
+            // `Let` is listed here because `let pats = expr` has `let pats =` as a "unary"
+            // prefix of `expr`. That is not exactly right, though: when `let _ = a` is the LHS
+            // of a binop, parens are sometimes needed. E.g. `(let _ = a) && b` can be printed
+            // as `let _ = a && b`, but `(let _ = a) < b` has to be printed as-is with parens.
+            Self::Box | Self::AddrOf | Self::Let | Self::Unary => PREC_PREFIX,
+
+            // Unary, postfix
+            Self::Await
+            | Self::Call
+            | Self::MethodCall
+            | Self::Field
+            | Self::Index
+            | Self::Try
+            | Self::InlineAsm
+            | Self::Mac => PREC_POSTFIX,
+
+            // Never need parens
+            Self::Array
+            | Self::Repeat
+            | Self::Tup
+            | Self::Lit
+            | Self::Path
+            | Self::Paren
+            | Self::If
+            | Self::While
+            | Self::ForLoop
+            | Self::Loop
+            | Self::Match
+            | Self::ConstBlock
+            | Self::Block
+            | Self::TryBlock
+            | Self::Async
+            | Self::Struct
+            | Self::Err => PREC_PAREN,
+        }
+    }
+}
+
+/// In `let p = e`, operators whose precedence is `<=` this value require parentheses in `e`.
+pub fn prec_let_scrutinee_needs_par() -> usize {
+    AssocOp::precedence(&AssocOp::LAnd)
+}
+
+/// Does `e` in `let _ = e` need parentheses, given the `order` of `e`?
+///
+/// Conversely, for `(let _ = a) OP b` with `order` being that of `OP`, `true` means that the
+/// expression can be printed as `let _ = a OP b`.
+pub fn needs_par_as_let_scrutinee(order: i8) -> bool {
+    let threshold = prec_let_scrutinee_needs_par() as i8;
+    order <= threshold
+}
+
+/// Returns `true` if `value` syntactically contains an "exterior" struct literal, i.e. one not
+/// enclosed in parens or other delimiters. `X { y: 1 }`, `X { y: 1 }.method()`,
+/// `foo == X { y: 1 }` and `X { y: 1 } == foo` all qualify; `(X { y: 1 }) == foo` does not.
+pub fn contains_exterior_struct_lit(value: &ast::Expr) -> bool {
+    use ast::ExprKind::*;
+    match &value.kind {
+        Struct(..) => true,
+
+        // Two operands, e.g. `X { y: 1 } + X { y: 2 }`: check both sides.
+        Assign(lhs, rhs, _) | AssignOp(_, lhs, rhs) | Binary(_, lhs, rhs) => {
+            contains_exterior_struct_lit(lhs) || contains_exterior_struct_lit(rhs)
+        }
+
+        // One operand, e.g. `&X { y: 1 }` or `X { y: 1 }.y`.
+        Await(inner)
+        | Unary(_, inner)
+        | Cast(inner, _)
+        | Type(inner, _)
+        | Field(inner, _)
+        | Index(inner, _) => {
+            contains_exterior_struct_lit(inner)
+        }
+
+        // Only the first expression is inspected, as in `X { y: 1 }.bar(...)`.
+        MethodCall(.., args, _) => {
+            contains_exterior_struct_lit(&args[0])
+        }
+
+        _ => false,
+    }
+}
diff --git a/compiler/rustc_ast/src/util/unicode.rs b/compiler/rustc_ast/src/util/unicode.rs
new file mode 100644
index 000000000..f009f7b30
--- /dev/null
+++ b/compiler/rustc_ast/src/util/unicode.rs
@@ -0,0 +1,35 @@
+pub const TEXT_FLOW_CONTROL_CHARS: &[char] = &[
+    '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
+    '\u{2069}', // same nine code points that `contains_text_flow_control_chars` looks for
+];
+
+/// Reports whether `s` contains any of the characters in `TEXT_FLOW_CONTROL_CHARS`.
+///
+/// The search runs over the UTF-8 bytes of `s` instead of decoding it `char` by `char`,
+/// and stops at the first match.
+#[inline]
+pub fn contains_text_flow_control_chars(s: &str) -> bool {
+    // Each character of interest is three bytes long in UTF-8 and starts with 0xE2:
+    //   U+202A - E2 80 AA
+    //   U+202B - E2 80 AB
+    //   U+202C - E2 80 AC
+    //   U+202D - E2 80 AD
+    //   U+202E - E2 80 AE
+    //   U+2066 - E2 81 A6
+    //   U+2067 - E2 81 A7
+    //   U+2068 - E2 81 A8
+    //   U+2069 - E2 81 A9
+    // So: find each 0xE2 byte, then check the two bytes after it against those ranges.
+    let mut rest = s.as_bytes();
+    while let Some(pos) = core::slice::memchr::memchr(0xE2, rest) {
+        // `s` is valid UTF-8, so a 0xE2 byte is always followed by two more bytes.
+        let seq = &rest[pos..pos + 3];
+        if matches!(seq, [_, 0x80, 0xAA..=0xAE] | [_, 0x81, 0xA6..=0xA9]) {
+            return true;
+        }
+        // Resume the search right after this three-byte sequence.
+        rest = &rest[pos + 3..];
+    }
+    // No 0xE2 byte is left, so none of the characters can occur.
+    false
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:02:58 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:02:58 +0000
commit	698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree	173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_ast/src/util
parent	Initial commit. (diff)
download	rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip