From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_expand/src/mbe/macro_check.rs | 652 ++++++++++++ compiler/rustc_expand/src/mbe/macro_parser.rs | 704 ++++++++++++ compiler/rustc_expand/src/mbe/macro_rules.rs | 1420 +++++++++++++++++++++++++ compiler/rustc_expand/src/mbe/metavar_expr.rs | 161 +++ compiler/rustc_expand/src/mbe/quoted.rs | 366 +++++++ compiler/rustc_expand/src/mbe/transcribe.rs | 580 ++++++++++ 6 files changed, 3883 insertions(+) create mode 100644 compiler/rustc_expand/src/mbe/macro_check.rs create mode 100644 compiler/rustc_expand/src/mbe/macro_parser.rs create mode 100644 compiler/rustc_expand/src/mbe/macro_rules.rs create mode 100644 compiler/rustc_expand/src/mbe/metavar_expr.rs create mode 100644 compiler/rustc_expand/src/mbe/quoted.rs create mode 100644 compiler/rustc_expand/src/mbe/transcribe.rs (limited to 'compiler/rustc_expand/src/mbe') diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs new file mode 100644 index 000000000..8994a2f78 --- /dev/null +++ b/compiler/rustc_expand/src/mbe/macro_check.rs @@ -0,0 +1,652 @@ +//! Checks that meta-variables in macro definition are correctly declared and used. +//! +//! # What is checked +//! +//! ## Meta-variables must not be bound twice +//! +//! ```compile_fail +//! macro_rules! foo { ($x:tt $x:tt) => { $x }; } +//! ``` +//! +//! This check is sound (no false-negative) and complete (no false-positive). +//! +//! ## Meta-variables must not be free +//! +//! ``` +//! macro_rules! foo { () => { $x }; } +//! ``` +//! +//! This check is also done at macro instantiation but only if the branch is taken. +//! +//! ## Meta-variables must repeat at least as many times as their binder +//! +//! ``` +//! macro_rules! foo { ($($x:tt)*) => { $x }; } +//! ``` +//! +//! This check is also done at macro instantiation but only if the branch is taken. +//! +//! ## Meta-variables must repeat with the same Kleene operators as their binder +//! +//! ``` +//! macro_rules! foo { ($($x:tt)+) => { $($x)* }; } +//! ``` +//! +//! This check is not done at macro instantiation. +//! +//! # Disclaimer +//! +//! In the presence of nested macros (a macro defined in a macro), those checks may have false +//! positives and false negatives. We try to detect those cases by recognizing potential macro +//! definitions in RHSes, but nested macros may be hidden through the use of particular values of +//! meta-variables. +//! +//! ## Examples of false positive +//! +//! False positives can come from cases where we don't recognize a nested macro, because it depends +//! on particular values of meta-variables. In the following example, we think both instances of +//! `$x` are free, which is a correct statement if `$name` is anything but `macro_rules`. But when +//! `$name` is `macro_rules`, like in the instantiation below, then `$x:tt` is actually a binder of +//! the nested macro and `$x` is bound to it. +//! +//! ``` +//! macro_rules! foo { ($name:ident) => { $name! bar { ($x:tt) => { $x }; } }; } +//! foo!(macro_rules); +//! ``` +//! +//! False positives can also come from cases where we think there is a nested macro while there +//! isn't. In the following example, we think `$x` is free, which is incorrect because `bar` is not +//! a nested macro since it is not evaluated as code by `stringify!`. +//! +//! ``` +//! macro_rules! foo { () => { stringify!(macro_rules! bar { () => { $x }; }) }; } +//! ``` +//! +//! ## Examples of false negative +//! +//! False negatives can come from cases where we don't recognize a meta-variable, because it depends +//! on particular values of meta-variables. In the following examples, we don't see that if `$d` is +//! instantiated with `$` then `$d z` becomes `$z` in the nested macro definition and is thus a free +//! meta-variable. Note however, that if `foo` is instantiated, then we would check the definition +//! of `bar` and would see the issue. +//! +//! ``` +//! macro_rules! foo { ($d:tt) => { macro_rules! bar { ($y:tt) => { $d z }; } }; } +//! ``` +//! +//! # How it is checked +//! +//! There are 3 main functions: `check_binders`, `check_occurrences`, and `check_nested_macro`. They +//! all need some kind of environment. +//! +//! ## Environments +//! +//! Environments are used to pass information. +//! +//! ### From LHS to RHS +//! +//! When checking a LHS with `check_binders`, we produce (and use) an environment for binders, +//! namely `Binders`. This is a mapping from binder name to information about that binder: the span +//! of the binder for error messages and the stack of Kleene operators under which it was bound in +//! the LHS. +//! +//! This environment is used by both the LHS and RHS. The LHS uses it to detect duplicate binders. +//! The RHS uses it to detect the other errors. +//! +//! ### From outer macro to inner macro +//! +//! When checking the RHS of an outer macro and we detect a nested macro definition, we push the +//! current state, namely `MacroState`, to an environment of nested macro definitions. Each state +//! stores the LHS binders when entering the macro definition as well as the stack of Kleene +//! operators under which the inner macro is defined in the RHS. +//! +//! This environment is a stack representing the nesting of macro definitions. As such, the stack of +//! Kleene operators under which a meta-variable is repeating is the concatenation of the stacks +//! stored when entering a macro definition starting from the state in which the meta-variable is +//! bound. +use crate::mbe::{KleeneToken, TokenTree}; + +use rustc_ast::token::{Delimiter, Token, TokenKind}; +use rustc_ast::{NodeId, DUMMY_NODE_ID}; +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::MultiSpan; +use rustc_session::lint::builtin::{META_VARIABLE_MISUSE, MISSING_FRAGMENT_SPECIFIER}; +use rustc_session::parse::ParseSess; +use rustc_span::symbol::kw; +use rustc_span::{symbol::MacroRulesNormalizedIdent, Span}; + +use smallvec::SmallVec; + +use std::iter; + +/// Stack represented as linked list. +/// +/// Those are used for environments because they grow incrementally and are not mutable. +enum Stack<'a, T> { + /// Empty stack. + Empty, + /// A non-empty stack. + Push { + /// The top element. + top: T, + /// The previous elements. + prev: &'a Stack<'a, T>, + }, +} + +impl<'a, T> Stack<'a, T> { + /// Returns whether a stack is empty. + fn is_empty(&self) -> bool { + matches!(*self, Stack::Empty) + } + + /// Returns a new stack with an element of top. + fn push(&'a self, top: T) -> Stack<'a, T> { + Stack::Push { top, prev: self } + } +} + +impl<'a, T> Iterator for &'a Stack<'a, T> { + type Item = &'a T; + + // Iterates from top to bottom of the stack. + fn next(&mut self) -> Option<&'a T> { + match *self { + Stack::Empty => None, + Stack::Push { ref top, ref prev } => { + *self = prev; + Some(top) + } + } + } +} + +impl From<&Stack<'_, KleeneToken>> for SmallVec<[KleeneToken; 1]> { + fn from(ops: &Stack<'_, KleeneToken>) -> SmallVec<[KleeneToken; 1]> { + let mut ops: SmallVec<[KleeneToken; 1]> = ops.cloned().collect(); + // The stack is innermost on top. We want outermost first. + ops.reverse(); + ops + } +} + +/// Information attached to a meta-variable binder in LHS. +struct BinderInfo { + /// The span of the meta-variable in LHS. + span: Span, + /// The stack of Kleene operators (outermost first). + ops: SmallVec<[KleeneToken; 1]>, +} + +/// An environment of meta-variables to their binder information. +type Binders = FxHashMap; + +/// The state at which we entered a macro definition in the RHS of another macro definition. +struct MacroState<'a> { + /// The binders of the branch where we entered the macro definition. + binders: &'a Binders, + /// The stack of Kleene operators (outermost first) where we entered the macro definition. + ops: SmallVec<[KleeneToken; 1]>, +} + +/// Checks that meta-variables are used correctly in a macro definition. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `span` is used when no spans are available +/// - `lhses` and `rhses` should have the same length and represent the macro definition +pub(super) fn check_meta_variables( + sess: &ParseSess, + node_id: NodeId, + span: Span, + lhses: &[TokenTree], + rhses: &[TokenTree], +) -> bool { + if lhses.len() != rhses.len() { + sess.span_diagnostic.span_bug(span, "length mismatch between LHSes and RHSes") + } + let mut valid = true; + for (lhs, rhs) in iter::zip(lhses, rhses) { + let mut binders = Binders::default(); + check_binders(sess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut valid); + check_occurrences(sess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut valid); + } + valid +} + +/// Checks `lhs` as part of the LHS of a macro definition, extends `binders` with new binders, and +/// sets `valid` to false in case of errors. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `lhs` is checked as part of a LHS +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the LHS +/// - `ops` is the stack of Kleene operators from the LHS +/// - `valid` is set in case of errors +fn check_binders( + sess: &ParseSess, + node_id: NodeId, + lhs: &TokenTree, + macros: &Stack<'_, MacroState<'_>>, + binders: &mut Binders, + ops: &Stack<'_, KleeneToken>, + valid: &mut bool, +) { + match *lhs { + TokenTree::Token(..) => {} + // This can only happen when checking a nested macro because this LHS is then in the RHS of + // the outer macro. See ui/macros/macro-of-higher-order.rs where $y:$fragment in the + // LHS of the nested macro (and RHS of the outer macro) is parsed as MetaVar(y) Colon + // MetaVar(fragment) and not as MetaVarDecl(y, fragment). + TokenTree::MetaVar(span, name) => { + if macros.is_empty() { + sess.span_diagnostic.span_bug(span, "unexpected MetaVar in lhs"); + } + let name = MacroRulesNormalizedIdent::new(name); + // There are 3 possibilities: + if let Some(prev_info) = binders.get(&name) { + // 1. The meta-variable is already bound in the current LHS: This is an error. + let mut span = MultiSpan::from_span(span); + span.push_span_label(prev_info.span, "previous declaration"); + buffer_lint(sess, span, node_id, "duplicate matcher binding"); + } else if get_binder_info(macros, binders, name).is_none() { + // 2. The meta-variable is free: This is a binder. + binders.insert(name, BinderInfo { span, ops: ops.into() }); + } else { + // 3. The meta-variable is bound: This is an occurrence. + check_occurrences(sess, node_id, lhs, macros, binders, ops, valid); + } + } + // Similarly, this can only happen when checking a toplevel macro. + TokenTree::MetaVarDecl(span, name, kind) => { + if kind.is_none() && node_id != DUMMY_NODE_ID { + // FIXME: Report this as a hard error eventually and remove equivalent errors from + // `parse_tt_inner` and `nameize`. Until then the error may be reported twice, once + // as a hard error and then once as a buffered lint. + sess.buffer_lint( + MISSING_FRAGMENT_SPECIFIER, + span, + node_id, + "missing fragment specifier", + ); + } + if !macros.is_empty() { + sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs"); + } + let name = MacroRulesNormalizedIdent::new(name); + if let Some(prev_info) = get_binder_info(macros, binders, name) { + // Duplicate binders at the top-level macro definition are errors. The lint is only + // for nested macro definitions. + sess.span_diagnostic + .struct_span_err(span, "duplicate matcher binding") + .span_label(span, "duplicate binding") + .span_label(prev_info.span, "previous binding") + .emit(); + *valid = false; + } else { + binders.insert(name, BinderInfo { span, ops: ops.into() }); + } + } + // `MetaVarExpr` can not appear in the LHS of a macro arm + TokenTree::MetaVarExpr(..) => {} + TokenTree::Delimited(_, ref del) => { + for tt in &del.tts { + check_binders(sess, node_id, tt, macros, binders, ops, valid); + } + } + TokenTree::Sequence(_, ref seq) => { + let ops = ops.push(seq.kleene); + for tt in &seq.tts { + check_binders(sess, node_id, tt, macros, binders, &ops, valid); + } + } + } +} + +/// Returns the binder information of a meta-variable. +/// +/// Arguments: +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the current binders +/// - `name` is the name of the meta-variable we are looking for +fn get_binder_info<'a>( + mut macros: &'a Stack<'a, MacroState<'a>>, + binders: &'a Binders, + name: MacroRulesNormalizedIdent, +) -> Option<&'a BinderInfo> { + binders.get(&name).or_else(|| macros.find_map(|state| state.binders.get(&name))) +} + +/// Checks `rhs` as part of the RHS of a macro definition and sets `valid` to false in case of +/// errors. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `rhs` is checked as part of a RHS +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `valid` is set in case of errors +fn check_occurrences( + sess: &ParseSess, + node_id: NodeId, + rhs: &TokenTree, + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + valid: &mut bool, +) { + match *rhs { + TokenTree::Token(..) => {} + TokenTree::MetaVarDecl(span, _name, _kind) => { + sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in rhs") + } + TokenTree::MetaVar(span, name) => { + let name = MacroRulesNormalizedIdent::new(name); + check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name); + } + TokenTree::MetaVarExpr(dl, ref mve) => { + let Some(name) = mve.ident().map(MacroRulesNormalizedIdent::new) else { + return; + }; + check_ops_is_prefix(sess, node_id, macros, binders, ops, dl.entire(), name); + } + TokenTree::Delimited(_, ref del) => { + check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid); + } + TokenTree::Sequence(_, ref seq) => { + let ops = ops.push(seq.kleene); + check_nested_occurrences(sess, node_id, &seq.tts, macros, binders, &ops, valid); + } + } +} + +/// Represents the processed prefix of a nested macro. +#[derive(Clone, Copy, PartialEq, Eq)] +enum NestedMacroState { + /// Nothing that matches a nested macro definition was processed yet. + Empty, + /// The token `macro_rules` was processed. + MacroRules, + /// The tokens `macro_rules!` were processed. + MacroRulesNot, + /// The tokens `macro_rules!` followed by a name were processed. The name may be either directly + /// an identifier or a meta-variable (that hopefully would be instantiated by an identifier). + MacroRulesNotName, + /// The keyword `macro` was processed. + Macro, + /// The keyword `macro` followed by a name was processed. + MacroName, + /// The keyword `macro` followed by a name and a token delimited by parentheses was processed. + MacroNameParen, +} + +/// Checks `tts` as part of the RHS of a macro definition, tries to recognize nested macro +/// definitions, and sets `valid` to false in case of errors. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `tts` is checked as part of a RHS and may contain macro definitions +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `valid` is set in case of errors +fn check_nested_occurrences( + sess: &ParseSess, + node_id: NodeId, + tts: &[TokenTree], + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + valid: &mut bool, +) { + let mut state = NestedMacroState::Empty; + let nested_macros = macros.push(MacroState { binders, ops: ops.into() }); + let mut nested_binders = Binders::default(); + for tt in tts { + match (state, tt) { + ( + NestedMacroState::Empty, + &TokenTree::Token(Token { kind: TokenKind::Ident(name, false), .. }), + ) => { + if name == kw::MacroRules { + state = NestedMacroState::MacroRules; + } else if name == kw::Macro { + state = NestedMacroState::Macro; + } + } + ( + NestedMacroState::MacroRules, + &TokenTree::Token(Token { kind: TokenKind::Not, .. }), + ) => { + state = NestedMacroState::MacroRulesNot; + } + ( + NestedMacroState::MacroRulesNot, + &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }), + ) => { + state = NestedMacroState::MacroRulesNotName; + } + (NestedMacroState::MacroRulesNot, &TokenTree::MetaVar(..)) => { + state = NestedMacroState::MacroRulesNotName; + // We check that the meta-variable is correctly used. + check_occurrences(sess, node_id, tt, macros, binders, ops, valid); + } + (NestedMacroState::MacroRulesNotName, &TokenTree::Delimited(_, ref del)) + | (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del)) + if del.delim == Delimiter::Brace => + { + let macro_rules = state == NestedMacroState::MacroRulesNotName; + state = NestedMacroState::Empty; + let rest = + check_nested_macro(sess, node_id, macro_rules, &del.tts, &nested_macros, valid); + // If we did not check the whole macro definition, then check the rest as if outside + // the macro definition. + check_nested_occurrences( + sess, + node_id, + &del.tts[rest..], + macros, + binders, + ops, + valid, + ); + } + ( + NestedMacroState::Macro, + &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }), + ) => { + state = NestedMacroState::MacroName; + } + (NestedMacroState::Macro, &TokenTree::MetaVar(..)) => { + state = NestedMacroState::MacroName; + // We check that the meta-variable is correctly used. + check_occurrences(sess, node_id, tt, macros, binders, ops, valid); + } + (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del)) + if del.delim == Delimiter::Parenthesis => + { + state = NestedMacroState::MacroNameParen; + nested_binders = Binders::default(); + check_binders( + sess, + node_id, + tt, + &nested_macros, + &mut nested_binders, + &Stack::Empty, + valid, + ); + } + (NestedMacroState::MacroNameParen, &TokenTree::Delimited(_, ref del)) + if del.delim == Delimiter::Brace => + { + state = NestedMacroState::Empty; + check_occurrences( + sess, + node_id, + tt, + &nested_macros, + &nested_binders, + &Stack::Empty, + valid, + ); + } + (_, ref tt) => { + state = NestedMacroState::Empty; + check_occurrences(sess, node_id, tt, macros, binders, ops, valid); + } + } + } +} + +/// Checks the body of nested macro, returns where the check stopped, and sets `valid` to false in +/// case of errors. +/// +/// The token trees are checked as long as they look like a list of (LHS) => {RHS} token trees. This +/// check is a best-effort to detect a macro definition. It returns the position in `tts` where we +/// stopped checking because we detected we were not in a macro definition anymore. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `macro_rules` specifies whether the macro is `macro_rules` +/// - `tts` is checked as a list of (LHS) => {RHS} +/// - `macros` is the stack of outer macros +/// - `valid` is set in case of errors +fn check_nested_macro( + sess: &ParseSess, + node_id: NodeId, + macro_rules: bool, + tts: &[TokenTree], + macros: &Stack<'_, MacroState<'_>>, + valid: &mut bool, +) -> usize { + let n = tts.len(); + let mut i = 0; + let separator = if macro_rules { TokenKind::Semi } else { TokenKind::Comma }; + loop { + // We expect 3 token trees: `(LHS) => {RHS}`. The separator is checked after. + if i + 2 >= n + || !tts[i].is_delimited() + || !tts[i + 1].is_token(&TokenKind::FatArrow) + || !tts[i + 2].is_delimited() + { + break; + } + let lhs = &tts[i]; + let rhs = &tts[i + 2]; + let mut binders = Binders::default(); + check_binders(sess, node_id, lhs, macros, &mut binders, &Stack::Empty, valid); + check_occurrences(sess, node_id, rhs, macros, &binders, &Stack::Empty, valid); + // Since the last semicolon is optional for `macro_rules` macros and decl_macro are not terminated, + // we increment our checked position by how many token trees we already checked (the 3 + // above) before checking for the separator. + i += 3; + if i == n || !tts[i].is_token(&separator) { + break; + } + // We increment our checked position for the semicolon. + i += 1; + } + i +} + +/// Checks that a meta-variable occurrence is valid. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `span` is the span of the meta-variable to check +/// - `name` is the name of the meta-variable to check +fn check_ops_is_prefix( + sess: &ParseSess, + node_id: NodeId, + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + span: Span, + name: MacroRulesNormalizedIdent, +) { + let macros = macros.push(MacroState { binders, ops: ops.into() }); + // Accumulates the stacks the operators of each state until (and including when) the + // meta-variable is found. The innermost stack is first. + let mut acc: SmallVec<[&SmallVec<[KleeneToken; 1]>; 1]> = SmallVec::new(); + for state in ¯os { + acc.push(&state.ops); + if let Some(binder) = state.binders.get(&name) { + // This variable concatenates the stack of operators from the RHS of the LHS where the + // meta-variable was defined to where it is used (in possibly nested macros). The + // outermost operator is first. + let mut occurrence_ops: SmallVec<[KleeneToken; 2]> = SmallVec::new(); + // We need to iterate from the end to start with outermost stack. + for ops in acc.iter().rev() { + occurrence_ops.extend_from_slice(ops); + } + ops_is_prefix(sess, node_id, span, name, &binder.ops, &occurrence_ops); + return; + } + } + buffer_lint(sess, span.into(), node_id, &format!("unknown macro variable `{}`", name)); +} + +/// Returns whether `binder_ops` is a prefix of `occurrence_ops`. +/// +/// The stack of Kleene operators of a meta-variable occurrence just needs to have the stack of +/// Kleene operators of its binder as a prefix. +/// +/// Consider $i in the following example: +/// ```ignore (illustrative) +/// ( $( $i:ident = $($j:ident),+ );* ) => { $($( $i += $j; )+)* } +/// ``` +/// It occurs under the Kleene stack ["*", "+"] and is bound under ["*"] only. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `span` is the span of the meta-variable being check +/// - `name` is the name of the meta-variable being check +/// - `binder_ops` is the stack of Kleene operators for the binder +/// - `occurrence_ops` is the stack of Kleene operators for the occurrence +fn ops_is_prefix( + sess: &ParseSess, + node_id: NodeId, + span: Span, + name: MacroRulesNormalizedIdent, + binder_ops: &[KleeneToken], + occurrence_ops: &[KleeneToken], +) { + for (i, binder) in binder_ops.iter().enumerate() { + if i >= occurrence_ops.len() { + let mut span = MultiSpan::from_span(span); + span.push_span_label(binder.span, "expected repetition"); + let message = &format!("variable '{}' is still repeating at this depth", name); + buffer_lint(sess, span, node_id, message); + return; + } + let occurrence = &occurrence_ops[i]; + if occurrence.op != binder.op { + let mut span = MultiSpan::from_span(span); + span.push_span_label(binder.span, "expected repetition"); + span.push_span_label(occurrence.span, "conflicting repetition"); + let message = "meta-variable repeats with different Kleene operator"; + buffer_lint(sess, span, node_id, message); + return; + } + } +} + +fn buffer_lint(sess: &ParseSess, span: MultiSpan, node_id: NodeId, message: &str) { + // Macros loaded from other crates have dummy node ids. + if node_id != DUMMY_NODE_ID { + sess.buffer_lint(&META_VARIABLE_MISUSE, span, node_id, message); + } +} diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs new file mode 100644 index 000000000..4fa91dfea --- /dev/null +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -0,0 +1,704 @@ +//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals +//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads +//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in +//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier +//! fit for Macro-by-Example-style rules. +//! +//! (In order to prevent the pathological case, we'd need to lazily construct the resulting +//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old +//! matcher positions, but it would also save overhead) +//! +//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. +//! The macro parser restricts itself to the features of finite state automata. Earley parsers +//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. +//! +//! Quick intro to how the parser works: +//! +//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually +//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`. +//! +//! The parser walks through the input a token at a time, maintaining a list +//! of threads consistent with the current position in the input string: `cur_mps`. +//! +//! As it processes them, it fills up `eof_mps` with threads that would be valid if +//! the macro invocation is now over, `bb_mps` with threads that are waiting on +//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting +//! on a particular token. Most of the logic concerns moving the · through the +//! repetitions indicated by Kleene stars. The rules for moving the · without +//! consuming any input are called epsilon transitions. It only advances or calls +//! out to the real Rust parser when no `cur_mps` threads remain. +//! +//! Example: +//! +//! ```text, ignore +//! Start parsing a a a a b against [· a $( a )* a b]. +//! +//! Remaining input: a a a a b +//! next: [· a $( a )* a b] +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a a b +//! cur: [a · $( a )* a b] +//! Descend/Skip (first position). +//! next: [a $( · a )* a b] [a $( a )* · a b]. +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first position) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first position) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first position) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over a b. - - - +//! +//! Remaining input: '' +//! eof: [a $( a )* a b ·] +//! ``` + +pub(crate) use NamedMatch::*; +pub(crate) use ParseResult::*; + +use crate::mbe::{KleeneOp, TokenTree}; + +use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token}; +use rustc_lint_defs::pluralize; +use rustc_parse::parser::{NtOrTt, Parser}; +use rustc_span::symbol::MacroRulesNormalizedIdent; +use rustc_span::Span; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::Lrc; +use rustc_span::symbol::Ident; +use std::borrow::Cow; +use std::collections::hash_map::Entry::{Occupied, Vacant}; + +/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from) +/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching. +/// Notable differences to `mbe::TokenTree`: +/// - It is non-recursive, i.e. there is no nesting. +/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are +/// represented explicitly, as is the very end of the matcher. +/// +/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves +/// simply incrementing the current matcher position index by one. +pub(super) enum MatcherLoc { + Token { + token: Token, + }, + Delimited, + Sequence { + op: KleeneOp, + num_metavar_decls: usize, + idx_first_after: usize, + next_metavar: usize, + seq_depth: usize, + }, + SequenceKleeneOpNoSep { + op: KleeneOp, + idx_first: usize, + }, + SequenceSep { + separator: Token, + }, + SequenceKleeneOpAfterSep { + idx_first: usize, + }, + MetaVarDecl { + span: Span, + bind: Ident, + kind: Option, + next_metavar: usize, + seq_depth: usize, + }, + Eof, +} + +pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec { + fn inner( + tts: &[TokenTree], + locs: &mut Vec, + next_metavar: &mut usize, + seq_depth: usize, + ) { + for tt in tts { + match tt { + TokenTree::Token(token) => { + locs.push(MatcherLoc::Token { token: token.clone() }); + } + TokenTree::Delimited(span, delimited) => { + let open_token = Token::new(token::OpenDelim(delimited.delim), span.open); + let close_token = Token::new(token::CloseDelim(delimited.delim), span.close); + + locs.push(MatcherLoc::Delimited); + locs.push(MatcherLoc::Token { token: open_token }); + inner(&delimited.tts, locs, next_metavar, seq_depth); + locs.push(MatcherLoc::Token { token: close_token }); + } + TokenTree::Sequence(_, seq) => { + // We can't determine `idx_first_after` and construct the final + // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end + // pieces are processed. So we push a dummy value (`Eof` is cheapest to + // construct) now, and overwrite it with the proper value below. + let dummy = MatcherLoc::Eof; + locs.push(dummy); + + let next_metavar_orig = *next_metavar; + let op = seq.kleene.op; + let idx_first = locs.len(); + let idx_seq = idx_first - 1; + inner(&seq.tts, locs, next_metavar, seq_depth + 1); + + if let Some(separator) = &seq.separator { + locs.push(MatcherLoc::SequenceSep { separator: separator.clone() }); + locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first }); + } else { + locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first }); + } + + // Overwrite the dummy value pushed above with the proper value. + locs[idx_seq] = MatcherLoc::Sequence { + op, + num_metavar_decls: seq.num_captures, + idx_first_after: locs.len(), + next_metavar: next_metavar_orig, + seq_depth, + }; + } + &TokenTree::MetaVarDecl(span, bind, kind) => { + locs.push(MatcherLoc::MetaVarDecl { + span, + bind, + kind, + next_metavar: *next_metavar, + seq_depth, + }); + *next_metavar += 1; + } + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), + } + } + } + + let mut locs = vec![]; + let mut next_metavar = 0; + inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0); + + // A final entry is needed for eof. + locs.push(MatcherLoc::Eof); + + locs +} + +/// A single matcher position, representing the state of matching. +struct MatcherPos { + /// The index into `TtParser::locs`, which represents the "dot". + idx: usize, + + /// The matches made against metavar decls so far. On a successful match, this vector ends up + /// with one element per metavar decl in the matcher. Each element records token trees matched + /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if + /// the corresponding metavar decl is within a sequence. + /// + /// It is critical to performance that this is an `Lrc`, because it gets cloned frequently when + /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end + /// up failing. + matches: Lrc>, +} + +// This type is used a lot. Make sure it doesn't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(MatcherPos, 16); + +impl MatcherPos { + /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites, + /// and both are hot enough to be always worth inlining. + #[inline(always)] + fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) { + let matches = Lrc::make_mut(&mut self.matches); + match seq_depth { + 0 => { + // We are not within a sequence. Just append `m`. + assert_eq!(metavar_idx, matches.len()); + matches.push(m); + } + _ => { + // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth + // and append `m` to its vector. + let mut curr = &mut matches[metavar_idx]; + for _ in 0..seq_depth - 1 { + match curr { + MatchedSeq(seq) => curr = seq.last_mut().unwrap(), + _ => unreachable!(), + } + } + match curr { + MatchedSeq(seq) => seq.push(m), + _ => unreachable!(), + } + } + } + } +} + +enum EofMatcherPositions { + None, + One(MatcherPos), + Multiple, +} + +/// Represents the possible results of an attempted parse. +pub(crate) enum ParseResult { + /// Parsed successfully. + Success(T), + /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected + /// end of macro invocation. Otherwise, it indicates that no rules expected the given token. + Failure(Token, &'static str), + /// Fatal error (malformed macro?). Abort compilation. + Error(rustc_span::Span, String), + ErrorReported, +} + +/// A `ParseResult` where the `Success` variant contains a mapping of +/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping +/// of metavars to the token trees they bind to. +pub(crate) type NamedParseResult = ParseResult>; + +/// Count how many metavars declarations are in `matcher`. +pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize { + matcher + .iter() + .map(|tt| match tt { + TokenTree::MetaVarDecl(..) => 1, + TokenTree::Sequence(_, seq) => seq.num_captures, + TokenTree::Delimited(_, delim) => count_metavar_decls(&delim.tts), + TokenTree::Token(..) => 0, + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), + }) + .sum() +} + +/// `NamedMatch` is a pattern-match result for a single metavar. All +/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type +/// (expr, item, etc). +/// +/// The in-memory structure of a particular `NamedMatch` represents the match +/// that occurred when a particular subset of a matcher was applied to a +/// particular token tree. +/// +/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of +/// the `MatchedNtNonTts`s, will depend on the token tree it was applied +/// to: each `MatchedSeq` corresponds to a single repetition in the originating +/// token tree. The depth of the `NamedMatch` structure will therefore depend +/// only on the nesting depth of repetitions in the originating token tree it +/// was derived from. +/// +/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular +/// meta variable. For example, if we are matching the following macro against the following +/// invocation... +/// +/// ```rust +/// macro_rules! foo { +/// ($($($x:ident),+);+) => {} +/// } +/// +/// foo!(a, b, c, d; a, b, c, d, e); +/// ``` +/// +/// Then, the tree will have the following shape: +/// +/// ```ignore (private-internal) +/// # use NamedMatch::*; +/// MatchedSeq([ +/// MatchedSeq([ +/// MatchedNonterminal(a), +/// MatchedNonterminal(b), +/// MatchedNonterminal(c), +/// MatchedNonterminal(d), +/// ]), +/// MatchedSeq([ +/// MatchedNonterminal(a), +/// MatchedNonterminal(b), +/// MatchedNonterminal(c), +/// MatchedNonterminal(d), +/// MatchedNonterminal(e), +/// ]) +/// ]) +/// ``` +#[derive(Debug, Clone)] +pub(crate) enum NamedMatch { + MatchedSeq(Vec), + + // A metavar match of type `tt`. + MatchedTokenTree(rustc_ast::tokenstream::TokenTree), + + // A metavar match of any type other than `tt`. + MatchedNonterminal(Lrc), +} + +/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison) +fn token_name_eq(t1: &Token, t2: &Token) -> bool { + if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { + ident1.name == ident2.name && is_raw1 == is_raw2 + } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { + ident1.name == ident2.name + } else { + t1.kind == t2.kind + } +} + +// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess +// allocations we have a single vector for each kind that is cleared and reused repeatedly. +pub struct TtParser { + macro_name: Ident, + + /// The set of current mps to be processed. This should be empty by the end of a successful + /// execution of `parse_tt_inner`. + cur_mps: Vec, + + /// The set of newly generated mps. These are used to replenish `cur_mps` in the function + /// `parse_tt`. + next_mps: Vec, + + /// The set of mps that are waiting for the black-box parser. + bb_mps: Vec, + + /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules + /// that have no metavars. + empty_matches: Lrc>, +} + +impl TtParser { + pub(super) fn new(macro_name: Ident) -> TtParser { + TtParser { + macro_name, + cur_mps: vec![], + next_mps: vec![], + bb_mps: vec![], + empty_matches: Lrc::new(vec![]), + } + } + + /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will + /// produce more mps in `next_mps` and `bb_mps`. + /// + /// # Returns + /// + /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept + /// track of through the mps generated. + fn parse_tt_inner( + &mut self, + matcher: &[MatcherLoc], + token: &Token, + ) -> Option { + // Matcher positions that would be valid if the macro invocation was over now. Only + // modified if `token == Eof`. + let mut eof_mps = EofMatcherPositions::None; + + while let Some(mut mp) = self.cur_mps.pop() { + match &matcher[mp.idx] { + MatcherLoc::Token { token: t } => { + // If it's a doc comment, we just ignore it and move on to the next tt in the + // matcher. This is a bug, but #95267 showed that existing programs rely on + // this behaviour, and changing it would require some care and a transition + // period. + // + // If the token matches, we can just advance the parser. + // + // Otherwise, this match has failed, there is nothing to do, and hopefully + // another mp in `cur_mps` will match. + if matches!(t, Token { kind: DocComment(..), .. }) { + mp.idx += 1; + self.cur_mps.push(mp); + } else if token_name_eq(&t, token) { + mp.idx += 1; + self.next_mps.push(mp); + } + } + MatcherLoc::Delimited => { + // Entering the delimeter is trivial. + mp.idx += 1; + self.cur_mps.push(mp); + } + &MatcherLoc::Sequence { + op, + num_metavar_decls, + idx_first_after, + next_metavar, + seq_depth, + } => { + // Install an empty vec for each metavar within the sequence. + for metavar_idx in next_metavar..next_metavar + num_metavar_decls { + mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![])); + } + + if op == KleeneOp::ZeroOrMore || op == KleeneOp::ZeroOrOne { + // Try zero matches of this sequence, by skipping over it. + self.cur_mps.push(MatcherPos { + idx: idx_first_after, + matches: mp.matches.clone(), // a cheap clone + }); + } + + // Try one or more matches of this sequence, by entering it. + mp.idx += 1; + self.cur_mps.push(mp); + } + &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => { + // We are past the end of a sequence with no separator. Try ending the + // sequence. If that's not possible, `ending_mp` will fail quietly when it is + // processed next time around the loop. + let ending_mp = MatcherPos { + idx: mp.idx + 1, // +1 skips the Kleene op + matches: mp.matches.clone(), // a cheap clone + }; + self.cur_mps.push(ending_mp); + + if op != KleeneOp::ZeroOrOne { + // Try another repetition. + mp.idx = idx_first; + self.cur_mps.push(mp); + } + } + MatcherLoc::SequenceSep { separator } => { + // We are past the end of a sequence with a separator but we haven't seen the + // separator yet. Try ending the sequence. If that's not possible, `ending_mp` + // will fail quietly when it is processed next time around the loop. + let ending_mp = MatcherPos { + idx: mp.idx + 2, // +2 skips the separator and the Kleene op + matches: mp.matches.clone(), // a cheap clone + }; + self.cur_mps.push(ending_mp); + + if token_name_eq(token, separator) { + // The separator matches the current token. Advance past it. + mp.idx += 1; + self.next_mps.push(mp); + } + } + &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => { + // We are past the sequence separator. This can't be a `?` Kleene op, because + // they don't permit separators. Try another repetition. + mp.idx = idx_first; + self.cur_mps.push(mp); + } + &MatcherLoc::MetaVarDecl { span, kind, .. } => { + // Built-in nonterminals never start with these tokens, so we can eliminate + // them from consideration. We use the span of the metavariable declaration + // to determine any edition-specific matching behavior for non-terminals. + if let Some(kind) = kind { + if Parser::nonterminal_may_begin_with(kind, token) { + self.bb_mps.push(mp); + } + } else { + // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used. + // Both this check and the one in `nameize` are necessary, surprisingly. + return Some(Error(span, "missing fragment specifier".to_string())); + } + } + MatcherLoc::Eof => { + // We are past the matcher's end, and not in a sequence. Try to end things. + debug_assert_eq!(mp.idx, matcher.len() - 1); + if *token == token::Eof { + eof_mps = match eof_mps { + EofMatcherPositions::None => EofMatcherPositions::One(mp), + EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => { + EofMatcherPositions::Multiple + } + } + } + } + } + } + + // If we reached the end of input, check that there is EXACTLY ONE possible matcher. + // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error. + if *token == token::Eof { + Some(match eof_mps { + EofMatcherPositions::One(mut eof_mp) => { + // Need to take ownership of the matches from within the `Lrc`. + Lrc::make_mut(&mut eof_mp.matches); + let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter(); + self.nameize(matcher, matches) + } + EofMatcherPositions::Multiple => { + Error(token.span, "ambiguity: multiple successful parses".to_string()) + } + EofMatcherPositions::None => Failure( + Token::new( + token::Eof, + if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, + ), + "missing tokens in macro arguments", + ), + }) + } else { + None + } + } + + /// Match the token stream from `parser` against `matcher`. + pub(super) fn parse_tt( + &mut self, + parser: &mut Cow<'_, Parser<'_>>, + matcher: &[MatcherLoc], + ) -> NamedParseResult { + // A queue of possible matcher positions. We initialize it with the matcher position in + // which the "dot" is before the first token of the first token tree in `matcher`. + // `parse_tt_inner` then processes all of these possible matcher positions and produces + // possible next positions into `next_mps`. After some post-processing, the contents of + // `next_mps` replenish `cur_mps` and we start over again. + self.cur_mps.clear(); + self.cur_mps.push(MatcherPos { idx: 0, matches: self.empty_matches.clone() }); + + loop { + self.next_mps.clear(); + self.bb_mps.clear(); + + // Process `cur_mps` until either we have finished the input or we need to get some + // parsing from the black-box parser done. + if let Some(res) = self.parse_tt_inner(matcher, &parser.token) { + return res; + } + + // `parse_tt_inner` handled all of `cur_mps`, so it's empty. + assert!(self.cur_mps.is_empty()); + + // Error messages here could be improved with links to original rules. + match (self.next_mps.len(), self.bb_mps.len()) { + (0, 0) => { + // There are no possible next positions AND we aren't waiting for the black-box + // parser: syntax error. + return Failure( + parser.token.clone(), + "no rules expected this token in macro call", + ); + } + + (_, 0) => { + // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then + // process the next token. + self.cur_mps.append(&mut self.next_mps); + parser.to_mut().bump(); + } + + (0, 1) => { + // We need to call the black-box parser to get some nonterminal. + let mut mp = self.bb_mps.pop().unwrap(); + let loc = &matcher[mp.idx]; + if let &MatcherLoc::MetaVarDecl { + span, + kind: Some(kind), + next_metavar, + seq_depth, + .. + } = loc + { + // We use the span of the metavariable declaration to determine any + // edition-specific matching behavior for non-terminals. + let nt = match parser.to_mut().parse_nonterminal(kind) { + Err(mut err) => { + err.span_label( + span, + format!( + "while parsing argument for this `{kind}` macro fragment" + ), + ) + .emit(); + return ErrorReported; + } + Ok(nt) => nt, + }; + let m = match nt { + NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)), + NtOrTt::Tt(tt) => MatchedTokenTree(tt), + }; + mp.push_match(next_metavar, seq_depth, m); + mp.idx += 1; + } else { + unreachable!() + } + self.cur_mps.push(mp); + } + + (_, _) => { + // Too many possibilities! + return self.ambiguity_error(matcher, parser.token.span); + } + } + + assert!(!self.cur_mps.is_empty()); + } + } + + fn ambiguity_error( + &self, + matcher: &[MatcherLoc], + token_span: rustc_span::Span, + ) -> NamedParseResult { + let nts = self + .bb_mps + .iter() + .map(|mp| match &matcher[mp.idx] { + MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => { + format!("{} ('{}')", kind, bind) + } + _ => unreachable!(), + }) + .collect::>() + .join(" or "); + + Error( + token_span, + format!( + "local ambiguity when calling macro `{}`: multiple parsing options: {}", + self.macro_name, + match self.next_mps.len() { + 0 => format!("built-in NTs {}.", nts), + n => format!("built-in NTs {} or {n} other option{s}.", nts, s = pluralize!(n)), + } + ), + ) + } + + fn nameize>( + &self, + matcher: &[MatcherLoc], + mut res: I, + ) -> NamedParseResult { + // Make that each metavar has _exactly one_ binding. If so, insert the binding into the + // `NamedParseResult`. Otherwise, it's an error. + let mut ret_val = FxHashMap::default(); + for loc in matcher { + if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc { + if kind.is_some() { + match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) { + Vacant(spot) => spot.insert(res.next().unwrap()), + Occupied(..) => { + return Error(span, format!("duplicated bind name: {}", bind)); + } + }; + } else { + // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used. + // Both this check and the one in `parse_tt_inner` are necessary, surprisingly. + return Error(span, "missing fragment specifier".to_string()); + } + } + } + Success(ret_val) + } +} diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs new file mode 100644 index 000000000..f7e1575af --- /dev/null +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -0,0 +1,1420 @@ +use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander}; +use crate::base::{SyntaxExtension, SyntaxExtensionKind}; +use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind}; +use crate::mbe; +use crate::mbe::macro_check; +use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser}; +use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc}; +use crate::mbe::transcribe::transcribe; + +use rustc_ast as ast; +use rustc_ast::token::{self, Delimiter, NonterminalKind, Token, TokenKind, TokenKind::*}; +use rustc_ast::tokenstream::{DelimSpan, TokenStream}; +use rustc_ast::{NodeId, DUMMY_NODE_ID}; +use rustc_ast_pretty::pprust; +use rustc_attr::{self as attr, TransparencyError}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; +use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, ErrorGuaranteed}; +use rustc_feature::Features; +use rustc_lint_defs::builtin::{ + RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS, +}; +use rustc_lint_defs::BuiltinLintDiagnostics; +use rustc_parse::parser::Parser; +use rustc_session::parse::ParseSess; +use rustc_session::Session; +use rustc_span::edition::Edition; +use rustc_span::hygiene::Transparency; +use rustc_span::source_map::SourceMap; +use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent}; +use rustc_span::Span; + +use std::borrow::Cow; +use std::collections::hash_map::Entry; +use std::{mem, slice}; +use tracing::debug; + +pub(crate) struct ParserAnyMacro<'a> { + parser: Parser<'a>, + + /// Span of the expansion site of the macro this parser is for + site_span: Span, + /// The ident of the macro we're parsing + macro_ident: Ident, + lint_node_id: NodeId, + is_trailing_mac: bool, + arm_span: Span, + /// Whether or not this macro is defined in the current crate + is_local: bool, +} + +pub(crate) fn annotate_err_with_kind(err: &mut Diagnostic, kind: AstFragmentKind, span: Span) { + match kind { + AstFragmentKind::Ty => { + err.span_label(span, "this macro call doesn't expand to a type"); + } + AstFragmentKind::Pat => { + err.span_label(span, "this macro call doesn't expand to a pattern"); + } + _ => {} + }; +} + +fn emit_frag_parse_err( + mut e: DiagnosticBuilder<'_, rustc_errors::ErrorGuaranteed>, + parser: &Parser<'_>, + orig_parser: &mut Parser<'_>, + site_span: Span, + arm_span: Span, + kind: AstFragmentKind, +) { + // FIXME(davidtwco): avoid depending on the error message text + if parser.token == token::Eof && e.message[0].0.expect_str().ends_with(", found ``") { + if !e.span.is_dummy() { + // early end of macro arm (#52866) + e.replace_span_with(parser.sess.source_map().next_point(parser.token.span)); + } + let msg = &e.message[0]; + e.message[0] = ( + rustc_errors::DiagnosticMessage::Str(format!( + "macro expansion ends with an incomplete expression: {}", + msg.0.expect_str().replace(", found ``", ""), + )), + msg.1, + ); + } + if e.span.is_dummy() { + // Get around lack of span in error (#30128) + e.replace_span_with(site_span); + if !parser.sess.source_map().is_imported(arm_span) { + e.span_label(arm_span, "in this macro arm"); + } + } else if parser.sess.source_map().is_imported(parser.token.span) { + e.span_label(site_span, "in this macro invocation"); + } + match kind { + // Try a statement if an expression is wanted but failed and suggest adding `;` to call. + AstFragmentKind::Expr => match parse_ast_fragment(orig_parser, AstFragmentKind::Stmts) { + Err(err) => err.cancel(), + Ok(_) => { + e.note( + "the macro call doesn't expand to an expression, but it can expand to a statement", + ); + e.span_suggestion_verbose( + site_span.shrink_to_hi(), + "add `;` to interpret the expansion as a statement", + ";", + Applicability::MaybeIncorrect, + ); + } + }, + _ => annotate_err_with_kind(&mut e, kind, site_span), + }; + e.emit(); +} + +impl<'a> ParserAnyMacro<'a> { + pub(crate) fn make(mut self: Box>, kind: AstFragmentKind) -> AstFragment { + let ParserAnyMacro { + site_span, + macro_ident, + ref mut parser, + lint_node_id, + arm_span, + is_trailing_mac, + is_local, + } = *self; + let snapshot = &mut parser.create_snapshot_for_diagnostic(); + let fragment = match parse_ast_fragment(parser, kind) { + Ok(f) => f, + Err(err) => { + emit_frag_parse_err(err, parser, snapshot, site_span, arm_span, kind); + return kind.dummy(site_span); + } + }; + + // We allow semicolons at the end of expressions -- e.g., the semicolon in + // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`, + // but `m!()` is allowed in expression positions (cf. issue #34706). + if kind == AstFragmentKind::Expr && parser.token == token::Semi { + if is_local { + parser.sess.buffer_lint_with_diagnostic( + SEMICOLON_IN_EXPRESSIONS_FROM_MACROS, + parser.token.span, + lint_node_id, + "trailing semicolon in macro used in expression position", + BuiltinLintDiagnostics::TrailingMacro(is_trailing_mac, macro_ident), + ); + } + parser.bump(); + } + + // Make sure we don't have any tokens left to parse so we don't silently drop anything. + let path = ast::Path::from_ident(macro_ident.with_span_pos(site_span)); + ensure_complete_parse(parser, &path, kind.name(), site_span); + fragment + } +} + +struct MacroRulesMacroExpander { + node_id: NodeId, + name: Ident, + span: Span, + transparency: Transparency, + lhses: Vec>, + rhses: Vec, + valid: bool, +} + +impl TTMacroExpander for MacroRulesMacroExpander { + fn expand<'cx>( + &self, + cx: &'cx mut ExtCtxt<'_>, + sp: Span, + input: TokenStream, + ) -> Box { + if !self.valid { + return DummyResult::any(sp); + } + expand_macro( + cx, + sp, + self.span, + self.node_id, + self.name, + self.transparency, + input, + &self.lhses, + &self.rhses, + ) + } +} + +fn macro_rules_dummy_expander<'cx>( + _: &'cx mut ExtCtxt<'_>, + span: Span, + _: TokenStream, +) -> Box { + DummyResult::any(span) +} + +fn trace_macros_note(cx_expansions: &mut FxIndexMap>, sp: Span, message: String) { + let sp = sp.macro_backtrace().last().map_or(sp, |trace| trace.call_site); + cx_expansions.entry(sp).or_default().push(message); +} + +/// Expands the rules based macro defined by `lhses` and `rhses` for a given +/// input `arg`. +fn expand_macro<'cx>( + cx: &'cx mut ExtCtxt<'_>, + sp: Span, + def_span: Span, + node_id: NodeId, + name: Ident, + transparency: Transparency, + arg: TokenStream, + lhses: &[Vec], + rhses: &[mbe::TokenTree], +) -> Box { + let sess = &cx.sess.parse_sess; + // Macros defined in the current crate have a real node id, + // whereas macros from an external crate have a dummy id. + let is_local = node_id != DUMMY_NODE_ID; + + if cx.trace_macros() { + let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(&arg)); + trace_macros_note(&mut cx.expansions, sp, msg); + } + + // Which arm's failure should we report? (the one furthest along) + let mut best_failure: Option<(Token, &str)> = None; + + // We create a base parser that can be used for the "black box" parts. + // Every iteration needs a fresh copy of that parser. However, the parser + // is not mutated on many of the iterations, particularly when dealing with + // macros like this: + // + // macro_rules! foo { + // ("a") => (A); + // ("b") => (B); + // ("c") => (C); + // // ... etc. (maybe hundreds more) + // } + // + // as seen in the `html5ever` benchmark. We use a `Cow` so that the base + // parser is only cloned when necessary (upon mutation). Furthermore, we + // reinitialize the `Cow` with the base parser at the start of every + // iteration, so that any mutated parsers are not reused. This is all quite + // hacky, but speeds up the `html5ever` benchmark significantly. (Issue + // 68836 suggests a more comprehensive but more complex change to deal with + // this situation.) + let parser = parser_from_cx(sess, arg.clone()); + + // Try each arm's matchers. + let mut tt_parser = TtParser::new(name); + for (i, lhs) in lhses.iter().enumerate() { + // Take a snapshot of the state of pre-expansion gating at this point. + // This is used so that if a matcher is not `Success(..)`ful, + // then the spans which became gated when parsing the unsuccessful matcher + // are not recorded. On the first `Success(..)`ful matcher, the spans are merged. + let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut()); + + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) { + Success(named_matches) => { + // The matcher was `Success(..)`ful. + // Merge the gated spans from parsing the matcher with the pre-existing ones. + sess.gated_spans.merge(gated_spans_snapshot); + + let (rhs, rhs_span): (&mbe::Delimited, DelimSpan) = match &rhses[i] { + mbe::TokenTree::Delimited(span, delimited) => (&delimited, *span), + _ => cx.span_bug(sp, "malformed macro rhs"), + }; + let arm_span = rhses[i].span(); + + let rhs_spans = rhs.tts.iter().map(|t| t.span()).collect::>(); + // rhs has holes ( `$id` and `$(...)` that need filled) + let mut tts = match transcribe(cx, &named_matches, &rhs, rhs_span, transparency) { + Ok(tts) => tts, + Err(mut err) => { + err.emit(); + return DummyResult::any(arm_span); + } + }; + + // Replace all the tokens for the corresponding positions in the macro, to maintain + // proper positions in error reporting, while maintaining the macro_backtrace. + if rhs_spans.len() == tts.len() { + tts = tts.map_enumerated(|i, tt| { + let mut tt = tt.clone(); + let mut sp = rhs_spans[i]; + sp = sp.with_ctxt(tt.span().ctxt()); + tt.set_span(sp); + tt + }); + } + + if cx.trace_macros() { + let msg = format!("to `{}`", pprust::tts_to_string(&tts)); + trace_macros_note(&mut cx.expansions, sp, msg); + } + + let mut p = Parser::new(sess, tts, false, None); + p.last_type_ascription = cx.current_expansion.prior_type_ascription; + + if is_local { + cx.resolver.record_macro_rule_usage(node_id, i); + } + + // Let the context choose how to interpret the result. + // Weird, but useful for X-macros. + return Box::new(ParserAnyMacro { + parser: p, + + // Pass along the original expansion site and the name of the macro + // so we can print a useful error message if the parse of the expanded + // macro leaves unparsed tokens. + site_span: sp, + macro_ident: name, + lint_node_id: cx.current_expansion.lint_node_id, + is_trailing_mac: cx.current_expansion.is_trailing_mac, + arm_span, + is_local, + }); + } + Failure(token, msg) => match best_failure { + Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {} + _ => best_failure = Some((token, msg)), + }, + Error(err_sp, ref msg) => { + let span = err_sp.substitute_dummy(sp); + cx.struct_span_err(span, &msg).emit(); + return DummyResult::any(span); + } + ErrorReported => return DummyResult::any(sp), + } + + // The matcher was not `Success(..)`ful. + // Restore to the state before snapshotting and maybe try again. + mem::swap(&mut gated_spans_snapshot, &mut sess.gated_spans.spans.borrow_mut()); + } + drop(parser); + + let (token, label) = best_failure.expect("ran no matchers"); + let span = token.span.substitute_dummy(sp); + let mut err = cx.struct_span_err(span, &parse_failure_msg(&token)); + err.span_label(span, label); + if !def_span.is_dummy() && !cx.source_map().is_imported(def_span) { + err.span_label(cx.source_map().guess_head_span(def_span), "when calling this macro"); + } + annotate_doc_comment(&mut err, sess.source_map(), span); + // Check whether there's a missing comma in this macro call, like `println!("{}" a);` + if let Some((arg, comma_span)) = arg.add_comma() { + for lhs in lhses { + let parser = parser_from_cx(sess, arg.clone()); + if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) { + if comma_span.is_dummy() { + err.note("you might be missing a comma"); + } else { + err.span_suggestion_short( + comma_span, + "missing comma here", + ", ", + Applicability::MachineApplicable, + ); + } + } + } + } + err.emit(); + cx.trace_macros_diag(); + DummyResult::any(sp) +} + +// Note that macro-by-example's input is also matched against a token tree: +// $( $lhs:tt => $rhs:tt );+ +// +// Holy self-referential! + +/// Converts a macro item into a syntax extension. +pub fn compile_declarative_macro( + sess: &Session, + features: &Features, + def: &ast::Item, + edition: Edition, +) -> (SyntaxExtension, Vec<(usize, Span)>) { + debug!("compile_declarative_macro: {:?}", def); + let mk_syn_ext = |expander| { + SyntaxExtension::new( + sess, + SyntaxExtensionKind::LegacyBang(expander), + def.span, + Vec::new(), + edition, + def.ident.name, + &def.attrs, + ) + }; + let dummy_syn_ext = || (mk_syn_ext(Box::new(macro_rules_dummy_expander)), Vec::new()); + + let diag = &sess.parse_sess.span_diagnostic; + let lhs_nm = Ident::new(sym::lhs, def.span); + let rhs_nm = Ident::new(sym::rhs, def.span); + let tt_spec = Some(NonterminalKind::TT); + + // Parse the macro_rules! invocation + let (macro_rules, body) = match &def.kind { + ast::ItemKind::MacroDef(def) => (def.macro_rules, def.body.inner_tokens()), + _ => unreachable!(), + }; + + // The pattern that macro_rules matches. + // The grammar for macro_rules! is: + // $( $lhs:tt => $rhs:tt );+ + // ...quasiquoting this would be nice. + // These spans won't matter, anyways + let argument_gram = vec![ + mbe::TokenTree::Sequence( + DelimSpan::dummy(), + mbe::SequenceRepetition { + tts: vec![ + mbe::TokenTree::MetaVarDecl(def.span, lhs_nm, tt_spec), + mbe::TokenTree::token(token::FatArrow, def.span), + mbe::TokenTree::MetaVarDecl(def.span, rhs_nm, tt_spec), + ], + separator: Some(Token::new( + if macro_rules { token::Semi } else { token::Comma }, + def.span, + )), + kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, def.span), + num_captures: 2, + }, + ), + // to phase into semicolon-termination instead of semicolon-separation + mbe::TokenTree::Sequence( + DelimSpan::dummy(), + mbe::SequenceRepetition { + tts: vec![mbe::TokenTree::token( + if macro_rules { token::Semi } else { token::Comma }, + def.span, + )], + separator: None, + kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, def.span), + num_captures: 0, + }, + ), + ]; + // Convert it into `MatcherLoc` form. + let argument_gram = mbe::macro_parser::compute_locs(&argument_gram); + + let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS); + let mut tt_parser = + TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro })); + let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) { + Success(m) => m, + Failure(token, msg) => { + let s = parse_failure_msg(&token); + let sp = token.span.substitute_dummy(def.span); + let mut err = sess.parse_sess.span_diagnostic.struct_span_err(sp, &s); + err.span_label(sp, msg); + annotate_doc_comment(&mut err, sess.source_map(), sp); + err.emit(); + return dummy_syn_ext(); + } + Error(sp, msg) => { + sess.parse_sess + .span_diagnostic + .struct_span_err(sp.substitute_dummy(def.span), &msg) + .emit(); + return dummy_syn_ext(); + } + ErrorReported => { + return dummy_syn_ext(); + } + }; + + let mut valid = true; + + // Extract the arguments: + let lhses = match argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] { + MatchedSeq(ref s) => s + .iter() + .map(|m| { + if let MatchedTokenTree(ref tt) = *m { + let tt = mbe::quoted::parse( + TokenStream::new(vec![tt.clone()]), + true, + &sess.parse_sess, + def.id, + features, + edition, + ) + .pop() + .unwrap(); + valid &= check_lhs_nt_follows(&sess.parse_sess, &def, &tt); + return tt; + } + sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") + }) + .collect::>(), + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"), + }; + + let rhses = match argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] { + MatchedSeq(ref s) => s + .iter() + .map(|m| { + if let MatchedTokenTree(ref tt) = *m { + return mbe::quoted::parse( + TokenStream::new(vec![tt.clone()]), + false, + &sess.parse_sess, + def.id, + features, + edition, + ) + .pop() + .unwrap(); + } + sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") + }) + .collect::>(), + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs"), + }; + + for rhs in &rhses { + valid &= check_rhs(&sess.parse_sess, rhs); + } + + // don't abort iteration early, so that errors for multiple lhses can be reported + for lhs in &lhses { + valid &= check_lhs_no_empty_seq(&sess.parse_sess, slice::from_ref(lhs)); + } + + valid &= macro_check::check_meta_variables(&sess.parse_sess, def.id, def.span, &lhses, &rhses); + + let (transparency, transparency_error) = attr::find_transparency(&def.attrs, macro_rules); + match transparency_error { + Some(TransparencyError::UnknownTransparency(value, span)) => { + diag.span_err(span, &format!("unknown macro transparency: `{}`", value)); + } + Some(TransparencyError::MultipleTransparencyAttrs(old_span, new_span)) => { + diag.span_err(vec![old_span, new_span], "multiple macro transparency attributes"); + } + None => {} + } + + // Compute the spans of the macro rules for unused rule linting. + // To avoid warning noise, only consider the rules of this + // macro for the lint, if all rules are valid. + // Also, we are only interested in non-foreign macros. + let rule_spans = if valid && def.id != DUMMY_NODE_ID { + lhses + .iter() + .zip(rhses.iter()) + .enumerate() + // If the rhs contains an invocation like compile_error!, + // don't consider the rule for the unused rule lint. + .filter(|(_idx, (_lhs, rhs))| !has_compile_error_macro(rhs)) + // We only take the span of the lhs here, + // so that the spans of created warnings are smaller. + .map(|(idx, (lhs, _rhs))| (idx, lhs.span())) + .collect::>() + } else { + Vec::new() + }; + + // Convert the lhses into `MatcherLoc` form, which is better for doing the + // actual matching. Unless the matcher is invalid. + let lhses = if valid { + lhses + .iter() + .map(|lhs| { + // Ignore the delimiters around the matcher. + match lhs { + mbe::TokenTree::Delimited(_, delimited) => { + mbe::macro_parser::compute_locs(&delimited.tts) + } + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"), + } + }) + .collect() + } else { + vec![] + }; + + let expander = Box::new(MacroRulesMacroExpander { + name: def.ident, + span: def.span, + node_id: def.id, + transparency, + lhses, + rhses, + valid, + }); + (mk_syn_ext(expander), rule_spans) +} + +#[derive(SessionSubdiagnostic)] +enum ExplainDocComment { + #[label(expand::explain_doc_comment_inner)] + Inner { + #[primary_span] + span: Span, + }, + #[label(expand::explain_doc_comment_outer)] + Outer { + #[primary_span] + span: Span, + }, +} + +fn annotate_doc_comment( + err: &mut DiagnosticBuilder<'_, ErrorGuaranteed>, + sm: &SourceMap, + span: Span, +) { + if let Ok(src) = sm.span_to_snippet(span) { + if src.starts_with("///") || src.starts_with("/**") { + err.subdiagnostic(ExplainDocComment::Outer { span }); + } else if src.starts_with("//!") || src.starts_with("/*!") { + err.subdiagnostic(ExplainDocComment::Inner { span }); + } + } +} + +fn check_lhs_nt_follows(sess: &ParseSess, def: &ast::Item, lhs: &mbe::TokenTree) -> bool { + // lhs is going to be like TokenTree::Delimited(...), where the + // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens. + if let mbe::TokenTree::Delimited(_, delimited) = lhs { + check_matcher(sess, def, &delimited.tts) + } else { + let msg = "invalid macro matcher; matchers must be contained in balanced delimiters"; + sess.span_diagnostic.span_err(lhs.span(), msg); + false + } + // we don't abort on errors on rejection, the driver will do that for us + // after parsing/expansion. we can report every error in every macro this way. +} + +/// Checks that the lhs contains no repetition which could match an empty token +/// tree, because then the matcher would hang indefinitely. +fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool { + use mbe::TokenTree; + for tt in tts { + match *tt { + TokenTree::Token(..) + | TokenTree::MetaVar(..) + | TokenTree::MetaVarDecl(..) + | TokenTree::MetaVarExpr(..) => (), + TokenTree::Delimited(_, ref del) => { + if !check_lhs_no_empty_seq(sess, &del.tts) { + return false; + } + } + TokenTree::Sequence(span, ref seq) => { + if seq.separator.is_none() + && seq.tts.iter().all(|seq_tt| match *seq_tt { + TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Vis)) => true, + TokenTree::Sequence(_, ref sub_seq) => { + sub_seq.kleene.op == mbe::KleeneOp::ZeroOrMore + || sub_seq.kleene.op == mbe::KleeneOp::ZeroOrOne + } + _ => false, + }) + { + let sp = span.entire(); + sess.span_diagnostic.span_err(sp, "repetition matches empty token tree"); + return false; + } + if !check_lhs_no_empty_seq(sess, &seq.tts) { + return false; + } + } + } + } + + true +} + +fn check_rhs(sess: &ParseSess, rhs: &mbe::TokenTree) -> bool { + match *rhs { + mbe::TokenTree::Delimited(..) => return true, + _ => { + sess.span_diagnostic.span_err(rhs.span(), "macro rhs must be delimited"); + } + } + false +} + +fn check_matcher(sess: &ParseSess, def: &ast::Item, matcher: &[mbe::TokenTree]) -> bool { + let first_sets = FirstSets::new(matcher); + let empty_suffix = TokenSet::empty(); + let err = sess.span_diagnostic.err_count(); + check_matcher_core(sess, def, &first_sets, matcher, &empty_suffix); + err == sess.span_diagnostic.err_count() +} + +fn has_compile_error_macro(rhs: &mbe::TokenTree) -> bool { + match rhs { + mbe::TokenTree::Delimited(_sp, d) => { + let has_compile_error = d.tts.array_windows::<3>().any(|[ident, bang, args]| { + if let mbe::TokenTree::Token(ident) = ident && + let TokenKind::Ident(ident, _) = ident.kind && + ident == sym::compile_error && + let mbe::TokenTree::Token(bang) = bang && + let TokenKind::Not = bang.kind && + let mbe::TokenTree::Delimited(_, del) = args && + del.delim != Delimiter::Invisible + { + true + } else { + false + } + }); + if has_compile_error { true } else { d.tts.iter().any(has_compile_error_macro) } + } + _ => false, + } +} + +// `The FirstSets` for a matcher is a mapping from subsequences in the +// matcher to the FIRST set for that subsequence. +// +// This mapping is partially precomputed via a backwards scan over the +// token trees of the matcher, which provides a mapping from each +// repetition sequence to its *first* set. +// +// (Hypothetically, sequences should be uniquely identifiable via their +// spans, though perhaps that is false, e.g., for macro-generated macros +// that do not try to inject artificial span information. My plan is +// to try to catch such cases ahead of time and not include them in +// the precomputed mapping.) +struct FirstSets<'tt> { + // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its + // span in the original matcher to the First set for the inner sequence `tt ...`. + // + // If two sequences have the same span in a matcher, then map that + // span to None (invalidating the mapping here and forcing the code to + // use a slow path). + first: FxHashMap>>, +} + +impl<'tt> FirstSets<'tt> { + fn new(tts: &'tt [mbe::TokenTree]) -> FirstSets<'tt> { + use mbe::TokenTree; + + let mut sets = FirstSets { first: FxHashMap::default() }; + build_recur(&mut sets, tts); + return sets; + + // walks backward over `tts`, returning the FIRST for `tts` + // and updating `sets` at the same time for all sequence + // substructure we find within `tts`. + fn build_recur<'tt>(sets: &mut FirstSets<'tt>, tts: &'tt [TokenTree]) -> TokenSet<'tt> { + let mut first = TokenSet::empty(); + for tt in tts.iter().rev() { + match *tt { + TokenTree::Token(..) + | TokenTree::MetaVar(..) + | TokenTree::MetaVarDecl(..) + | TokenTree::MetaVarExpr(..) => { + first.replace_with(TtHandle::TtRef(tt)); + } + TokenTree::Delimited(span, ref delimited) => { + build_recur(sets, &delimited.tts); + first.replace_with(TtHandle::from_token_kind( + token::OpenDelim(delimited.delim), + span.open, + )); + } + TokenTree::Sequence(sp, ref seq_rep) => { + let subfirst = build_recur(sets, &seq_rep.tts); + + match sets.first.entry(sp.entire()) { + Entry::Vacant(vac) => { + vac.insert(Some(subfirst.clone())); + } + Entry::Occupied(mut occ) => { + // if there is already an entry, then a span must have collided. + // This should not happen with typical macro_rules macros, + // but syntax extensions need not maintain distinct spans, + // so distinct syntax trees can be assigned the same span. + // In such a case, the map cannot be trusted; so mark this + // entry as unusable. + occ.insert(None); + } + } + + // If the sequence contents can be empty, then the first + // token could be the separator token itself. + + if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) { + first.add_one_maybe(TtHandle::from_token(sep.clone())); + } + + // Reverse scan: Sequence comes before `first`. + if subfirst.maybe_empty + || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore + || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne + { + // If sequence is potentially empty, then + // union them (preserving first emptiness). + first.add_all(&TokenSet { maybe_empty: true, ..subfirst }); + } else { + // Otherwise, sequence guaranteed + // non-empty; replace first. + first = subfirst; + } + } + } + } + + first + } + } + + // walks forward over `tts` until all potential FIRST tokens are + // identified. + fn first(&self, tts: &'tt [mbe::TokenTree]) -> TokenSet<'tt> { + use mbe::TokenTree; + + let mut first = TokenSet::empty(); + for tt in tts.iter() { + assert!(first.maybe_empty); + match *tt { + TokenTree::Token(..) + | TokenTree::MetaVar(..) + | TokenTree::MetaVarDecl(..) + | TokenTree::MetaVarExpr(..) => { + first.add_one(TtHandle::TtRef(tt)); + return first; + } + TokenTree::Delimited(span, ref delimited) => { + first.add_one(TtHandle::from_token_kind( + token::OpenDelim(delimited.delim), + span.open, + )); + return first; + } + TokenTree::Sequence(sp, ref seq_rep) => { + let subfirst_owned; + let subfirst = match self.first.get(&sp.entire()) { + Some(&Some(ref subfirst)) => subfirst, + Some(&None) => { + subfirst_owned = self.first(&seq_rep.tts); + &subfirst_owned + } + None => { + panic!("We missed a sequence during FirstSets construction"); + } + }; + + // If the sequence contents can be empty, then the first + // token could be the separator token itself. + if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) { + first.add_one_maybe(TtHandle::from_token(sep.clone())); + } + + assert!(first.maybe_empty); + first.add_all(subfirst); + if subfirst.maybe_empty + || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore + || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne + { + // Continue scanning for more first + // tokens, but also make sure we + // restore empty-tracking state. + first.maybe_empty = true; + continue; + } else { + return first; + } + } + } + } + + // we only exit the loop if `tts` was empty or if every + // element of `tts` matches the empty sequence. + assert!(first.maybe_empty); + first + } +} + +// Most `mbe::TokenTree`s are pre-existing in the matcher, but some are defined +// implicitly, such as opening/closing delimiters and sequence repetition ops. +// This type encapsulates both kinds. It implements `Clone` while avoiding the +// need for `mbe::TokenTree` to implement `Clone`. +#[derive(Debug)] +enum TtHandle<'tt> { + /// This is used in most cases. + TtRef(&'tt mbe::TokenTree), + + /// This is only used for implicit token trees. The `mbe::TokenTree` *must* + /// be `mbe::TokenTree::Token`. No other variants are allowed. We store an + /// `mbe::TokenTree` rather than a `Token` so that `get()` can return a + /// `&mbe::TokenTree`. + Token(mbe::TokenTree), +} + +impl<'tt> TtHandle<'tt> { + fn from_token(tok: Token) -> Self { + TtHandle::Token(mbe::TokenTree::Token(tok)) + } + + fn from_token_kind(kind: TokenKind, span: Span) -> Self { + TtHandle::from_token(Token::new(kind, span)) + } + + // Get a reference to a token tree. + fn get(&'tt self) -> &'tt mbe::TokenTree { + match self { + TtHandle::TtRef(tt) => tt, + TtHandle::Token(token_tt) => &token_tt, + } + } +} + +impl<'tt> PartialEq for TtHandle<'tt> { + fn eq(&self, other: &TtHandle<'tt>) -> bool { + self.get() == other.get() + } +} + +impl<'tt> Clone for TtHandle<'tt> { + fn clone(&self) -> Self { + match self { + TtHandle::TtRef(tt) => TtHandle::TtRef(tt), + + // This variant *must* contain a `mbe::TokenTree::Token`, and not + // any other variant of `mbe::TokenTree`. + TtHandle::Token(mbe::TokenTree::Token(tok)) => { + TtHandle::Token(mbe::TokenTree::Token(tok.clone())) + } + + _ => unreachable!(), + } + } +} + +// A set of `mbe::TokenTree`s, which may include `TokenTree::Match`s +// (for macro-by-example syntactic variables). It also carries the +// `maybe_empty` flag; that is true if and only if the matcher can +// match an empty token sequence. +// +// The First set is computed on submatchers like `$($a:expr b),* $(c)* d`, +// which has corresponding FIRST = {$a:expr, c, d}. +// Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}. +// +// (Notably, we must allow for *-op to occur zero times.) +#[derive(Clone, Debug)] +struct TokenSet<'tt> { + tokens: Vec>, + maybe_empty: bool, +} + +impl<'tt> TokenSet<'tt> { + // Returns a set for the empty sequence. + fn empty() -> Self { + TokenSet { tokens: Vec::new(), maybe_empty: true } + } + + // Returns the set `{ tok }` for the single-token (and thus + // non-empty) sequence [tok]. + fn singleton(tt: TtHandle<'tt>) -> Self { + TokenSet { tokens: vec![tt], maybe_empty: false } + } + + // Changes self to be the set `{ tok }`. + // Since `tok` is always present, marks self as non-empty. + fn replace_with(&mut self, tt: TtHandle<'tt>) { + self.tokens.clear(); + self.tokens.push(tt); + self.maybe_empty = false; + } + + // Changes self to be the empty set `{}`; meant for use when + // the particular token does not matter, but we want to + // record that it occurs. + fn replace_with_irrelevant(&mut self) { + self.tokens.clear(); + self.maybe_empty = false; + } + + // Adds `tok` to the set for `self`, marking sequence as non-empy. + fn add_one(&mut self, tt: TtHandle<'tt>) { + if !self.tokens.contains(&tt) { + self.tokens.push(tt); + } + self.maybe_empty = false; + } + + // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.) + fn add_one_maybe(&mut self, tt: TtHandle<'tt>) { + if !self.tokens.contains(&tt) { + self.tokens.push(tt); + } + } + + // Adds all elements of `other` to this. + // + // (Since this is a set, we filter out duplicates.) + // + // If `other` is potentially empty, then preserves the previous + // setting of the empty flag of `self`. If `other` is guaranteed + // non-empty, then `self` is marked non-empty. + fn add_all(&mut self, other: &Self) { + for tt in &other.tokens { + if !self.tokens.contains(tt) { + self.tokens.push(tt.clone()); + } + } + if !other.maybe_empty { + self.maybe_empty = false; + } + } +} + +// Checks that `matcher` is internally consistent and that it +// can legally be followed by a token `N`, for all `N` in `follow`. +// (If `follow` is empty, then it imposes no constraint on +// the `matcher`.) +// +// Returns the set of NT tokens that could possibly come last in +// `matcher`. (If `matcher` matches the empty sequence, then +// `maybe_empty` will be set to true.) +// +// Requires that `first_sets` is pre-computed for `matcher`; +// see `FirstSets::new`. +fn check_matcher_core<'tt>( + sess: &ParseSess, + def: &ast::Item, + first_sets: &FirstSets<'tt>, + matcher: &'tt [mbe::TokenTree], + follow: &TokenSet<'tt>, +) -> TokenSet<'tt> { + use mbe::TokenTree; + + let mut last = TokenSet::empty(); + + // 2. For each token and suffix [T, SUFFIX] in M: + // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty, + // then ensure T can also be followed by any element of FOLLOW. + 'each_token: for i in 0..matcher.len() { + let token = &matcher[i]; + let suffix = &matcher[i + 1..]; + + let build_suffix_first = || { + let mut s = first_sets.first(suffix); + if s.maybe_empty { + s.add_all(follow); + } + s + }; + + // (we build `suffix_first` on demand below; you can tell + // which cases are supposed to fall through by looking for the + // initialization of this variable.) + let suffix_first; + + // First, update `last` so that it corresponds to the set + // of NT tokens that might end the sequence `... token`. + match *token { + TokenTree::Token(..) + | TokenTree::MetaVar(..) + | TokenTree::MetaVarDecl(..) + | TokenTree::MetaVarExpr(..) => { + if token_can_be_followed_by_any(token) { + // don't need to track tokens that work with any, + last.replace_with_irrelevant(); + // ... and don't need to check tokens that can be + // followed by anything against SUFFIX. + continue 'each_token; + } else { + last.replace_with(TtHandle::TtRef(token)); + suffix_first = build_suffix_first(); + } + } + TokenTree::Delimited(span, ref d) => { + let my_suffix = TokenSet::singleton(TtHandle::from_token_kind( + token::CloseDelim(d.delim), + span.close, + )); + check_matcher_core(sess, def, first_sets, &d.tts, &my_suffix); + // don't track non NT tokens + last.replace_with_irrelevant(); + + // also, we don't need to check delimited sequences + // against SUFFIX + continue 'each_token; + } + TokenTree::Sequence(_, ref seq_rep) => { + suffix_first = build_suffix_first(); + // The trick here: when we check the interior, we want + // to include the separator (if any) as a potential + // (but not guaranteed) element of FOLLOW. So in that + // case, we make a temp copy of suffix and stuff + // delimiter in there. + // + // FIXME: Should I first scan suffix_first to see if + // delimiter is already in it before I go through the + // work of cloning it? But then again, this way I may + // get a "tighter" span? + let mut new; + let my_suffix = if let Some(sep) = &seq_rep.separator { + new = suffix_first.clone(); + new.add_one_maybe(TtHandle::from_token(sep.clone())); + &new + } else { + &suffix_first + }; + + // At this point, `suffix_first` is built, and + // `my_suffix` is some TokenSet that we can use + // for checking the interior of `seq_rep`. + let next = check_matcher_core(sess, def, first_sets, &seq_rep.tts, my_suffix); + if next.maybe_empty { + last.add_all(&next); + } else { + last = next; + } + + // the recursive call to check_matcher_core already ran the 'each_last + // check below, so we can just keep going forward here. + continue 'each_token; + } + } + + // (`suffix_first` guaranteed initialized once reaching here.) + + // Now `last` holds the complete set of NT tokens that could + // end the sequence before SUFFIX. Check that every one works with `suffix`. + for tt in &last.tokens { + if let &TokenTree::MetaVarDecl(span, name, Some(kind)) = tt.get() { + for next_token in &suffix_first.tokens { + let next_token = next_token.get(); + + // Check if the old pat is used and the next token is `|` + // to warn about incompatibility with Rust 2021. + // We only emit this lint if we're parsing the original + // definition of this macro_rules, not while (re)parsing + // the macro when compiling another crate that is using the + // macro. (See #86567.) + // Macros defined in the current crate have a real node id, + // whereas macros from an external crate have a dummy id. + if def.id != DUMMY_NODE_ID + && matches!(kind, NonterminalKind::PatParam { inferred: true }) + && matches!(next_token, TokenTree::Token(token) if token.kind == BinOp(token::BinOpToken::Or)) + { + // It is suggestion to use pat_param, for example: $x:pat -> $x:pat_param. + let suggestion = quoted_tt_to_string(&TokenTree::MetaVarDecl( + span, + name, + Some(NonterminalKind::PatParam { inferred: false }), + )); + sess.buffer_lint_with_diagnostic( + &RUST_2021_INCOMPATIBLE_OR_PATTERNS, + span, + ast::CRATE_NODE_ID, + "the meaning of the `pat` fragment specifier is changing in Rust 2021, which may affect this macro", + BuiltinLintDiagnostics::OrPatternsBackCompat(span, suggestion), + ); + } + match is_in_follow(next_token, kind) { + IsInFollow::Yes => {} + IsInFollow::No(possible) => { + let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1 + { + "is" + } else { + "may be" + }; + + let sp = next_token.span(); + let mut err = sess.span_diagnostic.struct_span_err( + sp, + &format!( + "`${name}:{frag}` {may_be} followed by `{next}`, which \ + is not allowed for `{frag}` fragments", + name = name, + frag = kind, + next = quoted_tt_to_string(next_token), + may_be = may_be + ), + ); + err.span_label(sp, format!("not allowed after `{}` fragments", kind)); + + if kind == NonterminalKind::PatWithOr + && sess.edition.rust_2021() + && next_token.is_token(&BinOp(token::BinOpToken::Or)) + { + let suggestion = quoted_tt_to_string(&TokenTree::MetaVarDecl( + span, + name, + Some(NonterminalKind::PatParam { inferred: false }), + )); + err.span_suggestion( + span, + "try a `pat_param` fragment specifier instead", + suggestion, + Applicability::MaybeIncorrect, + ); + } + + let msg = "allowed there are: "; + match possible { + &[] => {} + &[t] => { + err.note(&format!( + "only {} is allowed after `{}` fragments", + t, kind, + )); + } + ts => { + err.note(&format!( + "{}{} or {}", + msg, + ts[..ts.len() - 1] + .iter() + .copied() + .collect::>() + .join(", "), + ts[ts.len() - 1], + )); + } + } + err.emit(); + } + } + } + } + } + } + last +} + +fn token_can_be_followed_by_any(tok: &mbe::TokenTree) -> bool { + if let mbe::TokenTree::MetaVarDecl(_, _, Some(kind)) = *tok { + frag_can_be_followed_by_any(kind) + } else { + // (Non NT's can always be followed by anything in matchers.) + true + } +} + +/// Returns `true` if a fragment of type `frag` can be followed by any sort of +/// token. We use this (among other things) as a useful approximation +/// for when `frag` can be followed by a repetition like `$(...)*` or +/// `$(...)+`. In general, these can be a bit tricky to reason about, +/// so we adopt a conservative position that says that any fragment +/// specifier which consumes at most one token tree can be followed by +/// a fragment specifier (indeed, these fragments can be followed by +/// ANYTHING without fear of future compatibility hazards). +fn frag_can_be_followed_by_any(kind: NonterminalKind) -> bool { + matches!( + kind, + NonterminalKind::Item // always terminated by `}` or `;` + | NonterminalKind::Block // exactly one token tree + | NonterminalKind::Ident // exactly one token tree + | NonterminalKind::Literal // exactly one token tree + | NonterminalKind::Meta // exactly one token tree + | NonterminalKind::Lifetime // exactly one token tree + | NonterminalKind::TT // exactly one token tree + ) +} + +enum IsInFollow { + Yes, + No(&'static [&'static str]), +} + +/// Returns `true` if `frag` can legally be followed by the token `tok`. For +/// fragments that can consume an unbounded number of tokens, `tok` +/// must be within a well-defined follow set. This is intended to +/// guarantee future compatibility: for example, without this rule, if +/// we expanded `expr` to include a new binary operator, we might +/// break macros that were relying on that binary operator as a +/// separator. +// when changing this do not forget to update doc/book/macros.md! +fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow { + use mbe::TokenTree; + + if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok { + // closing a token tree can never be matched by any fragment; + // iow, we always require that `(` and `)` match, etc. + IsInFollow::Yes + } else { + match kind { + NonterminalKind::Item => { + // since items *must* be followed by either a `;` or a `}`, we can + // accept anything after them + IsInFollow::Yes + } + NonterminalKind::Block => { + // anything can follow block, the braces provide an easy boundary to + // maintain + IsInFollow::Yes + } + NonterminalKind::Stmt | NonterminalKind::Expr => { + const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"]; + match tok { + TokenTree::Token(token) => match token.kind { + FatArrow | Comma | Semi => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + }, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::PatParam { .. } => { + const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"]; + match tok { + TokenTree::Token(token) => match token.kind { + FatArrow | Comma | Eq | BinOp(token::Or) => IsInFollow::Yes, + Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + }, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::PatWithOr { .. } => { + const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`if`", "`in`"]; + match tok { + TokenTree::Token(token) => match token.kind { + FatArrow | Comma | Eq => IsInFollow::Yes, + Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + }, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::Path | NonterminalKind::Ty => { + const TOKENS: &[&str] = &[ + "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`", + "`where`", + ]; + match tok { + TokenTree::Token(token) => match token.kind { + OpenDelim(Delimiter::Brace) + | OpenDelim(Delimiter::Bracket) + | Comma + | FatArrow + | Colon + | Eq + | Gt + | BinOp(token::Shr) + | Semi + | BinOp(token::Or) => IsInFollow::Yes, + Ident(name, false) if name == kw::As || name == kw::Where => { + IsInFollow::Yes + } + _ => IsInFollow::No(TOKENS), + }, + TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Block)) => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::Ident | NonterminalKind::Lifetime => { + // being a single token, idents and lifetimes are harmless + IsInFollow::Yes + } + NonterminalKind::Literal => { + // literals may be of a single token, or two tokens (negative numbers) + IsInFollow::Yes + } + NonterminalKind::Meta | NonterminalKind::TT => { + // being either a single token or a delimited sequence, tt is + // harmless + IsInFollow::Yes + } + NonterminalKind::Vis => { + // Explicitly disallow `priv`, on the off chance it comes back. + const TOKENS: &[&str] = &["`,`", "an ident", "a type"]; + match tok { + TokenTree::Token(token) => match token.kind { + Comma => IsInFollow::Yes, + Ident(name, is_raw) if is_raw || name != kw::Priv => IsInFollow::Yes, + _ => { + if token.can_begin_type() { + IsInFollow::Yes + } else { + IsInFollow::No(TOKENS) + } + } + }, + TokenTree::MetaVarDecl( + _, + _, + Some(NonterminalKind::Ident | NonterminalKind::Ty | NonterminalKind::Path), + ) => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + } + } + } + } +} + +fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String { + match *tt { + mbe::TokenTree::Token(ref token) => pprust::token_to_string(&token).into(), + mbe::TokenTree::MetaVar(_, name) => format!("${}", name), + mbe::TokenTree::MetaVarDecl(_, name, Some(kind)) => format!("${}:{}", name, kind), + mbe::TokenTree::MetaVarDecl(_, name, None) => format!("${}:", name), + _ => panic!( + "{}", + "unexpected mbe::TokenTree::{Sequence or Delimited} \ + in follow set checker" + ), + } +} + +fn parser_from_cx(sess: &ParseSess, tts: TokenStream) -> Parser<'_> { + Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS) +} + +/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For +/// other tokens, this is "unexpected token...". +fn parse_failure_msg(tok: &Token) -> String { + match tok.kind { + token::Eof => "unexpected end of macro invocation".to_string(), + _ => format!("no rules expected the token `{}`", pprust::token_to_string(tok),), + } +} diff --git a/compiler/rustc_expand/src/mbe/metavar_expr.rs b/compiler/rustc_expand/src/mbe/metavar_expr.rs new file mode 100644 index 000000000..fc808401a --- /dev/null +++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs @@ -0,0 +1,161 @@ +use rustc_ast::token::{self, Delimiter}; +use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree}; +use rustc_ast::{LitIntType, LitKind}; +use rustc_ast_pretty::pprust; +use rustc_errors::{Applicability, PResult}; +use rustc_session::parse::ParseSess; +use rustc_span::symbol::Ident; +use rustc_span::Span; + +/// A meta-variable expression, for expansions based on properties of meta-variables. +#[derive(Debug, Clone, PartialEq, Encodable, Decodable)] +pub(crate) enum MetaVarExpr { + /// The number of repetitions of an identifier, optionally limited to a number + /// of outer-most repetition depths. If the depth limit is `None` then the depth is unlimited. + Count(Ident, Option), + + /// Ignore a meta-variable for repetition without expansion. + Ignore(Ident), + + /// The index of the repetition at a particular depth, where 0 is the inner-most + /// repetition. The `usize` is the depth. + Index(usize), + + /// The length of the repetition at a particular depth, where 0 is the inner-most + /// repetition. The `usize` is the depth. + Length(usize), +} + +impl MetaVarExpr { + /// Attempt to parse a meta-variable expression from a token stream. + pub(crate) fn parse<'sess>( + input: &TokenStream, + outer_span: Span, + sess: &'sess ParseSess, + ) -> PResult<'sess, MetaVarExpr> { + let mut tts = input.trees(); + let ident = parse_ident(&mut tts, sess, outer_span)?; + let Some(TokenTree::Delimited(_, Delimiter::Parenthesis, args)) = tts.next() else { + let msg = "meta-variable expression parameter must be wrapped in parentheses"; + return Err(sess.span_diagnostic.struct_span_err(ident.span, msg)); + }; + check_trailing_token(&mut tts, sess)?; + let mut iter = args.trees(); + let rslt = match &*ident.as_str() { + "count" => parse_count(&mut iter, sess, ident.span)?, + "ignore" => MetaVarExpr::Ignore(parse_ident(&mut iter, sess, ident.span)?), + "index" => MetaVarExpr::Index(parse_depth(&mut iter, sess, ident.span)?), + "length" => MetaVarExpr::Length(parse_depth(&mut iter, sess, ident.span)?), + _ => { + let err_msg = "unrecognized meta-variable expression"; + let mut err = sess.span_diagnostic.struct_span_err(ident.span, err_msg); + err.span_suggestion( + ident.span, + "supported expressions are count, ignore, index and length", + "", + Applicability::MachineApplicable, + ); + return Err(err); + } + }; + check_trailing_token(&mut iter, sess)?; + Ok(rslt) + } + + pub(crate) fn ident(&self) -> Option { + match *self { + MetaVarExpr::Count(ident, _) | MetaVarExpr::Ignore(ident) => Some(ident), + MetaVarExpr::Index(..) | MetaVarExpr::Length(..) => None, + } + } +} + +// Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}` +fn check_trailing_token<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, +) -> PResult<'sess, ()> { + if let Some(tt) = iter.next() { + let mut diag = sess + .span_diagnostic + .struct_span_err(tt.span(), &format!("unexpected token: {}", pprust::tt_to_string(tt))); + diag.span_note(tt.span(), "meta-variable expression must not have trailing tokens"); + Err(diag) + } else { + Ok(()) + } +} + +/// Parse a meta-variable `count` expression: `count(ident[, depth])` +fn parse_count<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, + span: Span, +) -> PResult<'sess, MetaVarExpr> { + let ident = parse_ident(iter, sess, span)?; + let depth = if try_eat_comma(iter) { Some(parse_depth(iter, sess, span)?) } else { None }; + Ok(MetaVarExpr::Count(ident, depth)) +} + +/// Parses the depth used by index(depth) and length(depth). +fn parse_depth<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, + span: Span, +) -> PResult<'sess, usize> { + let Some(tt) = iter.next() else { return Ok(0) }; + let TokenTree::Token(token::Token { + kind: token::TokenKind::Literal(lit), .. + }, _) = tt else { + return Err(sess.span_diagnostic.struct_span_err( + span, + "meta-variable expression depth must be a literal" + )); + }; + if let Ok(lit_kind) = LitKind::from_lit_token(*lit) + && let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind + && let Ok(n_usize) = usize::try_from(n_u128) + { + Ok(n_usize) + } + else { + let msg = "only unsuffixes integer literals are supported in meta-variable expressions"; + Err(sess.span_diagnostic.struct_span_err(span, msg)) + } +} + +/// Parses an generic ident +fn parse_ident<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, + span: Span, +) -> PResult<'sess, Ident> { + if let Some(tt) = iter.next() && let TokenTree::Token(token, _) = tt { + if let Some((elem, false)) = token.ident() { + return Ok(elem); + } + let token_str = pprust::token_to_string(token); + let mut err = sess.span_diagnostic.struct_span_err( + span, + &format!("expected identifier, found `{}`", &token_str) + ); + err.span_suggestion( + token.span, + &format!("try removing `{}`", &token_str), + "", + Applicability::MaybeIncorrect, + ); + return Err(err); + } + Err(sess.span_diagnostic.struct_span_err(span, "expected identifier")) +} + +/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the +/// iterator is not modified and the result is `false`. +fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool { + if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) { + let _ = iter.next(); + return true; + } + false +} diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs new file mode 100644 index 000000000..ee17d54f6 --- /dev/null +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -0,0 +1,366 @@ +use crate::mbe::macro_parser::count_metavar_decls; +use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree}; + +use rustc_ast::token::{self, Delimiter, Token}; +use rustc_ast::{tokenstream, NodeId}; +use rustc_ast_pretty::pprust; +use rustc_feature::Features; +use rustc_session::parse::{feature_err, ParseSess}; +use rustc_span::symbol::{kw, sym, Ident}; + +use rustc_span::edition::Edition; +use rustc_span::{Span, SyntaxContext}; + +const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \ + `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \ + `literal`, `path`, `meta`, `tt`, `item` and `vis`"; + +/// Takes a `tokenstream::TokenStream` and returns a `Vec`. Specifically, this +/// takes a generic `TokenStream`, such as is used in the rest of the compiler, and returns a +/// collection of `TokenTree` for use in parsing a macro. +/// +/// # Parameters +/// +/// - `input`: a token stream to read from, the contents of which we are parsing. +/// - `parsing_patterns`: `parse` can be used to parse either the "patterns" or the "body" of a +/// macro. Both take roughly the same form _except_ that: +/// - In a pattern, metavars are declared with their "matcher" type. For example `$var:expr` or +/// `$id:ident`. In this example, `expr` and `ident` are "matchers". They are not present in the +/// body of a macro rule -- just in the pattern. +/// - Metavariable expressions are only valid in the "body", not the "pattern". +/// - `sess`: the parsing session. Any errors will be emitted to this session. +/// - `node_id`: the NodeId of the macro we are parsing. +/// - `features`: language features so we can do feature gating. +/// +/// # Returns +/// +/// A collection of `self::TokenTree`. There may also be some errors emitted to `sess`. +pub(super) fn parse( + input: tokenstream::TokenStream, + parsing_patterns: bool, + sess: &ParseSess, + node_id: NodeId, + features: &Features, + edition: Edition, +) -> Vec { + // Will contain the final collection of `self::TokenTree` + let mut result = Vec::new(); + + // For each token tree in `input`, parse the token into a `self::TokenTree`, consuming + // additional trees if need be. + let mut trees = input.into_trees(); + while let Some(tree) = trees.next() { + // Given the parsed tree, if there is a metavar and we are expecting matchers, actually + // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`). + let tree = parse_tree(tree, &mut trees, parsing_patterns, sess, node_id, features, edition); + match tree { + TokenTree::MetaVar(start_sp, ident) if parsing_patterns => { + let span = match trees.next() { + Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span }, _)) => { + match trees.next() { + Some(tokenstream::TokenTree::Token(token, _)) => match token.ident() { + Some((frag, _)) => { + let span = token.span.with_lo(start_sp.lo()); + + let kind = + token::NonterminalKind::from_symbol(frag.name, || { + // FIXME(#85708) - once we properly decode a foreign + // crate's `SyntaxContext::root`, then we can replace + // this with just `span.edition()`. A + // `SyntaxContext::root()` from the current crate will + // have the edition of the current crate, and a + // `SyntaxContext::root()` from a foreign crate will + // have the edition of that crate (which we manually + // retrieve via the `edition` parameter). + if span.ctxt() == SyntaxContext::root() { + edition + } else { + span.edition() + } + }) + .unwrap_or_else( + || { + let msg = format!( + "invalid fragment specifier `{}`", + frag.name + ); + sess.span_diagnostic + .struct_span_err(span, &msg) + .help(VALID_FRAGMENT_NAMES_MSG) + .emit(); + token::NonterminalKind::Ident + }, + ); + result.push(TokenTree::MetaVarDecl(span, ident, Some(kind))); + continue; + } + _ => token.span, + }, + tree => tree.as_ref().map_or(span, tokenstream::TokenTree::span), + } + } + tree => tree.as_ref().map_or(start_sp, tokenstream::TokenTree::span), + }; + + result.push(TokenTree::MetaVarDecl(span, ident, None)); + } + + // Not a metavar or no matchers allowed, so just return the tree + _ => result.push(tree), + } + } + result +} + +/// Asks for the `macro_metavar_expr` feature if it is not already declared +fn maybe_emit_macro_metavar_expr_feature(features: &Features, sess: &ParseSess, span: Span) { + if !features.macro_metavar_expr { + let msg = "meta-variable expressions are unstable"; + feature_err(&sess, sym::macro_metavar_expr, span, msg).emit(); + } +} + +/// Takes a `tokenstream::TokenTree` and returns a `self::TokenTree`. Specifically, this takes a +/// generic `TokenTree`, such as is used in the rest of the compiler, and returns a `TokenTree` +/// for use in parsing a macro. +/// +/// Converting the given tree may involve reading more tokens. +/// +/// # Parameters +/// +/// - `tree`: the tree we wish to convert. +/// - `outer_trees`: an iterator over trees. We may need to read more tokens from it in order to finish +/// converting `tree` +/// - `parsing_patterns`: same as [parse]. +/// - `sess`: the parsing session. Any errors will be emitted to this session. +/// - `features`: language features so we can do feature gating. +fn parse_tree( + tree: tokenstream::TokenTree, + outer_trees: &mut impl Iterator, + parsing_patterns: bool, + sess: &ParseSess, + node_id: NodeId, + features: &Features, + edition: Edition, +) -> TokenTree { + // Depending on what `tree` is, we could be parsing different parts of a macro + match tree { + // `tree` is a `$` token. Look at the next token in `trees` + tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _) => { + // FIXME: Handle `Invisible`-delimited groups in a more systematic way + // during parsing. + let mut next = outer_trees.next(); + let mut trees: Box>; + if let Some(tokenstream::TokenTree::Delimited(_, Delimiter::Invisible, tts)) = next { + trees = Box::new(tts.into_trees()); + next = trees.next(); + } else { + trees = Box::new(outer_trees); + } + + match next { + // `tree` is followed by a delimited set of token trees. + Some(tokenstream::TokenTree::Delimited(delim_span, delim, tts)) => { + if parsing_patterns { + if delim != Delimiter::Parenthesis { + span_dollar_dollar_or_metavar_in_the_lhs_err( + sess, + &Token { kind: token::OpenDelim(delim), span: delim_span.entire() }, + ); + } + } else { + match delim { + Delimiter::Brace => { + // The delimiter is `{`. This indicates the beginning + // of a meta-variable expression (e.g. `${count(ident)}`). + // Try to parse the meta-variable expression. + match MetaVarExpr::parse(&tts, delim_span.entire(), sess) { + Err(mut err) => { + err.emit(); + // Returns early the same read `$` to avoid spanning + // unrelated diagnostics that could be performed afterwards + return TokenTree::token(token::Dollar, span); + } + Ok(elem) => { + maybe_emit_macro_metavar_expr_feature( + features, + sess, + delim_span.entire(), + ); + return TokenTree::MetaVarExpr(delim_span, elem); + } + } + } + Delimiter::Parenthesis => {} + _ => { + let tok = pprust::token_kind_to_string(&token::OpenDelim(delim)); + let msg = format!("expected `(` or `{{`, found `{}`", tok); + sess.span_diagnostic.span_err(delim_span.entire(), &msg); + } + } + } + // If we didn't find a metavar expression above, then we must have a + // repetition sequence in the macro (e.g. `$(pat)*`). Parse the + // contents of the sequence itself + let sequence = parse(tts, parsing_patterns, sess, node_id, features, edition); + // Get the Kleene operator and optional separator + let (separator, kleene) = + parse_sep_and_kleene_op(&mut trees, delim_span.entire(), sess); + // Count the number of captured "names" (i.e., named metavars) + let num_captures = + if parsing_patterns { count_metavar_decls(&sequence) } else { 0 }; + TokenTree::Sequence( + delim_span, + SequenceRepetition { tts: sequence, separator, kleene, num_captures }, + ) + } + + // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate` + // special metavariable that names the crate of the invocation. + Some(tokenstream::TokenTree::Token(token, _)) if token.is_ident() => { + let (ident, is_raw) = token.ident().unwrap(); + let span = ident.span.with_lo(span.lo()); + if ident.name == kw::Crate && !is_raw { + TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span) + } else { + TokenTree::MetaVar(span, ident) + } + } + + // `tree` is followed by another `$`. This is an escaped `$`. + Some(tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _)) => { + if parsing_patterns { + span_dollar_dollar_or_metavar_in_the_lhs_err( + sess, + &Token { kind: token::Dollar, span }, + ); + } else { + maybe_emit_macro_metavar_expr_feature(features, sess, span); + } + TokenTree::token(token::Dollar, span) + } + + // `tree` is followed by some other token. This is an error. + Some(tokenstream::TokenTree::Token(token, _)) => { + let msg = format!( + "expected identifier, found `{}`", + pprust::token_to_string(&token), + ); + sess.span_diagnostic.span_err(token.span, &msg); + TokenTree::MetaVar(token.span, Ident::empty()) + } + + // There are no more tokens. Just return the `$` we already have. + None => TokenTree::token(token::Dollar, span), + } + } + + // `tree` is an arbitrary token. Keep it. + tokenstream::TokenTree::Token(token, _) => TokenTree::Token(token), + + // `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to + // descend into the delimited set and further parse it. + tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited( + span, + Delimited { + delim, + tts: parse(tts, parsing_patterns, sess, node_id, features, edition), + }, + ), + } +} + +/// Takes a token and returns `Some(KleeneOp)` if the token is `+` `*` or `?`. Otherwise, return +/// `None`. +fn kleene_op(token: &Token) -> Option { + match token.kind { + token::BinOp(token::Star) => Some(KleeneOp::ZeroOrMore), + token::BinOp(token::Plus) => Some(KleeneOp::OneOrMore), + token::Question => Some(KleeneOp::ZeroOrOne), + _ => None, + } +} + +/// Parse the next token tree of the input looking for a KleeneOp. Returns +/// +/// - Ok(Ok((op, span))) if the next token tree is a KleeneOp +/// - Ok(Err(tok, span)) if the next token tree is a token but not a KleeneOp +/// - Err(span) if the next token tree is not a token +fn parse_kleene_op( + input: &mut impl Iterator, + span: Span, +) -> Result, Span> { + match input.next() { + Some(tokenstream::TokenTree::Token(token, _)) => match kleene_op(&token) { + Some(op) => Ok(Ok((op, token.span))), + None => Ok(Err(token)), + }, + tree => Err(tree.as_ref().map_or(span, tokenstream::TokenTree::span)), + } +} + +/// Attempt to parse a single Kleene star, possibly with a separator. +/// +/// For example, in a pattern such as `$(a),*`, `a` is the pattern to be repeated, `,` is the +/// separator, and `*` is the Kleene operator. This function is specifically concerned with parsing +/// the last two tokens of such a pattern: namely, the optional separator and the Kleene operator +/// itself. Note that here we are parsing the _macro_ itself, rather than trying to match some +/// stream of tokens in an invocation of a macro. +/// +/// This function will take some input iterator `input` corresponding to `span` and a parsing +/// session `sess`. If the next one (or possibly two) tokens in `input` correspond to a Kleene +/// operator and separator, then a tuple with `(separator, KleeneOp)` is returned. Otherwise, an +/// error with the appropriate span is emitted to `sess` and a dummy value is returned. +fn parse_sep_and_kleene_op( + input: &mut impl Iterator, + span: Span, + sess: &ParseSess, +) -> (Option, KleeneToken) { + // We basically look at two token trees here, denoted as #1 and #2 below + let span = match parse_kleene_op(input, span) { + // #1 is a `?`, `+`, or `*` KleeneOp + Ok(Ok((op, span))) => return (None, KleeneToken::new(op, span)), + + // #1 is a separator followed by #2, a KleeneOp + Ok(Err(token)) => match parse_kleene_op(input, token.span) { + // #2 is the `?` Kleene op, which does not take a separator (error) + Ok(Ok((KleeneOp::ZeroOrOne, span))) => { + // Error! + sess.span_diagnostic.span_err( + token.span, + "the `?` macro repetition operator does not take a separator", + ); + + // Return a dummy + return (None, KleeneToken::new(KleeneOp::ZeroOrMore, span)); + } + + // #2 is a KleeneOp :D + Ok(Ok((op, span))) => return (Some(token), KleeneToken::new(op, span)), + + // #2 is a random token or not a token at all :( + Ok(Err(Token { span, .. })) | Err(span) => span, + }, + + // #1 is not a token + Err(span) => span, + }; + + // If we ever get to this point, we have experienced an "unexpected token" error + sess.span_diagnostic.span_err(span, "expected one of: `*`, `+`, or `?`"); + + // Return a dummy + (None, KleeneToken::new(KleeneOp::ZeroOrMore, span)) +} + +// `$$` or a meta-variable is the lhs of a macro but shouldn't. +// +// For example, `macro_rules! foo { ( ${length()} ) => {} }` +fn span_dollar_dollar_or_metavar_in_the_lhs_err<'sess>(sess: &'sess ParseSess, token: &Token) { + sess.span_diagnostic + .span_err(token.span, &format!("unexpected token: {}", pprust::token_to_string(token))); + sess.span_diagnostic.span_note_without_error( + token.span, + "`$$` and meta-variable expressions are not allowed inside macro parameter definitions", + ); +} diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs new file mode 100644 index 000000000..e47ea83ac --- /dev/null +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -0,0 +1,580 @@ +use crate::base::ExtCtxt; +use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch}; +use crate::mbe::{self, MetaVarExpr}; +use rustc_ast::mut_visit::{self, MutVisitor}; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree}; +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::{pluralize, PResult}; +use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed}; +use rustc_span::hygiene::{LocalExpnId, Transparency}; +use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent}; +use rustc_span::Span; + +use smallvec::{smallvec, SmallVec}; +use std::mem; + +// A Marker adds the given mark to the syntax context. +struct Marker(LocalExpnId, Transparency); + +impl MutVisitor for Marker { + const VISIT_TOKENS: bool = true; + + fn visit_span(&mut self, span: &mut Span) { + *span = span.apply_mark(self.0.to_expn_id(), self.1) + } +} + +/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`). +enum Frame<'a> { + Delimited { tts: &'a [mbe::TokenTree], idx: usize, delim: Delimiter, span: DelimSpan }, + Sequence { tts: &'a [mbe::TokenTree], idx: usize, sep: Option }, +} + +impl<'a> Frame<'a> { + /// Construct a new frame around the delimited set of tokens. + fn new(src: &'a mbe::Delimited, span: DelimSpan) -> Frame<'a> { + Frame::Delimited { tts: &src.tts, idx: 0, delim: src.delim, span } + } +} + +impl<'a> Iterator for Frame<'a> { + type Item = &'a mbe::TokenTree; + + fn next(&mut self) -> Option<&'a mbe::TokenTree> { + match self { + Frame::Delimited { tts, ref mut idx, .. } + | Frame::Sequence { tts, ref mut idx, .. } => { + let res = tts.get(*idx); + *idx += 1; + res + } + } + } +} + +/// This can do Macro-By-Example transcription. +/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the +/// invocation. We are assuming we already know there is a match. +/// - `src` is the RHS of the MBE, that is, the "example" we are filling in. +/// +/// For example, +/// +/// ```rust +/// macro_rules! foo { +/// ($id:ident) => { println!("{}", stringify!($id)); } +/// } +/// +/// foo!(bar); +/// ``` +/// +/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`. +/// +/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`. +/// +/// Along the way, we do some additional error checking. +pub(super) fn transcribe<'a>( + cx: &ExtCtxt<'a>, + interp: &FxHashMap, + src: &mbe::Delimited, + src_span: DelimSpan, + transparency: Transparency, +) -> PResult<'a, TokenStream> { + // Nothing for us to transcribe... + if src.tts.is_empty() { + return Ok(TokenStream::default()); + } + + // We descend into the RHS (`src`), expanding things as we go. This stack contains the things + // we have yet to expand/are still expanding. We start the stack off with the whole RHS. + let mut stack: SmallVec<[Frame<'_>; 1]> = smallvec![Frame::new(&src, src_span)]; + + // As we descend in the RHS, we will need to be able to match nested sequences of matchers. + // `repeats` keeps track of where we are in matching at each level, with the last element being + // the most deeply nested sequence. This is used as a stack. + let mut repeats = Vec::new(); + + // `result` contains resulting token stream from the TokenTree we just finished processing. At + // the end, this will contain the full result of transcription, but at arbitrary points during + // `transcribe`, `result` will contain subsets of the final result. + // + // Specifically, as we descend into each TokenTree, we will push the existing results onto the + // `result_stack` and clear `results`. We will then produce the results of transcribing the + // TokenTree into `results`. Then, as we unwind back out of the `TokenTree`, we will pop the + // `result_stack` and append `results` too it to produce the new `results` up to that point. + // + // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level + // again, and we are done transcribing. + let mut result: Vec = Vec::new(); + let mut result_stack = Vec::new(); + let mut marker = Marker(cx.current_expansion.id, transparency); + + loop { + // Look at the last frame on the stack. + // If it still has a TokenTree we have not looked at yet, use that tree. + let Some(tree) = stack.last_mut().unwrap().next() else { + // This else-case never produces a value for `tree` (it `continue`s or `return`s). + + // Otherwise, if we have just reached the end of a sequence and we can keep repeating, + // go back to the beginning of the sequence. + if let Frame::Sequence { idx, sep, .. } = stack.last_mut().unwrap() { + let (repeat_idx, repeat_len) = repeats.last_mut().unwrap(); + *repeat_idx += 1; + if repeat_idx < repeat_len { + *idx = 0; + if let Some(sep) = sep { + result.push(TokenTree::Token(sep.clone(), Spacing::Alone)); + } + continue; + } + } + + // We are done with the top of the stack. Pop it. Depending on what it was, we do + // different things. Note that the outermost item must be the delimited, wrapped RHS + // that was passed in originally to `transcribe`. + match stack.pop().unwrap() { + // Done with a sequence. Pop from repeats. + Frame::Sequence { .. } => { + repeats.pop(); + } + + // We are done processing a Delimited. If this is the top-level delimited, we are + // done. Otherwise, we unwind the result_stack to append what we have produced to + // any previous results. + Frame::Delimited { delim, span, .. } => { + if result_stack.is_empty() { + // No results left to compute! We are back at the top-level. + return Ok(TokenStream::new(result)); + } + + // Step back into the parent Delimited. + let tree = TokenTree::Delimited(span, delim, TokenStream::new(result)); + result = result_stack.pop().unwrap(); + result.push(tree); + } + } + continue; + }; + + // At this point, we know we are in the middle of a TokenTree (the last one on `stack`). + // `tree` contains the next `TokenTree` to be processed. + match tree { + // We are descending into a sequence. We first make sure that the matchers in the RHS + // and the matches in `interp` have the same shape. Otherwise, either the caller or the + // macro writer has made a mistake. + seq @ mbe::TokenTree::Sequence(_, delimited) => { + match lockstep_iter_size(&seq, interp, &repeats) { + LockstepIterSize::Unconstrained => { + return Err(cx.struct_span_err( + seq.span(), /* blame macro writer */ + "attempted to repeat an expression containing no syntax variables \ + matched as repeating at this depth", + )); + } + + LockstepIterSize::Contradiction(msg) => { + // FIXME: this really ought to be caught at macro definition time... It + // happens when two meta-variables are used in the same repetition in a + // sequence, but they come from different sequence matchers and repeat + // different amounts. + return Err(cx.struct_span_err(seq.span(), &msg)); + } + + LockstepIterSize::Constraint(len, _) => { + // We do this to avoid an extra clone above. We know that this is a + // sequence already. + let mbe::TokenTree::Sequence(sp, seq) = seq else { + unreachable!() + }; + + // Is the repetition empty? + if len == 0 { + if seq.kleene.op == mbe::KleeneOp::OneOrMore { + // FIXME: this really ought to be caught at macro definition + // time... It happens when the Kleene operator in the matcher and + // the body for the same meta-variable do not match. + return Err(cx.struct_span_err( + sp.entire(), + "this must repeat at least once", + )); + } + } else { + // 0 is the initial counter (we have done 0 repetitions so far). `len` + // is the total number of repetitions we should generate. + repeats.push((0, len)); + + // The first time we encounter the sequence we push it to the stack. It + // then gets reused (see the beginning of the loop) until we are done + // repeating. + stack.push(Frame::Sequence { + idx: 0, + sep: seq.separator.clone(), + tts: &delimited.tts, + }); + } + } + } + } + + // Replace the meta-var with the matched token tree from the invocation. + mbe::TokenTree::MetaVar(mut sp, mut original_ident) => { + // Find the matched nonterminal from the macro invocation, and use it to replace + // the meta-var. + let ident = MacroRulesNormalizedIdent::new(original_ident); + if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { + match cur_matched { + MatchedTokenTree(ref tt) => { + // `tt`s are emitted into the output stream directly as "raw tokens", + // without wrapping them into groups. + let token = tt.clone(); + result.push(token); + } + MatchedNonterminal(ref nt) => { + // Other variables are emitted into the output stream as groups with + // `Delimiter::Invisible` to maintain parsing priorities. + // `Interpolated` is currently used for such groups in rustc parser. + marker.visit_span(&mut sp); + let token = TokenTree::token_alone(token::Interpolated(nt.clone()), sp); + result.push(token); + } + MatchedSeq(..) => { + // We were unable to descend far enough. This is an error. + return Err(cx.struct_span_err( + sp, /* blame the macro writer */ + &format!("variable '{}' is still repeating at this depth", ident), + )); + } + } + } else { + // If we aren't able to match the meta-var, we push it back into the result but + // with modified syntax context. (I believe this supports nested macros). + marker.visit_span(&mut sp); + marker.visit_ident(&mut original_ident); + result.push(TokenTree::token_alone(token::Dollar, sp)); + result.push(TokenTree::Token( + Token::from_ast_ident(original_ident), + Spacing::Alone, + )); + } + } + + // Replace meta-variable expressions with the result of their expansion. + mbe::TokenTree::MetaVarExpr(sp, expr) => { + transcribe_metavar_expr(cx, expr, interp, &mut marker, &repeats, &mut result, &sp)?; + } + + // If we are entering a new delimiter, we push its contents to the `stack` to be + // processed, and we push all of the currently produced results to the `result_stack`. + // We will produce all of the results of the inside of the `Delimited` and then we will + // jump back out of the Delimited, pop the result_stack and add the new results back to + // the previous results (from outside the Delimited). + mbe::TokenTree::Delimited(mut span, delimited) => { + mut_visit::visit_delim_span(&mut span, &mut marker); + stack.push(Frame::Delimited { + tts: &delimited.tts, + delim: delimited.delim, + idx: 0, + span, + }); + result_stack.push(mem::take(&mut result)); + } + + // Nothing much to do here. Just push the token to the result, being careful to + // preserve syntax context. + mbe::TokenTree::Token(token) => { + let mut token = token.clone(); + mut_visit::visit_token(&mut token, &mut marker); + let tt = TokenTree::Token(token, Spacing::Alone); + result.push(tt); + } + + // There should be no meta-var declarations in the invocation of a macro. + mbe::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl"), + } + } +} + +/// Lookup the meta-var named `ident` and return the matched token tree from the invocation using +/// the set of matches `interpolations`. +/// +/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend +/// into the right place in nested matchers. If we attempt to descend too far, the macro writer has +/// made a mistake, and we return `None`. +fn lookup_cur_matched<'a>( + ident: MacroRulesNormalizedIdent, + interpolations: &'a FxHashMap, + repeats: &[(usize, usize)], +) -> Option<&'a NamedMatch> { + interpolations.get(&ident).map(|matched| { + let mut matched = matched; + for &(idx, _) in repeats { + match matched { + MatchedTokenTree(_) | MatchedNonterminal(_) => break, + MatchedSeq(ref ads) => matched = ads.get(idx).unwrap(), + } + } + + matched + }) +} + +/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make +/// sure that the size of each sequence and all of its nested sequences are the same as the sizes +/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody +/// has made a mistake (either the macro writer or caller). +#[derive(Clone)] +enum LockstepIterSize { + /// No constraints on length of matcher. This is true for any TokenTree variants except a + /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`). + Unconstrained, + + /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the + /// meta-var are returned. + Constraint(usize, MacroRulesNormalizedIdent), + + /// Two `Constraint`s on the same sequence had different lengths. This is an error. + Contradiction(String), +} + +impl LockstepIterSize { + /// Find incompatibilities in matcher/invocation sizes. + /// - `Unconstrained` is compatible with everything. + /// - `Contradiction` is incompatible with everything. + /// - `Constraint(len)` is only compatible with other constraints of the same length. + fn with(self, other: LockstepIterSize) -> LockstepIterSize { + match self { + LockstepIterSize::Unconstrained => other, + LockstepIterSize::Contradiction(_) => self, + LockstepIterSize::Constraint(l_len, ref l_id) => match other { + LockstepIterSize::Unconstrained => self, + LockstepIterSize::Contradiction(_) => other, + LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self, + LockstepIterSize::Constraint(r_len, r_id) => { + let msg = format!( + "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}", + l_id, + l_len, + pluralize!(l_len), + r_id, + r_len, + pluralize!(r_len), + ); + LockstepIterSize::Contradiction(msg) + } + }, + } + } +} + +/// Given a `tree`, make sure that all sequences have the same length as the matches for the +/// appropriate meta-vars in `interpolations`. +/// +/// Note that if `repeats` does not match the exact correct depth of a meta-var, +/// `lookup_cur_matched` will return `None`, which is why this still works even in the presence of +/// multiple nested matcher sequences. +/// +/// Example: `$($($x $y)+*);+` -- we need to make sure that `x` and `y` repeat the same amount as +/// each other at the given depth when the macro was invoked. If they don't it might mean they were +/// declared at unequal depths or there was a compile bug. For example, if we have 3 repetitions of +/// the outer sequence and 4 repetitions of the inner sequence for `x`, we should have the same for +/// `y`; otherwise, we can't transcribe them both at the given depth. +fn lockstep_iter_size( + tree: &mbe::TokenTree, + interpolations: &FxHashMap, + repeats: &[(usize, usize)], +) -> LockstepIterSize { + use mbe::TokenTree; + match *tree { + TokenTree::Delimited(_, ref delimited) => { + delimited.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| { + size.with(lockstep_iter_size(tt, interpolations, repeats)) + }) + } + TokenTree::Sequence(_, ref seq) => { + seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| { + size.with(lockstep_iter_size(tt, interpolations, repeats)) + }) + } + TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => { + let name = MacroRulesNormalizedIdent::new(name); + match lookup_cur_matched(name, interpolations, repeats) { + Some(matched) => match matched { + MatchedTokenTree(_) | MatchedNonterminal(_) => LockstepIterSize::Unconstrained, + MatchedSeq(ref ads) => LockstepIterSize::Constraint(ads.len(), name), + }, + _ => LockstepIterSize::Unconstrained, + } + } + TokenTree::MetaVarExpr(_, ref expr) => { + let default_rslt = LockstepIterSize::Unconstrained; + let Some(ident) = expr.ident() else { return default_rslt; }; + let name = MacroRulesNormalizedIdent::new(ident); + match lookup_cur_matched(name, interpolations, repeats) { + Some(MatchedSeq(ref ads)) => { + default_rslt.with(LockstepIterSize::Constraint(ads.len(), name)) + } + _ => default_rslt, + } + } + TokenTree::Token(..) => LockstepIterSize::Unconstrained, + } +} + +/// Used solely by the `count` meta-variable expression, counts the outer-most repetitions at a +/// given optional nested depth. +/// +/// For example, a macro parameter of `$( { $( $foo:ident ),* } )*` called with `{ a, b } { c }`: +/// +/// * `[ $( ${count(foo)} ),* ]` will return [2, 1] with a, b = 2 and c = 1 +/// * `[ $( ${count(foo, 0)} ),* ]` will be the same as `[ $( ${count(foo)} ),* ]` +/// * `[ $( ${count(foo, 1)} ),* ]` will return an error because `${count(foo, 1)}` is +/// declared inside a single repetition and the index `1` implies two nested repetitions. +fn count_repetitions<'a>( + cx: &ExtCtxt<'a>, + depth_opt: Option, + mut matched: &NamedMatch, + repeats: &[(usize, usize)], + sp: &DelimSpan, +) -> PResult<'a, usize> { + // Recursively count the number of matches in `matched` at given depth + // (or at the top-level of `matched` if no depth is given). + fn count<'a>( + cx: &ExtCtxt<'a>, + declared_lhs_depth: usize, + depth_opt: Option, + matched: &NamedMatch, + sp: &DelimSpan, + ) -> PResult<'a, usize> { + match matched { + MatchedTokenTree(_) | MatchedNonterminal(_) => { + if declared_lhs_depth == 0 { + return Err(cx.struct_span_err( + sp.entire(), + "`count` can not be placed inside the inner-most repetition", + )); + } + match depth_opt { + None => Ok(1), + Some(_) => Err(out_of_bounds_err(cx, declared_lhs_depth, sp.entire(), "count")), + } + } + MatchedSeq(ref named_matches) => { + let new_declared_lhs_depth = declared_lhs_depth + 1; + match depth_opt { + None => named_matches + .iter() + .map(|elem| count(cx, new_declared_lhs_depth, None, elem, sp)) + .sum(), + Some(0) => Ok(named_matches.len()), + Some(depth) => named_matches + .iter() + .map(|elem| count(cx, new_declared_lhs_depth, Some(depth - 1), elem, sp)) + .sum(), + } + } + } + } + // `repeats` records all of the nested levels at which we are currently + // matching meta-variables. The meta-var-expr `count($x)` only counts + // matches that occur in this "subtree" of the `NamedMatch` where we + // are currently transcribing, so we need to descend to that subtree + // before we start counting. `matched` contains the various levels of the + // tree as we descend, and its final value is the subtree we are currently at. + for &(idx, _) in repeats { + if let MatchedSeq(ref ads) = matched { + matched = &ads[idx]; + } + } + count(cx, 0, depth_opt, matched, sp) +} + +/// Returns a `NamedMatch` item declared on the LHS given an arbitrary [Ident] +fn matched_from_ident<'ctx, 'interp, 'rslt>( + cx: &ExtCtxt<'ctx>, + ident: Ident, + interp: &'interp FxHashMap, +) -> PResult<'ctx, &'rslt NamedMatch> +where + 'interp: 'rslt, +{ + let span = ident.span; + let key = MacroRulesNormalizedIdent::new(ident); + interp.get(&key).ok_or_else(|| { + cx.struct_span_err( + span, + &format!("variable `{}` is not recognized in meta-variable expression", key), + ) + }) +} + +/// Used by meta-variable expressions when an user input is out of the actual declared bounds. For +/// example, index(999999) in an repetition of only three elements. +fn out_of_bounds_err<'a>( + cx: &ExtCtxt<'a>, + max: usize, + span: Span, + ty: &str, +) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + let msg = if max == 0 { + format!( + "meta-variable expression `{ty}` with depth parameter \ + must be called inside of a macro repetition" + ) + } else { + format!( + "depth parameter on meta-variable expression `{ty}` \ + must be less than {max}" + ) + }; + cx.struct_span_err(span, &msg) +} + +fn transcribe_metavar_expr<'a>( + cx: &ExtCtxt<'a>, + expr: &MetaVarExpr, + interp: &FxHashMap, + marker: &mut Marker, + repeats: &[(usize, usize)], + result: &mut Vec, + sp: &DelimSpan, +) -> PResult<'a, ()> { + let mut visited_span = || { + let mut span = sp.entire(); + marker.visit_span(&mut span); + span + }; + match *expr { + MetaVarExpr::Count(original_ident, depth_opt) => { + let matched = matched_from_ident(cx, original_ident, interp)?; + let count = count_repetitions(cx, depth_opt, matched, &repeats, sp)?; + let tt = TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(count), None), + visited_span(), + ); + result.push(tt); + } + MetaVarExpr::Ignore(original_ident) => { + // Used to ensure that `original_ident` is present in the LHS + let _ = matched_from_ident(cx, original_ident, interp)?; + } + MetaVarExpr::Index(depth) => match repeats.iter().nth_back(depth) { + Some((index, _)) => { + result.push(TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(*index), None), + visited_span(), + )); + } + None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "index")), + }, + MetaVarExpr::Length(depth) => match repeats.iter().nth_back(depth) { + Some((_, length)) => { + result.push(TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(*length), None), + visited_span(), + )); + } + None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "length")), + }, + } + Ok(()) +} -- cgit v1.2.3