summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_expand/src/mbe
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_expand/src/mbe
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_expand/src/mbe')
-rw-r--r--compiler/rustc_expand/src/mbe/macro_check.rs652
-rw-r--r--compiler/rustc_expand/src/mbe/macro_parser.rs704
-rw-r--r--compiler/rustc_expand/src/mbe/macro_rules.rs1420
-rw-r--r--compiler/rustc_expand/src/mbe/metavar_expr.rs161
-rw-r--r--compiler/rustc_expand/src/mbe/quoted.rs366
-rw-r--r--compiler/rustc_expand/src/mbe/transcribe.rs580
6 files changed, 3883 insertions, 0 deletions
diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs
new file mode 100644
index 000000000..8994a2f78
--- /dev/null
+++ b/compiler/rustc_expand/src/mbe/macro_check.rs
@@ -0,0 +1,652 @@
+//! Checks that meta-variables in macro definition are correctly declared and used.
+//!
+//! # What is checked
+//!
+//! ## Meta-variables must not be bound twice
+//!
+//! ```compile_fail
+//! macro_rules! foo { ($x:tt $x:tt) => { $x }; }
+//! ```
+//!
+//! This check is sound (no false-negative) and complete (no false-positive).
+//!
+//! ## Meta-variables must not be free
+//!
+//! ```
+//! macro_rules! foo { () => { $x }; }
+//! ```
+//!
+//! This check is also done at macro instantiation but only if the branch is taken.
+//!
+//! ## Meta-variables must repeat at least as many times as their binder
+//!
+//! ```
+//! macro_rules! foo { ($($x:tt)*) => { $x }; }
+//! ```
+//!
+//! This check is also done at macro instantiation but only if the branch is taken.
+//!
+//! ## Meta-variables must repeat with the same Kleene operators as their binder
+//!
+//! ```
+//! macro_rules! foo { ($($x:tt)+) => { $($x)* }; }
+//! ```
+//!
+//! This check is not done at macro instantiation.
+//!
+//! # Disclaimer
+//!
+//! In the presence of nested macros (a macro defined in a macro), those checks may have false
+//! positives and false negatives. We try to detect those cases by recognizing potential macro
+//! definitions in RHSes, but nested macros may be hidden through the use of particular values of
+//! meta-variables.
+//!
+//! ## Examples of false positive
+//!
+//! False positives can come from cases where we don't recognize a nested macro, because it depends
+//! on particular values of meta-variables. In the following example, we think both instances of
+//! `$x` are free, which is a correct statement if `$name` is anything but `macro_rules`. But when
+//! `$name` is `macro_rules`, like in the instantiation below, then `$x:tt` is actually a binder of
+//! the nested macro and `$x` is bound to it.
+//!
+//! ```
+//! macro_rules! foo { ($name:ident) => { $name! bar { ($x:tt) => { $x }; } }; }
+//! foo!(macro_rules);
+//! ```
+//!
+//! False positives can also come from cases where we think there is a nested macro while there
+//! isn't. In the following example, we think `$x` is free, which is incorrect because `bar` is not
+//! a nested macro since it is not evaluated as code by `stringify!`.
+//!
+//! ```
+//! macro_rules! foo { () => { stringify!(macro_rules! bar { () => { $x }; }) }; }
+//! ```
+//!
+//! ## Examples of false negative
+//!
+//! False negatives can come from cases where we don't recognize a meta-variable, because it depends
+//! on particular values of meta-variables. In the following examples, we don't see that if `$d` is
+//! instantiated with `$` then `$d z` becomes `$z` in the nested macro definition and is thus a free
+//! meta-variable. Note however, that if `foo` is instantiated, then we would check the definition
+//! of `bar` and would see the issue.
+//!
+//! ```
+//! macro_rules! foo { ($d:tt) => { macro_rules! bar { ($y:tt) => { $d z }; } }; }
+//! ```
+//!
+//! # How it is checked
+//!
+//! There are 3 main functions: `check_binders`, `check_occurrences`, and `check_nested_macro`. They
+//! all need some kind of environment.
+//!
+//! ## Environments
+//!
+//! Environments are used to pass information.
+//!
+//! ### From LHS to RHS
+//!
+//! When checking a LHS with `check_binders`, we produce (and use) an environment for binders,
+//! namely `Binders`. This is a mapping from binder name to information about that binder: the span
+//! of the binder for error messages and the stack of Kleene operators under which it was bound in
+//! the LHS.
+//!
+//! This environment is used by both the LHS and RHS. The LHS uses it to detect duplicate binders.
+//! The RHS uses it to detect the other errors.
+//!
+//! ### From outer macro to inner macro
+//!
+//! When checking the RHS of an outer macro and we detect a nested macro definition, we push the
+//! current state, namely `MacroState`, to an environment of nested macro definitions. Each state
+//! stores the LHS binders when entering the macro definition as well as the stack of Kleene
+//! operators under which the inner macro is defined in the RHS.
+//!
+//! This environment is a stack representing the nesting of macro definitions. As such, the stack of
+//! Kleene operators under which a meta-variable is repeating is the concatenation of the stacks
+//! stored when entering a macro definition starting from the state in which the meta-variable is
+//! bound.
+use crate::mbe::{KleeneToken, TokenTree};
+
+use rustc_ast::token::{Delimiter, Token, TokenKind};
+use rustc_ast::{NodeId, DUMMY_NODE_ID};
+use rustc_data_structures::fx::FxHashMap;
+use rustc_errors::MultiSpan;
+use rustc_session::lint::builtin::{META_VARIABLE_MISUSE, MISSING_FRAGMENT_SPECIFIER};
+use rustc_session::parse::ParseSess;
+use rustc_span::symbol::kw;
+use rustc_span::{symbol::MacroRulesNormalizedIdent, Span};
+
+use smallvec::SmallVec;
+
+use std::iter;
+
+/// Stack represented as linked list.
+///
+/// Those are used for environments because they grow incrementally and are not mutable.
+enum Stack<'a, T> {
+ /// Empty stack.
+ Empty,
+ /// A non-empty stack.
+ Push {
+ /// The top element.
+ top: T,
+ /// The previous elements.
+ prev: &'a Stack<'a, T>,
+ },
+}
+
+impl<'a, T> Stack<'a, T> {
+ /// Returns whether a stack is empty.
+ fn is_empty(&self) -> bool {
+ matches!(*self, Stack::Empty)
+ }
+
+ /// Returns a new stack with an element of top.
+ fn push(&'a self, top: T) -> Stack<'a, T> {
+ Stack::Push { top, prev: self }
+ }
+}
+
+impl<'a, T> Iterator for &'a Stack<'a, T> {
+ type Item = &'a T;
+
+ // Iterates from top to bottom of the stack.
+ fn next(&mut self) -> Option<&'a T> {
+ match *self {
+ Stack::Empty => None,
+ Stack::Push { ref top, ref prev } => {
+ *self = prev;
+ Some(top)
+ }
+ }
+ }
+}
+
+impl From<&Stack<'_, KleeneToken>> for SmallVec<[KleeneToken; 1]> {
+ fn from(ops: &Stack<'_, KleeneToken>) -> SmallVec<[KleeneToken; 1]> {
+ let mut ops: SmallVec<[KleeneToken; 1]> = ops.cloned().collect();
+ // The stack is innermost on top. We want outermost first.
+ ops.reverse();
+ ops
+ }
+}
+
+/// Information attached to a meta-variable binder in LHS.
+struct BinderInfo {
+ /// The span of the meta-variable in LHS.
+ span: Span,
+ /// The stack of Kleene operators (outermost first).
+ ops: SmallVec<[KleeneToken; 1]>,
+}
+
+/// An environment of meta-variables to their binder information.
+type Binders = FxHashMap<MacroRulesNormalizedIdent, BinderInfo>;
+
+/// The state at which we entered a macro definition in the RHS of another macro definition.
+struct MacroState<'a> {
+ /// The binders of the branch where we entered the macro definition.
+ binders: &'a Binders,
+ /// The stack of Kleene operators (outermost first) where we entered the macro definition.
+ ops: SmallVec<[KleeneToken; 1]>,
+}
+
+/// Checks that meta-variables are used correctly in a macro definition.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `span` is used when no spans are available
+/// - `lhses` and `rhses` should have the same length and represent the macro definition
+pub(super) fn check_meta_variables(
+ sess: &ParseSess,
+ node_id: NodeId,
+ span: Span,
+ lhses: &[TokenTree],
+ rhses: &[TokenTree],
+) -> bool {
+ if lhses.len() != rhses.len() {
+ sess.span_diagnostic.span_bug(span, "length mismatch between LHSes and RHSes")
+ }
+ let mut valid = true;
+ for (lhs, rhs) in iter::zip(lhses, rhses) {
+ let mut binders = Binders::default();
+ check_binders(sess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut valid);
+ check_occurrences(sess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut valid);
+ }
+ valid
+}
+
+/// Checks `lhs` as part of the LHS of a macro definition, extends `binders` with new binders, and
+/// sets `valid` to false in case of errors.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `lhs` is checked as part of a LHS
+/// - `macros` is the stack of possible outer macros
+/// - `binders` contains the binders of the LHS
+/// - `ops` is the stack of Kleene operators from the LHS
+/// - `valid` is set in case of errors
+fn check_binders(
+ sess: &ParseSess,
+ node_id: NodeId,
+ lhs: &TokenTree,
+ macros: &Stack<'_, MacroState<'_>>,
+ binders: &mut Binders,
+ ops: &Stack<'_, KleeneToken>,
+ valid: &mut bool,
+) {
+ match *lhs {
+ TokenTree::Token(..) => {}
+ // This can only happen when checking a nested macro because this LHS is then in the RHS of
+ // the outer macro. See ui/macros/macro-of-higher-order.rs where $y:$fragment in the
+ // LHS of the nested macro (and RHS of the outer macro) is parsed as MetaVar(y) Colon
+ // MetaVar(fragment) and not as MetaVarDecl(y, fragment).
+ TokenTree::MetaVar(span, name) => {
+ if macros.is_empty() {
+ sess.span_diagnostic.span_bug(span, "unexpected MetaVar in lhs");
+ }
+ let name = MacroRulesNormalizedIdent::new(name);
+ // There are 3 possibilities:
+ if let Some(prev_info) = binders.get(&name) {
+ // 1. The meta-variable is already bound in the current LHS: This is an error.
+ let mut span = MultiSpan::from_span(span);
+ span.push_span_label(prev_info.span, "previous declaration");
+ buffer_lint(sess, span, node_id, "duplicate matcher binding");
+ } else if get_binder_info(macros, binders, name).is_none() {
+ // 2. The meta-variable is free: This is a binder.
+ binders.insert(name, BinderInfo { span, ops: ops.into() });
+ } else {
+ // 3. The meta-variable is bound: This is an occurrence.
+ check_occurrences(sess, node_id, lhs, macros, binders, ops, valid);
+ }
+ }
+ // Similarly, this can only happen when checking a toplevel macro.
+ TokenTree::MetaVarDecl(span, name, kind) => {
+ if kind.is_none() && node_id != DUMMY_NODE_ID {
+ // FIXME: Report this as a hard error eventually and remove equivalent errors from
+ // `parse_tt_inner` and `nameize`. Until then the error may be reported twice, once
+ // as a hard error and then once as a buffered lint.
+ sess.buffer_lint(
+ MISSING_FRAGMENT_SPECIFIER,
+ span,
+ node_id,
+ "missing fragment specifier",
+ );
+ }
+ if !macros.is_empty() {
+ sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs");
+ }
+ let name = MacroRulesNormalizedIdent::new(name);
+ if let Some(prev_info) = get_binder_info(macros, binders, name) {
+ // Duplicate binders at the top-level macro definition are errors. The lint is only
+ // for nested macro definitions.
+ sess.span_diagnostic
+ .struct_span_err(span, "duplicate matcher binding")
+ .span_label(span, "duplicate binding")
+ .span_label(prev_info.span, "previous binding")
+ .emit();
+ *valid = false;
+ } else {
+ binders.insert(name, BinderInfo { span, ops: ops.into() });
+ }
+ }
+ // `MetaVarExpr` can not appear in the LHS of a macro arm
+ TokenTree::MetaVarExpr(..) => {}
+ TokenTree::Delimited(_, ref del) => {
+ for tt in &del.tts {
+ check_binders(sess, node_id, tt, macros, binders, ops, valid);
+ }
+ }
+ TokenTree::Sequence(_, ref seq) => {
+ let ops = ops.push(seq.kleene);
+ for tt in &seq.tts {
+ check_binders(sess, node_id, tt, macros, binders, &ops, valid);
+ }
+ }
+ }
+}
+
+/// Returns the binder information of a meta-variable.
+///
+/// Arguments:
+/// - `macros` is the stack of possible outer macros
+/// - `binders` contains the current binders
+/// - `name` is the name of the meta-variable we are looking for
+fn get_binder_info<'a>(
+ mut macros: &'a Stack<'a, MacroState<'a>>,
+ binders: &'a Binders,
+ name: MacroRulesNormalizedIdent,
+) -> Option<&'a BinderInfo> {
+ binders.get(&name).or_else(|| macros.find_map(|state| state.binders.get(&name)))
+}
+
+/// Checks `rhs` as part of the RHS of a macro definition and sets `valid` to false in case of
+/// errors.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `rhs` is checked as part of a RHS
+/// - `macros` is the stack of possible outer macros
+/// - `binders` contains the binders of the associated LHS
+/// - `ops` is the stack of Kleene operators from the RHS
+/// - `valid` is set in case of errors
+fn check_occurrences(
+ sess: &ParseSess,
+ node_id: NodeId,
+ rhs: &TokenTree,
+ macros: &Stack<'_, MacroState<'_>>,
+ binders: &Binders,
+ ops: &Stack<'_, KleeneToken>,
+ valid: &mut bool,
+) {
+ match *rhs {
+ TokenTree::Token(..) => {}
+ TokenTree::MetaVarDecl(span, _name, _kind) => {
+ sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in rhs")
+ }
+ TokenTree::MetaVar(span, name) => {
+ let name = MacroRulesNormalizedIdent::new(name);
+ check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name);
+ }
+ TokenTree::MetaVarExpr(dl, ref mve) => {
+ let Some(name) = mve.ident().map(MacroRulesNormalizedIdent::new) else {
+ return;
+ };
+ check_ops_is_prefix(sess, node_id, macros, binders, ops, dl.entire(), name);
+ }
+ TokenTree::Delimited(_, ref del) => {
+ check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid);
+ }
+ TokenTree::Sequence(_, ref seq) => {
+ let ops = ops.push(seq.kleene);
+ check_nested_occurrences(sess, node_id, &seq.tts, macros, binders, &ops, valid);
+ }
+ }
+}
+
+/// Represents the processed prefix of a nested macro.
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum NestedMacroState {
+ /// Nothing that matches a nested macro definition was processed yet.
+ Empty,
+ /// The token `macro_rules` was processed.
+ MacroRules,
+ /// The tokens `macro_rules!` were processed.
+ MacroRulesNot,
+ /// The tokens `macro_rules!` followed by a name were processed. The name may be either directly
+ /// an identifier or a meta-variable (that hopefully would be instantiated by an identifier).
+ MacroRulesNotName,
+ /// The keyword `macro` was processed.
+ Macro,
+ /// The keyword `macro` followed by a name was processed.
+ MacroName,
+ /// The keyword `macro` followed by a name and a token delimited by parentheses was processed.
+ MacroNameParen,
+}
+
+/// Checks `tts` as part of the RHS of a macro definition, tries to recognize nested macro
+/// definitions, and sets `valid` to false in case of errors.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `tts` is checked as part of a RHS and may contain macro definitions
+/// - `macros` is the stack of possible outer macros
+/// - `binders` contains the binders of the associated LHS
+/// - `ops` is the stack of Kleene operators from the RHS
+/// - `valid` is set in case of errors
+fn check_nested_occurrences(
+ sess: &ParseSess,
+ node_id: NodeId,
+ tts: &[TokenTree],
+ macros: &Stack<'_, MacroState<'_>>,
+ binders: &Binders,
+ ops: &Stack<'_, KleeneToken>,
+ valid: &mut bool,
+) {
+ let mut state = NestedMacroState::Empty;
+ let nested_macros = macros.push(MacroState { binders, ops: ops.into() });
+ let mut nested_binders = Binders::default();
+ for tt in tts {
+ match (state, tt) {
+ (
+ NestedMacroState::Empty,
+ &TokenTree::Token(Token { kind: TokenKind::Ident(name, false), .. }),
+ ) => {
+ if name == kw::MacroRules {
+ state = NestedMacroState::MacroRules;
+ } else if name == kw::Macro {
+ state = NestedMacroState::Macro;
+ }
+ }
+ (
+ NestedMacroState::MacroRules,
+ &TokenTree::Token(Token { kind: TokenKind::Not, .. }),
+ ) => {
+ state = NestedMacroState::MacroRulesNot;
+ }
+ (
+ NestedMacroState::MacroRulesNot,
+ &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }),
+ ) => {
+ state = NestedMacroState::MacroRulesNotName;
+ }
+ (NestedMacroState::MacroRulesNot, &TokenTree::MetaVar(..)) => {
+ state = NestedMacroState::MacroRulesNotName;
+ // We check that the meta-variable is correctly used.
+ check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
+ }
+ (NestedMacroState::MacroRulesNotName, &TokenTree::Delimited(_, ref del))
+ | (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del))
+ if del.delim == Delimiter::Brace =>
+ {
+ let macro_rules = state == NestedMacroState::MacroRulesNotName;
+ state = NestedMacroState::Empty;
+ let rest =
+ check_nested_macro(sess, node_id, macro_rules, &del.tts, &nested_macros, valid);
+ // If we did not check the whole macro definition, then check the rest as if outside
+ // the macro definition.
+ check_nested_occurrences(
+ sess,
+ node_id,
+ &del.tts[rest..],
+ macros,
+ binders,
+ ops,
+ valid,
+ );
+ }
+ (
+ NestedMacroState::Macro,
+ &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }),
+ ) => {
+ state = NestedMacroState::MacroName;
+ }
+ (NestedMacroState::Macro, &TokenTree::MetaVar(..)) => {
+ state = NestedMacroState::MacroName;
+ // We check that the meta-variable is correctly used.
+ check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
+ }
+ (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del))
+ if del.delim == Delimiter::Parenthesis =>
+ {
+ state = NestedMacroState::MacroNameParen;
+ nested_binders = Binders::default();
+ check_binders(
+ sess,
+ node_id,
+ tt,
+ &nested_macros,
+ &mut nested_binders,
+ &Stack::Empty,
+ valid,
+ );
+ }
+ (NestedMacroState::MacroNameParen, &TokenTree::Delimited(_, ref del))
+ if del.delim == Delimiter::Brace =>
+ {
+ state = NestedMacroState::Empty;
+ check_occurrences(
+ sess,
+ node_id,
+ tt,
+ &nested_macros,
+ &nested_binders,
+ &Stack::Empty,
+ valid,
+ );
+ }
+ (_, ref tt) => {
+ state = NestedMacroState::Empty;
+ check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
+ }
+ }
+ }
+}
+
+/// Checks the body of nested macro, returns where the check stopped, and sets `valid` to false in
+/// case of errors.
+///
+/// The token trees are checked as long as they look like a list of (LHS) => {RHS} token trees. This
+/// check is a best-effort to detect a macro definition. It returns the position in `tts` where we
+/// stopped checking because we detected we were not in a macro definition anymore.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `macro_rules` specifies whether the macro is `macro_rules`
+/// - `tts` is checked as a list of (LHS) => {RHS}
+/// - `macros` is the stack of outer macros
+/// - `valid` is set in case of errors
+fn check_nested_macro(
+ sess: &ParseSess,
+ node_id: NodeId,
+ macro_rules: bool,
+ tts: &[TokenTree],
+ macros: &Stack<'_, MacroState<'_>>,
+ valid: &mut bool,
+) -> usize {
+ let n = tts.len();
+ let mut i = 0;
+ let separator = if macro_rules { TokenKind::Semi } else { TokenKind::Comma };
+ loop {
+ // We expect 3 token trees: `(LHS) => {RHS}`. The separator is checked after.
+ if i + 2 >= n
+ || !tts[i].is_delimited()
+ || !tts[i + 1].is_token(&TokenKind::FatArrow)
+ || !tts[i + 2].is_delimited()
+ {
+ break;
+ }
+ let lhs = &tts[i];
+ let rhs = &tts[i + 2];
+ let mut binders = Binders::default();
+ check_binders(sess, node_id, lhs, macros, &mut binders, &Stack::Empty, valid);
+ check_occurrences(sess, node_id, rhs, macros, &binders, &Stack::Empty, valid);
+ // Since the last semicolon is optional for `macro_rules` macros and decl_macro are not terminated,
+ // we increment our checked position by how many token trees we already checked (the 3
+ // above) before checking for the separator.
+ i += 3;
+ if i == n || !tts[i].is_token(&separator) {
+ break;
+ }
+ // We increment our checked position for the semicolon.
+ i += 1;
+ }
+ i
+}
+
+/// Checks that a meta-variable occurrence is valid.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `macros` is the stack of possible outer macros
+/// - `binders` contains the binders of the associated LHS
+/// - `ops` is the stack of Kleene operators from the RHS
+/// - `span` is the span of the meta-variable to check
+/// - `name` is the name of the meta-variable to check
+fn check_ops_is_prefix(
+ sess: &ParseSess,
+ node_id: NodeId,
+ macros: &Stack<'_, MacroState<'_>>,
+ binders: &Binders,
+ ops: &Stack<'_, KleeneToken>,
+ span: Span,
+ name: MacroRulesNormalizedIdent,
+) {
+ let macros = macros.push(MacroState { binders, ops: ops.into() });
+ // Accumulates the stacks the operators of each state until (and including when) the
+ // meta-variable is found. The innermost stack is first.
+ let mut acc: SmallVec<[&SmallVec<[KleeneToken; 1]>; 1]> = SmallVec::new();
+ for state in &macros {
+ acc.push(&state.ops);
+ if let Some(binder) = state.binders.get(&name) {
+ // This variable concatenates the stack of operators from the RHS of the LHS where the
+ // meta-variable was defined to where it is used (in possibly nested macros). The
+ // outermost operator is first.
+ let mut occurrence_ops: SmallVec<[KleeneToken; 2]> = SmallVec::new();
+ // We need to iterate from the end to start with outermost stack.
+ for ops in acc.iter().rev() {
+ occurrence_ops.extend_from_slice(ops);
+ }
+ ops_is_prefix(sess, node_id, span, name, &binder.ops, &occurrence_ops);
+ return;
+ }
+ }
+ buffer_lint(sess, span.into(), node_id, &format!("unknown macro variable `{}`", name));
+}
+
+/// Returns whether `binder_ops` is a prefix of `occurrence_ops`.
+///
+/// The stack of Kleene operators of a meta-variable occurrence just needs to have the stack of
+/// Kleene operators of its binder as a prefix.
+///
+/// Consider $i in the following example:
+/// ```ignore (illustrative)
+/// ( $( $i:ident = $($j:ident),+ );* ) => { $($( $i += $j; )+)* }
+/// ```
+/// It occurs under the Kleene stack ["*", "+"] and is bound under ["*"] only.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `span` is the span of the meta-variable being check
+/// - `name` is the name of the meta-variable being check
+/// - `binder_ops` is the stack of Kleene operators for the binder
+/// - `occurrence_ops` is the stack of Kleene operators for the occurrence
+fn ops_is_prefix(
+ sess: &ParseSess,
+ node_id: NodeId,
+ span: Span,
+ name: MacroRulesNormalizedIdent,
+ binder_ops: &[KleeneToken],
+ occurrence_ops: &[KleeneToken],
+) {
+ for (i, binder) in binder_ops.iter().enumerate() {
+ if i >= occurrence_ops.len() {
+ let mut span = MultiSpan::from_span(span);
+ span.push_span_label(binder.span, "expected repetition");
+ let message = &format!("variable '{}' is still repeating at this depth", name);
+ buffer_lint(sess, span, node_id, message);
+ return;
+ }
+ let occurrence = &occurrence_ops[i];
+ if occurrence.op != binder.op {
+ let mut span = MultiSpan::from_span(span);
+ span.push_span_label(binder.span, "expected repetition");
+ span.push_span_label(occurrence.span, "conflicting repetition");
+ let message = "meta-variable repeats with different Kleene operator";
+ buffer_lint(sess, span, node_id, message);
+ return;
+ }
+ }
+}
+
+fn buffer_lint(sess: &ParseSess, span: MultiSpan, node_id: NodeId, message: &str) {
+ // Macros loaded from other crates have dummy node ids.
+ if node_id != DUMMY_NODE_ID {
+ sess.buffer_lint(&META_VARIABLE_MISUSE, span, node_id, message);
+ }
+}
diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
new file mode 100644
index 000000000..4fa91dfea
--- /dev/null
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -0,0 +1,704 @@
+//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals
+//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
+//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
+//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
+//! fit for Macro-by-Example-style rules.
+//!
+//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
+//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
+//! matcher positions, but it would also save overhead)
+//!
+//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
+//! The macro parser restricts itself to the features of finite state automata. Earley parsers
+//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
+//!
+//! Quick intro to how the parser works:
+//!
+//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually
+//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`.
+//!
+//! The parser walks through the input a token at a time, maintaining a list
+//! of threads consistent with the current position in the input string: `cur_mps`.
+//!
+//! As it processes them, it fills up `eof_mps` with threads that would be valid if
+//! the macro invocation is now over, `bb_mps` with threads that are waiting on
+//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting
+//! on a particular token. Most of the logic concerns moving the · through the
+//! repetitions indicated by Kleene stars. The rules for moving the · without
+//! consuming any input are called epsilon transitions. It only advances or calls
+//! out to the real Rust parser when no `cur_mps` threads remain.
+//!
+//! Example:
+//!
+//! ```text, ignore
+//! Start parsing a a a a b against [· a $( a )* a b].
+//!
+//! Remaining input: a a a a b
+//! next: [· a $( a )* a b]
+//!
+//! - - - Advance over an a. - - -
+//!
+//! Remaining input: a a a b
+//! cur: [a · $( a )* a b]
+//! Descend/Skip (first position).
+//! next: [a $( · a )* a b] [a $( a )* · a b].
+//!
+//! - - - Advance over an a. - - -
+//!
+//! Remaining input: a a b
+//! cur: [a $( a · )* a b] [a $( a )* a · b]
+//! Follow epsilon transition: Finish/Repeat (first position)
+//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
+//!
+//! - - - Advance over an a. - - - (this looks exactly like the last step)
+//!
+//! Remaining input: a b
+//! cur: [a $( a · )* a b] [a $( a )* a · b]
+//! Follow epsilon transition: Finish/Repeat (first position)
+//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
+//!
+//! - - - Advance over an a. - - - (this looks exactly like the last step)
+//!
+//! Remaining input: b
+//! cur: [a $( a · )* a b] [a $( a )* a · b]
+//! Follow epsilon transition: Finish/Repeat (first position)
+//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
+//!
+//! - - - Advance over a b. - - -
+//!
+//! Remaining input: ''
+//! eof: [a $( a )* a b ·]
+//! ```
+
+pub(crate) use NamedMatch::*;
+pub(crate) use ParseResult::*;
+
+use crate::mbe::{KleeneOp, TokenTree};
+
+use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token};
+use rustc_lint_defs::pluralize;
+use rustc_parse::parser::{NtOrTt, Parser};
+use rustc_span::symbol::MacroRulesNormalizedIdent;
+use rustc_span::Span;
+
+use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::sync::Lrc;
+use rustc_span::symbol::Ident;
+use std::borrow::Cow;
+use std::collections::hash_map::Entry::{Occupied, Vacant};
+
+/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from)
+/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching.
+/// Notable differences to `mbe::TokenTree`:
+/// - It is non-recursive, i.e. there is no nesting.
+/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are
+/// represented explicitly, as is the very end of the matcher.
+///
+/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
+/// simply incrementing the current matcher position index by one.
+pub(super) enum MatcherLoc {
+ Token {
+ token: Token,
+ },
+ Delimited,
+ Sequence {
+ op: KleeneOp,
+ num_metavar_decls: usize,
+ idx_first_after: usize,
+ next_metavar: usize,
+ seq_depth: usize,
+ },
+ SequenceKleeneOpNoSep {
+ op: KleeneOp,
+ idx_first: usize,
+ },
+ SequenceSep {
+ separator: Token,
+ },
+ SequenceKleeneOpAfterSep {
+ idx_first: usize,
+ },
+ MetaVarDecl {
+ span: Span,
+ bind: Ident,
+ kind: Option<NonterminalKind>,
+ next_metavar: usize,
+ seq_depth: usize,
+ },
+ Eof,
+}
+
+pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> {
+ fn inner(
+ tts: &[TokenTree],
+ locs: &mut Vec<MatcherLoc>,
+ next_metavar: &mut usize,
+ seq_depth: usize,
+ ) {
+ for tt in tts {
+ match tt {
+ TokenTree::Token(token) => {
+ locs.push(MatcherLoc::Token { token: token.clone() });
+ }
+ TokenTree::Delimited(span, delimited) => {
+ let open_token = Token::new(token::OpenDelim(delimited.delim), span.open);
+ let close_token = Token::new(token::CloseDelim(delimited.delim), span.close);
+
+ locs.push(MatcherLoc::Delimited);
+ locs.push(MatcherLoc::Token { token: open_token });
+ inner(&delimited.tts, locs, next_metavar, seq_depth);
+ locs.push(MatcherLoc::Token { token: close_token });
+ }
+ TokenTree::Sequence(_, seq) => {
+ // We can't determine `idx_first_after` and construct the final
+ // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
+ // pieces are processed. So we push a dummy value (`Eof` is cheapest to
+ // construct) now, and overwrite it with the proper value below.
+ let dummy = MatcherLoc::Eof;
+ locs.push(dummy);
+
+ let next_metavar_orig = *next_metavar;
+ let op = seq.kleene.op;
+ let idx_first = locs.len();
+ let idx_seq = idx_first - 1;
+ inner(&seq.tts, locs, next_metavar, seq_depth + 1);
+
+ if let Some(separator) = &seq.separator {
+ locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
+ locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
+ } else {
+ locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
+ }
+
+ // Overwrite the dummy value pushed above with the proper value.
+ locs[idx_seq] = MatcherLoc::Sequence {
+ op,
+ num_metavar_decls: seq.num_captures,
+ idx_first_after: locs.len(),
+ next_metavar: next_metavar_orig,
+ seq_depth,
+ };
+ }
+ &TokenTree::MetaVarDecl(span, bind, kind) => {
+ locs.push(MatcherLoc::MetaVarDecl {
+ span,
+ bind,
+ kind,
+ next_metavar: *next_metavar,
+ seq_depth,
+ });
+ *next_metavar += 1;
+ }
+ TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
+ }
+ }
+ }
+
+ let mut locs = vec![];
+ let mut next_metavar = 0;
+ inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
+
+ // A final entry is needed for eof.
+ locs.push(MatcherLoc::Eof);
+
+ locs
+}
+
+/// A single matcher position, representing the state of matching.
+struct MatcherPos {
+ /// The index into `TtParser::locs`, which represents the "dot".
+ idx: usize,
+
+ /// The matches made against metavar decls so far. On a successful match, this vector ends up
+ /// with one element per metavar decl in the matcher. Each element records token trees matched
+ /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if
+ /// the corresponding metavar decl is within a sequence.
+ ///
+ /// It is critical to performance that this is an `Lrc`, because it gets cloned frequently when
+ /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end
+ /// up failing.
+ matches: Lrc<Vec<NamedMatch>>,
+}
+
+// This type is used a lot. Make sure it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(MatcherPos, 16);
+
+impl MatcherPos {
+ /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites,
+ /// and both are hot enough to be always worth inlining.
+ #[inline(always)]
+ fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) {
+ let matches = Lrc::make_mut(&mut self.matches);
+ match seq_depth {
+ 0 => {
+ // We are not within a sequence. Just append `m`.
+ assert_eq!(metavar_idx, matches.len());
+ matches.push(m);
+ }
+ _ => {
+ // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
+ // and append `m` to its vector.
+ let mut curr = &mut matches[metavar_idx];
+ for _ in 0..seq_depth - 1 {
+ match curr {
+ MatchedSeq(seq) => curr = seq.last_mut().unwrap(),
+ _ => unreachable!(),
+ }
+ }
+ match curr {
+ MatchedSeq(seq) => seq.push(m),
+ _ => unreachable!(),
+ }
+ }
+ }
+ }
+}
+
+enum EofMatcherPositions {
+ None,
+ One(MatcherPos),
+ Multiple,
+}
+
+/// Represents the possible results of an attempted parse.
+pub(crate) enum ParseResult<T> {
+ /// Parsed successfully.
+ Success(T),
+ /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
+ /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
+ Failure(Token, &'static str),
+ /// Fatal error (malformed macro?). Abort compilation.
+ Error(rustc_span::Span, String),
+ ErrorReported,
+}
+
+/// A `ParseResult` where the `Success` variant contains a mapping of
+/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
+/// of metavars to the token trees they bind to.
+pub(crate) type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>;
+
+/// Count how many metavars declarations are in `matcher`.
+pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
+ matcher
+ .iter()
+ .map(|tt| match tt {
+ TokenTree::MetaVarDecl(..) => 1,
+ TokenTree::Sequence(_, seq) => seq.num_captures,
+ TokenTree::Delimited(_, delim) => count_metavar_decls(&delim.tts),
+ TokenTree::Token(..) => 0,
+ TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
+ })
+ .sum()
+}
+
+/// `NamedMatch` is a pattern-match result for a single metavar. All
+/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
+/// (expr, item, etc).
+///
+/// The in-memory structure of a particular `NamedMatch` represents the match
+/// that occurred when a particular subset of a matcher was applied to a
+/// particular token tree.
+///
+/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
+/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
+/// to: each `MatchedSeq` corresponds to a single repetition in the originating
+/// token tree. The depth of the `NamedMatch` structure will therefore depend
+/// only on the nesting depth of repetitions in the originating token tree it
+/// was derived from.
+///
+/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular
+/// meta variable. For example, if we are matching the following macro against the following
+/// invocation...
+///
+/// ```rust
+/// macro_rules! foo {
+/// ($($($x:ident),+);+) => {}
+/// }
+///
+/// foo!(a, b, c, d; a, b, c, d, e);
+/// ```
+///
+/// Then, the tree will have the following shape:
+///
+/// ```ignore (private-internal)
+/// # use NamedMatch::*;
+/// MatchedSeq([
+/// MatchedSeq([
+/// MatchedNonterminal(a),
+/// MatchedNonterminal(b),
+/// MatchedNonterminal(c),
+/// MatchedNonterminal(d),
+/// ]),
+/// MatchedSeq([
+/// MatchedNonterminal(a),
+/// MatchedNonterminal(b),
+/// MatchedNonterminal(c),
+/// MatchedNonterminal(d),
+/// MatchedNonterminal(e),
+/// ])
+/// ])
+/// ```
+#[derive(Debug, Clone)]
+pub(crate) enum NamedMatch {
+ MatchedSeq(Vec<NamedMatch>),
+
+ // A metavar match of type `tt`.
+ MatchedTokenTree(rustc_ast::tokenstream::TokenTree),
+
+ // A metavar match of any type other than `tt`.
+ MatchedNonterminal(Lrc<Nonterminal>),
+}
+
+/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
+fn token_name_eq(t1: &Token, t2: &Token) -> bool {
+ if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
+ ident1.name == ident2.name && is_raw1 == is_raw2
+ } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) {
+ ident1.name == ident2.name
+ } else {
+ t1.kind == t2.kind
+ }
+}
+
+// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
+// allocations we have a single vector for each kind that is cleared and reused repeatedly.
+pub struct TtParser {
+ macro_name: Ident,
+
+ /// The set of current mps to be processed. This should be empty by the end of a successful
+ /// execution of `parse_tt_inner`.
+ cur_mps: Vec<MatcherPos>,
+
+ /// The set of newly generated mps. These are used to replenish `cur_mps` in the function
+ /// `parse_tt`.
+ next_mps: Vec<MatcherPos>,
+
+ /// The set of mps that are waiting for the black-box parser.
+ bb_mps: Vec<MatcherPos>,
+
+ /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
+ /// that have no metavars.
+ empty_matches: Lrc<Vec<NamedMatch>>,
+}
+
+impl TtParser {
+ pub(super) fn new(macro_name: Ident) -> TtParser {
+ TtParser {
+ macro_name,
+ cur_mps: vec![],
+ next_mps: vec![],
+ bb_mps: vec![],
+ empty_matches: Lrc::new(vec![]),
+ }
+ }
+
+ /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
+ /// produce more mps in `next_mps` and `bb_mps`.
+ ///
+ /// # Returns
+ ///
+ /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
+ /// track of through the mps generated.
+ fn parse_tt_inner(
+ &mut self,
+ matcher: &[MatcherLoc],
+ token: &Token,
+ ) -> Option<NamedParseResult> {
+ // Matcher positions that would be valid if the macro invocation was over now. Only
+ // modified if `token == Eof`.
+ let mut eof_mps = EofMatcherPositions::None;
+
+ while let Some(mut mp) = self.cur_mps.pop() {
+ match &matcher[mp.idx] {
+ MatcherLoc::Token { token: t } => {
+ // If it's a doc comment, we just ignore it and move on to the next tt in the
+ // matcher. This is a bug, but #95267 showed that existing programs rely on
+ // this behaviour, and changing it would require some care and a transition
+ // period.
+ //
+ // If the token matches, we can just advance the parser.
+ //
+ // Otherwise, this match has failed, there is nothing to do, and hopefully
+ // another mp in `cur_mps` will match.
+ if matches!(t, Token { kind: DocComment(..), .. }) {
+ mp.idx += 1;
+ self.cur_mps.push(mp);
+ } else if token_name_eq(&t, token) {
+ mp.idx += 1;
+ self.next_mps.push(mp);
+ }
+ }
+ MatcherLoc::Delimited => {
+ // Entering the delimeter is trivial.
+ mp.idx += 1;
+ self.cur_mps.push(mp);
+ }
+ &MatcherLoc::Sequence {
+ op,
+ num_metavar_decls,
+ idx_first_after,
+ next_metavar,
+ seq_depth,
+ } => {
+ // Install an empty vec for each metavar within the sequence.
+ for metavar_idx in next_metavar..next_metavar + num_metavar_decls {
+ mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![]));
+ }
+
+ if op == KleeneOp::ZeroOrMore || op == KleeneOp::ZeroOrOne {
+ // Try zero matches of this sequence, by skipping over it.
+ self.cur_mps.push(MatcherPos {
+ idx: idx_first_after,
+ matches: mp.matches.clone(), // a cheap clone
+ });
+ }
+
+ // Try one or more matches of this sequence, by entering it.
+ mp.idx += 1;
+ self.cur_mps.push(mp);
+ }
+ &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => {
+ // We are past the end of a sequence with no separator. Try ending the
+ // sequence. If that's not possible, `ending_mp` will fail quietly when it is
+ // processed next time around the loop.
+ let ending_mp = MatcherPos {
+ idx: mp.idx + 1, // +1 skips the Kleene op
+ matches: mp.matches.clone(), // a cheap clone
+ };
+ self.cur_mps.push(ending_mp);
+
+ if op != KleeneOp::ZeroOrOne {
+ // Try another repetition.
+ mp.idx = idx_first;
+ self.cur_mps.push(mp);
+ }
+ }
+ MatcherLoc::SequenceSep { separator } => {
+ // We are past the end of a sequence with a separator but we haven't seen the
+ // separator yet. Try ending the sequence. If that's not possible, `ending_mp`
+ // will fail quietly when it is processed next time around the loop.
+ let ending_mp = MatcherPos {
+ idx: mp.idx + 2, // +2 skips the separator and the Kleene op
+ matches: mp.matches.clone(), // a cheap clone
+ };
+ self.cur_mps.push(ending_mp);
+
+ if token_name_eq(token, separator) {
+ // The separator matches the current token. Advance past it.
+ mp.idx += 1;
+ self.next_mps.push(mp);
+ }
+ }
+ &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
+ // We are past the sequence separator. This can't be a `?` Kleene op, because
+ // they don't permit separators. Try another repetition.
+ mp.idx = idx_first;
+ self.cur_mps.push(mp);
+ }
+ &MatcherLoc::MetaVarDecl { span, kind, .. } => {
+ // Built-in nonterminals never start with these tokens, so we can eliminate
+ // them from consideration. We use the span of the metavariable declaration
+ // to determine any edition-specific matching behavior for non-terminals.
+ if let Some(kind) = kind {
+ if Parser::nonterminal_may_begin_with(kind, token) {
+ self.bb_mps.push(mp);
+ }
+ } else {
+ // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
+ // Both this check and the one in `nameize` are necessary, surprisingly.
+ return Some(Error(span, "missing fragment specifier".to_string()));
+ }
+ }
+ MatcherLoc::Eof => {
+ // We are past the matcher's end, and not in a sequence. Try to end things.
+ debug_assert_eq!(mp.idx, matcher.len() - 1);
+ if *token == token::Eof {
+ eof_mps = match eof_mps {
+ EofMatcherPositions::None => EofMatcherPositions::One(mp),
+ EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => {
+ EofMatcherPositions::Multiple
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // If we reached the end of input, check that there is EXACTLY ONE possible matcher.
+ // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
+ if *token == token::Eof {
+ Some(match eof_mps {
+ EofMatcherPositions::One(mut eof_mp) => {
+ // Need to take ownership of the matches from within the `Lrc`.
+ Lrc::make_mut(&mut eof_mp.matches);
+ let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
+ self.nameize(matcher, matches)
+ }
+ EofMatcherPositions::Multiple => {
+ Error(token.span, "ambiguity: multiple successful parses".to_string())
+ }
+ EofMatcherPositions::None => Failure(
+ Token::new(
+ token::Eof,
+ if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() },
+ ),
+ "missing tokens in macro arguments",
+ ),
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Match the token stream from `parser` against `matcher`.
+ pub(super) fn parse_tt(
+ &mut self,
+ parser: &mut Cow<'_, Parser<'_>>,
+ matcher: &[MatcherLoc],
+ ) -> NamedParseResult {
+ // A queue of possible matcher positions. We initialize it with the matcher position in
+ // which the "dot" is before the first token of the first token tree in `matcher`.
+ // `parse_tt_inner` then processes all of these possible matcher positions and produces
+ // possible next positions into `next_mps`. After some post-processing, the contents of
+ // `next_mps` replenish `cur_mps` and we start over again.
+ self.cur_mps.clear();
+ self.cur_mps.push(MatcherPos { idx: 0, matches: self.empty_matches.clone() });
+
+ loop {
+ self.next_mps.clear();
+ self.bb_mps.clear();
+
+ // Process `cur_mps` until either we have finished the input or we need to get some
+ // parsing from the black-box parser done.
+ if let Some(res) = self.parse_tt_inner(matcher, &parser.token) {
+ return res;
+ }
+
+ // `parse_tt_inner` handled all of `cur_mps`, so it's empty.
+ assert!(self.cur_mps.is_empty());
+
+ // Error messages here could be improved with links to original rules.
+ match (self.next_mps.len(), self.bb_mps.len()) {
+ (0, 0) => {
+ // There are no possible next positions AND we aren't waiting for the black-box
+ // parser: syntax error.
+ return Failure(
+ parser.token.clone(),
+ "no rules expected this token in macro call",
+ );
+ }
+
+ (_, 0) => {
+ // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then
+ // process the next token.
+ self.cur_mps.append(&mut self.next_mps);
+ parser.to_mut().bump();
+ }
+
+ (0, 1) => {
+ // We need to call the black-box parser to get some nonterminal.
+ let mut mp = self.bb_mps.pop().unwrap();
+ let loc = &matcher[mp.idx];
+ if let &MatcherLoc::MetaVarDecl {
+ span,
+ kind: Some(kind),
+ next_metavar,
+ seq_depth,
+ ..
+ } = loc
+ {
+ // We use the span of the metavariable declaration to determine any
+ // edition-specific matching behavior for non-terminals.
+ let nt = match parser.to_mut().parse_nonterminal(kind) {
+ Err(mut err) => {
+ err.span_label(
+ span,
+ format!(
+ "while parsing argument for this `{kind}` macro fragment"
+ ),
+ )
+ .emit();
+ return ErrorReported;
+ }
+ Ok(nt) => nt,
+ };
+ let m = match nt {
+ NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
+ NtOrTt::Tt(tt) => MatchedTokenTree(tt),
+ };
+ mp.push_match(next_metavar, seq_depth, m);
+ mp.idx += 1;
+ } else {
+ unreachable!()
+ }
+ self.cur_mps.push(mp);
+ }
+
+ (_, _) => {
+ // Too many possibilities!
+ return self.ambiguity_error(matcher, parser.token.span);
+ }
+ }
+
+ assert!(!self.cur_mps.is_empty());
+ }
+ }
+
+ fn ambiguity_error(
+ &self,
+ matcher: &[MatcherLoc],
+ token_span: rustc_span::Span,
+ ) -> NamedParseResult {
+ let nts = self
+ .bb_mps
+ .iter()
+ .map(|mp| match &matcher[mp.idx] {
+ MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
+ format!("{} ('{}')", kind, bind)
+ }
+ _ => unreachable!(),
+ })
+ .collect::<Vec<String>>()
+ .join(" or ");
+
+ Error(
+ token_span,
+ format!(
+ "local ambiguity when calling macro `{}`: multiple parsing options: {}",
+ self.macro_name,
+ match self.next_mps.len() {
+ 0 => format!("built-in NTs {}.", nts),
+ n => format!("built-in NTs {} or {n} other option{s}.", nts, s = pluralize!(n)),
+ }
+ ),
+ )
+ }
+
+ fn nameize<I: Iterator<Item = NamedMatch>>(
+ &self,
+ matcher: &[MatcherLoc],
+ mut res: I,
+ ) -> NamedParseResult {
+ // Make that each metavar has _exactly one_ binding. If so, insert the binding into the
+ // `NamedParseResult`. Otherwise, it's an error.
+ let mut ret_val = FxHashMap::default();
+ for loc in matcher {
+ if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
+ if kind.is_some() {
+ match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
+ Vacant(spot) => spot.insert(res.next().unwrap()),
+ Occupied(..) => {
+ return Error(span, format!("duplicated bind name: {}", bind));
+ }
+ };
+ } else {
+ // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
+ // Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
+ return Error(span, "missing fragment specifier".to_string());
+ }
+ }
+ }
+ Success(ret_val)
+ }
+}
diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
new file mode 100644
index 000000000..f7e1575af
--- /dev/null
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -0,0 +1,1420 @@
+use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
+use crate::base::{SyntaxExtension, SyntaxExtensionKind};
+use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
+use crate::mbe;
+use crate::mbe::macro_check;
+use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
+use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc};
+use crate::mbe::transcribe::transcribe;
+
+use rustc_ast as ast;
+use rustc_ast::token::{self, Delimiter, NonterminalKind, Token, TokenKind, TokenKind::*};
+use rustc_ast::tokenstream::{DelimSpan, TokenStream};
+use rustc_ast::{NodeId, DUMMY_NODE_ID};
+use rustc_ast_pretty::pprust;
+use rustc_attr::{self as attr, TransparencyError};
+use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
+use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, ErrorGuaranteed};
+use rustc_feature::Features;
+use rustc_lint_defs::builtin::{
+ RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
+};
+use rustc_lint_defs::BuiltinLintDiagnostics;
+use rustc_parse::parser::Parser;
+use rustc_session::parse::ParseSess;
+use rustc_session::Session;
+use rustc_span::edition::Edition;
+use rustc_span::hygiene::Transparency;
+use rustc_span::source_map::SourceMap;
+use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent};
+use rustc_span::Span;
+
+use std::borrow::Cow;
+use std::collections::hash_map::Entry;
+use std::{mem, slice};
+use tracing::debug;
+
+pub(crate) struct ParserAnyMacro<'a> {
+ parser: Parser<'a>,
+
+ /// Span of the expansion site of the macro this parser is for
+ site_span: Span,
+ /// The ident of the macro we're parsing
+ macro_ident: Ident,
+ lint_node_id: NodeId,
+ is_trailing_mac: bool,
+ arm_span: Span,
+ /// Whether or not this macro is defined in the current crate
+ is_local: bool,
+}
+
+pub(crate) fn annotate_err_with_kind(err: &mut Diagnostic, kind: AstFragmentKind, span: Span) {
+ match kind {
+ AstFragmentKind::Ty => {
+ err.span_label(span, "this macro call doesn't expand to a type");
+ }
+ AstFragmentKind::Pat => {
+ err.span_label(span, "this macro call doesn't expand to a pattern");
+ }
+ _ => {}
+ };
+}
+
+fn emit_frag_parse_err(
+ mut e: DiagnosticBuilder<'_, rustc_errors::ErrorGuaranteed>,
+ parser: &Parser<'_>,
+ orig_parser: &mut Parser<'_>,
+ site_span: Span,
+ arm_span: Span,
+ kind: AstFragmentKind,
+) {
+ // FIXME(davidtwco): avoid depending on the error message text
+ if parser.token == token::Eof && e.message[0].0.expect_str().ends_with(", found `<eof>`") {
+ if !e.span.is_dummy() {
+ // early end of macro arm (#52866)
+ e.replace_span_with(parser.sess.source_map().next_point(parser.token.span));
+ }
+ let msg = &e.message[0];
+ e.message[0] = (
+ rustc_errors::DiagnosticMessage::Str(format!(
+ "macro expansion ends with an incomplete expression: {}",
+ msg.0.expect_str().replace(", found `<eof>`", ""),
+ )),
+ msg.1,
+ );
+ }
+ if e.span.is_dummy() {
+ // Get around lack of span in error (#30128)
+ e.replace_span_with(site_span);
+ if !parser.sess.source_map().is_imported(arm_span) {
+ e.span_label(arm_span, "in this macro arm");
+ }
+ } else if parser.sess.source_map().is_imported(parser.token.span) {
+ e.span_label(site_span, "in this macro invocation");
+ }
+ match kind {
+ // Try a statement if an expression is wanted but failed and suggest adding `;` to call.
+ AstFragmentKind::Expr => match parse_ast_fragment(orig_parser, AstFragmentKind::Stmts) {
+ Err(err) => err.cancel(),
+ Ok(_) => {
+ e.note(
+ "the macro call doesn't expand to an expression, but it can expand to a statement",
+ );
+ e.span_suggestion_verbose(
+ site_span.shrink_to_hi(),
+ "add `;` to interpret the expansion as a statement",
+ ";",
+ Applicability::MaybeIncorrect,
+ );
+ }
+ },
+ _ => annotate_err_with_kind(&mut e, kind, site_span),
+ };
+ e.emit();
+}
+
+impl<'a> ParserAnyMacro<'a> {
+ pub(crate) fn make(mut self: Box<ParserAnyMacro<'a>>, kind: AstFragmentKind) -> AstFragment {
+ let ParserAnyMacro {
+ site_span,
+ macro_ident,
+ ref mut parser,
+ lint_node_id,
+ arm_span,
+ is_trailing_mac,
+ is_local,
+ } = *self;
+ let snapshot = &mut parser.create_snapshot_for_diagnostic();
+ let fragment = match parse_ast_fragment(parser, kind) {
+ Ok(f) => f,
+ Err(err) => {
+ emit_frag_parse_err(err, parser, snapshot, site_span, arm_span, kind);
+ return kind.dummy(site_span);
+ }
+ };
+
+ // We allow semicolons at the end of expressions -- e.g., the semicolon in
+ // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`,
+ // but `m!()` is allowed in expression positions (cf. issue #34706).
+ if kind == AstFragmentKind::Expr && parser.token == token::Semi {
+ if is_local {
+ parser.sess.buffer_lint_with_diagnostic(
+ SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
+ parser.token.span,
+ lint_node_id,
+ "trailing semicolon in macro used in expression position",
+ BuiltinLintDiagnostics::TrailingMacro(is_trailing_mac, macro_ident),
+ );
+ }
+ parser.bump();
+ }
+
+ // Make sure we don't have any tokens left to parse so we don't silently drop anything.
+ let path = ast::Path::from_ident(macro_ident.with_span_pos(site_span));
+ ensure_complete_parse(parser, &path, kind.name(), site_span);
+ fragment
+ }
+}
+
+struct MacroRulesMacroExpander {
+ node_id: NodeId,
+ name: Ident,
+ span: Span,
+ transparency: Transparency,
+ lhses: Vec<Vec<MatcherLoc>>,
+ rhses: Vec<mbe::TokenTree>,
+ valid: bool,
+}
+
+impl TTMacroExpander for MacroRulesMacroExpander {
+ fn expand<'cx>(
+ &self,
+ cx: &'cx mut ExtCtxt<'_>,
+ sp: Span,
+ input: TokenStream,
+ ) -> Box<dyn MacResult + 'cx> {
+ if !self.valid {
+ return DummyResult::any(sp);
+ }
+ expand_macro(
+ cx,
+ sp,
+ self.span,
+ self.node_id,
+ self.name,
+ self.transparency,
+ input,
+ &self.lhses,
+ &self.rhses,
+ )
+ }
+}
+
+fn macro_rules_dummy_expander<'cx>(
+ _: &'cx mut ExtCtxt<'_>,
+ span: Span,
+ _: TokenStream,
+) -> Box<dyn MacResult + 'cx> {
+ DummyResult::any(span)
+}
+
+fn trace_macros_note(cx_expansions: &mut FxIndexMap<Span, Vec<String>>, sp: Span, message: String) {
+ let sp = sp.macro_backtrace().last().map_or(sp, |trace| trace.call_site);
+ cx_expansions.entry(sp).or_default().push(message);
+}
+
+/// Expands the rules based macro defined by `lhses` and `rhses` for a given
+/// input `arg`.
+fn expand_macro<'cx>(
+ cx: &'cx mut ExtCtxt<'_>,
+ sp: Span,
+ def_span: Span,
+ node_id: NodeId,
+ name: Ident,
+ transparency: Transparency,
+ arg: TokenStream,
+ lhses: &[Vec<MatcherLoc>],
+ rhses: &[mbe::TokenTree],
+) -> Box<dyn MacResult + 'cx> {
+ let sess = &cx.sess.parse_sess;
+ // Macros defined in the current crate have a real node id,
+ // whereas macros from an external crate have a dummy id.
+ let is_local = node_id != DUMMY_NODE_ID;
+
+ if cx.trace_macros() {
+ let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(&arg));
+ trace_macros_note(&mut cx.expansions, sp, msg);
+ }
+
+ // Which arm's failure should we report? (the one furthest along)
+ let mut best_failure: Option<(Token, &str)> = None;
+
+ // We create a base parser that can be used for the "black box" parts.
+ // Every iteration needs a fresh copy of that parser. However, the parser
+ // is not mutated on many of the iterations, particularly when dealing with
+ // macros like this:
+ //
+ // macro_rules! foo {
+ // ("a") => (A);
+ // ("b") => (B);
+ // ("c") => (C);
+ // // ... etc. (maybe hundreds more)
+ // }
+ //
+ // as seen in the `html5ever` benchmark. We use a `Cow` so that the base
+ // parser is only cloned when necessary (upon mutation). Furthermore, we
+ // reinitialize the `Cow` with the base parser at the start of every
+ // iteration, so that any mutated parsers are not reused. This is all quite
+ // hacky, but speeds up the `html5ever` benchmark significantly. (Issue
+ // 68836 suggests a more comprehensive but more complex change to deal with
+ // this situation.)
+ let parser = parser_from_cx(sess, arg.clone());
+
+ // Try each arm's matchers.
+ let mut tt_parser = TtParser::new(name);
+ for (i, lhs) in lhses.iter().enumerate() {
+ // Take a snapshot of the state of pre-expansion gating at this point.
+ // This is used so that if a matcher is not `Success(..)`ful,
+ // then the spans which became gated when parsing the unsuccessful matcher
+ // are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
+ let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());
+
+ match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
+ Success(named_matches) => {
+ // The matcher was `Success(..)`ful.
+ // Merge the gated spans from parsing the matcher with the pre-existing ones.
+ sess.gated_spans.merge(gated_spans_snapshot);
+
+ let (rhs, rhs_span): (&mbe::Delimited, DelimSpan) = match &rhses[i] {
+ mbe::TokenTree::Delimited(span, delimited) => (&delimited, *span),
+ _ => cx.span_bug(sp, "malformed macro rhs"),
+ };
+ let arm_span = rhses[i].span();
+
+ let rhs_spans = rhs.tts.iter().map(|t| t.span()).collect::<Vec<_>>();
+ // rhs has holes ( `$id` and `$(...)` that need filled)
+ let mut tts = match transcribe(cx, &named_matches, &rhs, rhs_span, transparency) {
+ Ok(tts) => tts,
+ Err(mut err) => {
+ err.emit();
+ return DummyResult::any(arm_span);
+ }
+ };
+
+ // Replace all the tokens for the corresponding positions in the macro, to maintain
+ // proper positions in error reporting, while maintaining the macro_backtrace.
+ if rhs_spans.len() == tts.len() {
+ tts = tts.map_enumerated(|i, tt| {
+ let mut tt = tt.clone();
+ let mut sp = rhs_spans[i];
+ sp = sp.with_ctxt(tt.span().ctxt());
+ tt.set_span(sp);
+ tt
+ });
+ }
+
+ if cx.trace_macros() {
+ let msg = format!("to `{}`", pprust::tts_to_string(&tts));
+ trace_macros_note(&mut cx.expansions, sp, msg);
+ }
+
+ let mut p = Parser::new(sess, tts, false, None);
+ p.last_type_ascription = cx.current_expansion.prior_type_ascription;
+
+ if is_local {
+ cx.resolver.record_macro_rule_usage(node_id, i);
+ }
+
+ // Let the context choose how to interpret the result.
+ // Weird, but useful for X-macros.
+ return Box::new(ParserAnyMacro {
+ parser: p,
+
+ // Pass along the original expansion site and the name of the macro
+ // so we can print a useful error message if the parse of the expanded
+ // macro leaves unparsed tokens.
+ site_span: sp,
+ macro_ident: name,
+ lint_node_id: cx.current_expansion.lint_node_id,
+ is_trailing_mac: cx.current_expansion.is_trailing_mac,
+ arm_span,
+ is_local,
+ });
+ }
+ Failure(token, msg) => match best_failure {
+ Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {}
+ _ => best_failure = Some((token, msg)),
+ },
+ Error(err_sp, ref msg) => {
+ let span = err_sp.substitute_dummy(sp);
+ cx.struct_span_err(span, &msg).emit();
+ return DummyResult::any(span);
+ }
+ ErrorReported => return DummyResult::any(sp),
+ }
+
+ // The matcher was not `Success(..)`ful.
+ // Restore to the state before snapshotting and maybe try again.
+ mem::swap(&mut gated_spans_snapshot, &mut sess.gated_spans.spans.borrow_mut());
+ }
+ drop(parser);
+
+ let (token, label) = best_failure.expect("ran no matchers");
+ let span = token.span.substitute_dummy(sp);
+ let mut err = cx.struct_span_err(span, &parse_failure_msg(&token));
+ err.span_label(span, label);
+ if !def_span.is_dummy() && !cx.source_map().is_imported(def_span) {
+ err.span_label(cx.source_map().guess_head_span(def_span), "when calling this macro");
+ }
+ annotate_doc_comment(&mut err, sess.source_map(), span);
+ // Check whether there's a missing comma in this macro call, like `println!("{}" a);`
+ if let Some((arg, comma_span)) = arg.add_comma() {
+ for lhs in lhses {
+ let parser = parser_from_cx(sess, arg.clone());
+ if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
+ if comma_span.is_dummy() {
+ err.note("you might be missing a comma");
+ } else {
+ err.span_suggestion_short(
+ comma_span,
+ "missing comma here",
+ ", ",
+ Applicability::MachineApplicable,
+ );
+ }
+ }
+ }
+ }
+ err.emit();
+ cx.trace_macros_diag();
+ DummyResult::any(sp)
+}
+
+// Note that macro-by-example's input is also matched against a token tree:
+// $( $lhs:tt => $rhs:tt );+
+//
+// Holy self-referential!
+
+/// Converts a macro item into a syntax extension.
+pub fn compile_declarative_macro(
+ sess: &Session,
+ features: &Features,
+ def: &ast::Item,
+ edition: Edition,
+) -> (SyntaxExtension, Vec<(usize, Span)>) {
+ debug!("compile_declarative_macro: {:?}", def);
+ let mk_syn_ext = |expander| {
+ SyntaxExtension::new(
+ sess,
+ SyntaxExtensionKind::LegacyBang(expander),
+ def.span,
+ Vec::new(),
+ edition,
+ def.ident.name,
+ &def.attrs,
+ )
+ };
+ let dummy_syn_ext = || (mk_syn_ext(Box::new(macro_rules_dummy_expander)), Vec::new());
+
+ let diag = &sess.parse_sess.span_diagnostic;
+ let lhs_nm = Ident::new(sym::lhs, def.span);
+ let rhs_nm = Ident::new(sym::rhs, def.span);
+ let tt_spec = Some(NonterminalKind::TT);
+
+ // Parse the macro_rules! invocation
+ let (macro_rules, body) = match &def.kind {
+ ast::ItemKind::MacroDef(def) => (def.macro_rules, def.body.inner_tokens()),
+ _ => unreachable!(),
+ };
+
+ // The pattern that macro_rules matches.
+ // The grammar for macro_rules! is:
+ // $( $lhs:tt => $rhs:tt );+
+ // ...quasiquoting this would be nice.
+ // These spans won't matter, anyways
+ let argument_gram = vec![
+ mbe::TokenTree::Sequence(
+ DelimSpan::dummy(),
+ mbe::SequenceRepetition {
+ tts: vec![
+ mbe::TokenTree::MetaVarDecl(def.span, lhs_nm, tt_spec),
+ mbe::TokenTree::token(token::FatArrow, def.span),
+ mbe::TokenTree::MetaVarDecl(def.span, rhs_nm, tt_spec),
+ ],
+ separator: Some(Token::new(
+ if macro_rules { token::Semi } else { token::Comma },
+ def.span,
+ )),
+ kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, def.span),
+ num_captures: 2,
+ },
+ ),
+ // to phase into semicolon-termination instead of semicolon-separation
+ mbe::TokenTree::Sequence(
+ DelimSpan::dummy(),
+ mbe::SequenceRepetition {
+ tts: vec![mbe::TokenTree::token(
+ if macro_rules { token::Semi } else { token::Comma },
+ def.span,
+ )],
+ separator: None,
+ kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, def.span),
+ num_captures: 0,
+ },
+ ),
+ ];
+ // Convert it into `MatcherLoc` form.
+ let argument_gram = mbe::macro_parser::compute_locs(&argument_gram);
+
+ let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
+ let mut tt_parser =
+ TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
+ let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
+ Success(m) => m,
+ Failure(token, msg) => {
+ let s = parse_failure_msg(&token);
+ let sp = token.span.substitute_dummy(def.span);
+ let mut err = sess.parse_sess.span_diagnostic.struct_span_err(sp, &s);
+ err.span_label(sp, msg);
+ annotate_doc_comment(&mut err, sess.source_map(), sp);
+ err.emit();
+ return dummy_syn_ext();
+ }
+ Error(sp, msg) => {
+ sess.parse_sess
+ .span_diagnostic
+ .struct_span_err(sp.substitute_dummy(def.span), &msg)
+ .emit();
+ return dummy_syn_ext();
+ }
+ ErrorReported => {
+ return dummy_syn_ext();
+ }
+ };
+
+ let mut valid = true;
+
+ // Extract the arguments:
+ let lhses = match argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
+ MatchedSeq(ref s) => s
+ .iter()
+ .map(|m| {
+ if let MatchedTokenTree(ref tt) = *m {
+ let tt = mbe::quoted::parse(
+ TokenStream::new(vec![tt.clone()]),
+ true,
+ &sess.parse_sess,
+ def.id,
+ features,
+ edition,
+ )
+ .pop()
+ .unwrap();
+ valid &= check_lhs_nt_follows(&sess.parse_sess, &def, &tt);
+ return tt;
+ }
+ sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
+ })
+ .collect::<Vec<mbe::TokenTree>>(),
+ _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"),
+ };
+
+ let rhses = match argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
+ MatchedSeq(ref s) => s
+ .iter()
+ .map(|m| {
+ if let MatchedTokenTree(ref tt) = *m {
+ return mbe::quoted::parse(
+ TokenStream::new(vec![tt.clone()]),
+ false,
+ &sess.parse_sess,
+ def.id,
+ features,
+ edition,
+ )
+ .pop()
+ .unwrap();
+ }
+ sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
+ })
+ .collect::<Vec<mbe::TokenTree>>(),
+ _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs"),
+ };
+
+ for rhs in &rhses {
+ valid &= check_rhs(&sess.parse_sess, rhs);
+ }
+
+ // don't abort iteration early, so that errors for multiple lhses can be reported
+ for lhs in &lhses {
+ valid &= check_lhs_no_empty_seq(&sess.parse_sess, slice::from_ref(lhs));
+ }
+
+ valid &= macro_check::check_meta_variables(&sess.parse_sess, def.id, def.span, &lhses, &rhses);
+
+ let (transparency, transparency_error) = attr::find_transparency(&def.attrs, macro_rules);
+ match transparency_error {
+ Some(TransparencyError::UnknownTransparency(value, span)) => {
+ diag.span_err(span, &format!("unknown macro transparency: `{}`", value));
+ }
+ Some(TransparencyError::MultipleTransparencyAttrs(old_span, new_span)) => {
+ diag.span_err(vec![old_span, new_span], "multiple macro transparency attributes");
+ }
+ None => {}
+ }
+
+ // Compute the spans of the macro rules for unused rule linting.
+ // To avoid warning noise, only consider the rules of this
+ // macro for the lint, if all rules are valid.
+ // Also, we are only interested in non-foreign macros.
+ let rule_spans = if valid && def.id != DUMMY_NODE_ID {
+ lhses
+ .iter()
+ .zip(rhses.iter())
+ .enumerate()
+ // If the rhs contains an invocation like compile_error!,
+ // don't consider the rule for the unused rule lint.
+ .filter(|(_idx, (_lhs, rhs))| !has_compile_error_macro(rhs))
+ // We only take the span of the lhs here,
+ // so that the spans of created warnings are smaller.
+ .map(|(idx, (lhs, _rhs))| (idx, lhs.span()))
+ .collect::<Vec<_>>()
+ } else {
+ Vec::new()
+ };
+
+ // Convert the lhses into `MatcherLoc` form, which is better for doing the
+ // actual matching. Unless the matcher is invalid.
+ let lhses = if valid {
+ lhses
+ .iter()
+ .map(|lhs| {
+ // Ignore the delimiters around the matcher.
+ match lhs {
+ mbe::TokenTree::Delimited(_, delimited) => {
+ mbe::macro_parser::compute_locs(&delimited.tts)
+ }
+ _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"),
+ }
+ })
+ .collect()
+ } else {
+ vec![]
+ };
+
+ let expander = Box::new(MacroRulesMacroExpander {
+ name: def.ident,
+ span: def.span,
+ node_id: def.id,
+ transparency,
+ lhses,
+ rhses,
+ valid,
+ });
+ (mk_syn_ext(expander), rule_spans)
+}
+
+#[derive(SessionSubdiagnostic)]
+enum ExplainDocComment {
+ #[label(expand::explain_doc_comment_inner)]
+ Inner {
+ #[primary_span]
+ span: Span,
+ },
+ #[label(expand::explain_doc_comment_outer)]
+ Outer {
+ #[primary_span]
+ span: Span,
+ },
+}
+
+fn annotate_doc_comment(
+ err: &mut DiagnosticBuilder<'_, ErrorGuaranteed>,
+ sm: &SourceMap,
+ span: Span,
+) {
+ if let Ok(src) = sm.span_to_snippet(span) {
+ if src.starts_with("///") || src.starts_with("/**") {
+ err.subdiagnostic(ExplainDocComment::Outer { span });
+ } else if src.starts_with("//!") || src.starts_with("/*!") {
+ err.subdiagnostic(ExplainDocComment::Inner { span });
+ }
+ }
+}
+
+fn check_lhs_nt_follows(sess: &ParseSess, def: &ast::Item, lhs: &mbe::TokenTree) -> bool {
+ // lhs is going to be like TokenTree::Delimited(...), where the
+ // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens.
+ if let mbe::TokenTree::Delimited(_, delimited) = lhs {
+ check_matcher(sess, def, &delimited.tts)
+ } else {
+ let msg = "invalid macro matcher; matchers must be contained in balanced delimiters";
+ sess.span_diagnostic.span_err(lhs.span(), msg);
+ false
+ }
+ // we don't abort on errors on rejection, the driver will do that for us
+ // after parsing/expansion. we can report every error in every macro this way.
+}
+
+/// Checks that the lhs contains no repetition which could match an empty token
+/// tree, because then the matcher would hang indefinitely.
+fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool {
+ use mbe::TokenTree;
+ for tt in tts {
+ match *tt {
+ TokenTree::Token(..)
+ | TokenTree::MetaVar(..)
+ | TokenTree::MetaVarDecl(..)
+ | TokenTree::MetaVarExpr(..) => (),
+ TokenTree::Delimited(_, ref del) => {
+ if !check_lhs_no_empty_seq(sess, &del.tts) {
+ return false;
+ }
+ }
+ TokenTree::Sequence(span, ref seq) => {
+ if seq.separator.is_none()
+ && seq.tts.iter().all(|seq_tt| match *seq_tt {
+ TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Vis)) => true,
+ TokenTree::Sequence(_, ref sub_seq) => {
+ sub_seq.kleene.op == mbe::KleeneOp::ZeroOrMore
+ || sub_seq.kleene.op == mbe::KleeneOp::ZeroOrOne
+ }
+ _ => false,
+ })
+ {
+ let sp = span.entire();
+ sess.span_diagnostic.span_err(sp, "repetition matches empty token tree");
+ return false;
+ }
+ if !check_lhs_no_empty_seq(sess, &seq.tts) {
+ return false;
+ }
+ }
+ }
+ }
+
+ true
+}
+
+fn check_rhs(sess: &ParseSess, rhs: &mbe::TokenTree) -> bool {
+ match *rhs {
+ mbe::TokenTree::Delimited(..) => return true,
+ _ => {
+ sess.span_diagnostic.span_err(rhs.span(), "macro rhs must be delimited");
+ }
+ }
+ false
+}
+
+fn check_matcher(sess: &ParseSess, def: &ast::Item, matcher: &[mbe::TokenTree]) -> bool {
+ let first_sets = FirstSets::new(matcher);
+ let empty_suffix = TokenSet::empty();
+ let err = sess.span_diagnostic.err_count();
+ check_matcher_core(sess, def, &first_sets, matcher, &empty_suffix);
+ err == sess.span_diagnostic.err_count()
+}
+
+fn has_compile_error_macro(rhs: &mbe::TokenTree) -> bool {
+ match rhs {
+ mbe::TokenTree::Delimited(_sp, d) => {
+ let has_compile_error = d.tts.array_windows::<3>().any(|[ident, bang, args]| {
+ if let mbe::TokenTree::Token(ident) = ident &&
+ let TokenKind::Ident(ident, _) = ident.kind &&
+ ident == sym::compile_error &&
+ let mbe::TokenTree::Token(bang) = bang &&
+ let TokenKind::Not = bang.kind &&
+ let mbe::TokenTree::Delimited(_, del) = args &&
+ del.delim != Delimiter::Invisible
+ {
+ true
+ } else {
+ false
+ }
+ });
+ if has_compile_error { true } else { d.tts.iter().any(has_compile_error_macro) }
+ }
+ _ => false,
+ }
+}
+
+// `The FirstSets` for a matcher is a mapping from subsequences in the
+// matcher to the FIRST set for that subsequence.
+//
+// This mapping is partially precomputed via a backwards scan over the
+// token trees of the matcher, which provides a mapping from each
+// repetition sequence to its *first* set.
+//
+// (Hypothetically, sequences should be uniquely identifiable via their
+// spans, though perhaps that is false, e.g., for macro-generated macros
+// that do not try to inject artificial span information. My plan is
+// to try to catch such cases ahead of time and not include them in
+// the precomputed mapping.)
+struct FirstSets<'tt> {
+ // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its
+ // span in the original matcher to the First set for the inner sequence `tt ...`.
+ //
+ // If two sequences have the same span in a matcher, then map that
+ // span to None (invalidating the mapping here and forcing the code to
+ // use a slow path).
+ first: FxHashMap<Span, Option<TokenSet<'tt>>>,
+}
+
+impl<'tt> FirstSets<'tt> {
+ fn new(tts: &'tt [mbe::TokenTree]) -> FirstSets<'tt> {
+ use mbe::TokenTree;
+
+ let mut sets = FirstSets { first: FxHashMap::default() };
+ build_recur(&mut sets, tts);
+ return sets;
+
+ // walks backward over `tts`, returning the FIRST for `tts`
+ // and updating `sets` at the same time for all sequence
+ // substructure we find within `tts`.
+ fn build_recur<'tt>(sets: &mut FirstSets<'tt>, tts: &'tt [TokenTree]) -> TokenSet<'tt> {
+ let mut first = TokenSet::empty();
+ for tt in tts.iter().rev() {
+ match *tt {
+ TokenTree::Token(..)
+ | TokenTree::MetaVar(..)
+ | TokenTree::MetaVarDecl(..)
+ | TokenTree::MetaVarExpr(..) => {
+ first.replace_with(TtHandle::TtRef(tt));
+ }
+ TokenTree::Delimited(span, ref delimited) => {
+ build_recur(sets, &delimited.tts);
+ first.replace_with(TtHandle::from_token_kind(
+ token::OpenDelim(delimited.delim),
+ span.open,
+ ));
+ }
+ TokenTree::Sequence(sp, ref seq_rep) => {
+ let subfirst = build_recur(sets, &seq_rep.tts);
+
+ match sets.first.entry(sp.entire()) {
+ Entry::Vacant(vac) => {
+ vac.insert(Some(subfirst.clone()));
+ }
+ Entry::Occupied(mut occ) => {
+ // if there is already an entry, then a span must have collided.
+ // This should not happen with typical macro_rules macros,
+ // but syntax extensions need not maintain distinct spans,
+ // so distinct syntax trees can be assigned the same span.
+ // In such a case, the map cannot be trusted; so mark this
+ // entry as unusable.
+ occ.insert(None);
+ }
+ }
+
+ // If the sequence contents can be empty, then the first
+ // token could be the separator token itself.
+
+ if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
+ first.add_one_maybe(TtHandle::from_token(sep.clone()));
+ }
+
+ // Reverse scan: Sequence comes before `first`.
+ if subfirst.maybe_empty
+ || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore
+ || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne
+ {
+ // If sequence is potentially empty, then
+ // union them (preserving first emptiness).
+ first.add_all(&TokenSet { maybe_empty: true, ..subfirst });
+ } else {
+ // Otherwise, sequence guaranteed
+ // non-empty; replace first.
+ first = subfirst;
+ }
+ }
+ }
+ }
+
+ first
+ }
+ }
+
+ // walks forward over `tts` until all potential FIRST tokens are
+ // identified.
+ fn first(&self, tts: &'tt [mbe::TokenTree]) -> TokenSet<'tt> {
+ use mbe::TokenTree;
+
+ let mut first = TokenSet::empty();
+ for tt in tts.iter() {
+ assert!(first.maybe_empty);
+ match *tt {
+ TokenTree::Token(..)
+ | TokenTree::MetaVar(..)
+ | TokenTree::MetaVarDecl(..)
+ | TokenTree::MetaVarExpr(..) => {
+ first.add_one(TtHandle::TtRef(tt));
+ return first;
+ }
+ TokenTree::Delimited(span, ref delimited) => {
+ first.add_one(TtHandle::from_token_kind(
+ token::OpenDelim(delimited.delim),
+ span.open,
+ ));
+ return first;
+ }
+ TokenTree::Sequence(sp, ref seq_rep) => {
+ let subfirst_owned;
+ let subfirst = match self.first.get(&sp.entire()) {
+ Some(&Some(ref subfirst)) => subfirst,
+ Some(&None) => {
+ subfirst_owned = self.first(&seq_rep.tts);
+ &subfirst_owned
+ }
+ None => {
+ panic!("We missed a sequence during FirstSets construction");
+ }
+ };
+
+ // If the sequence contents can be empty, then the first
+ // token could be the separator token itself.
+ if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
+ first.add_one_maybe(TtHandle::from_token(sep.clone()));
+ }
+
+ assert!(first.maybe_empty);
+ first.add_all(subfirst);
+ if subfirst.maybe_empty
+ || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore
+ || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne
+ {
+ // Continue scanning for more first
+ // tokens, but also make sure we
+ // restore empty-tracking state.
+ first.maybe_empty = true;
+ continue;
+ } else {
+ return first;
+ }
+ }
+ }
+ }
+
+ // we only exit the loop if `tts` was empty or if every
+ // element of `tts` matches the empty sequence.
+ assert!(first.maybe_empty);
+ first
+ }
+}
+
+// Most `mbe::TokenTree`s are pre-existing in the matcher, but some are defined
+// implicitly, such as opening/closing delimiters and sequence repetition ops.
+// This type encapsulates both kinds. It implements `Clone` while avoiding the
+// need for `mbe::TokenTree` to implement `Clone`.
+#[derive(Debug)]
+enum TtHandle<'tt> {
+ /// This is used in most cases.
+ TtRef(&'tt mbe::TokenTree),
+
+ /// This is only used for implicit token trees. The `mbe::TokenTree` *must*
+ /// be `mbe::TokenTree::Token`. No other variants are allowed. We store an
+ /// `mbe::TokenTree` rather than a `Token` so that `get()` can return a
+ /// `&mbe::TokenTree`.
+ Token(mbe::TokenTree),
+}
+
+impl<'tt> TtHandle<'tt> {
+ fn from_token(tok: Token) -> Self {
+ TtHandle::Token(mbe::TokenTree::Token(tok))
+ }
+
+ fn from_token_kind(kind: TokenKind, span: Span) -> Self {
+ TtHandle::from_token(Token::new(kind, span))
+ }
+
+ // Get a reference to a token tree.
+ fn get(&'tt self) -> &'tt mbe::TokenTree {
+ match self {
+ TtHandle::TtRef(tt) => tt,
+ TtHandle::Token(token_tt) => &token_tt,
+ }
+ }
+}
+
+impl<'tt> PartialEq for TtHandle<'tt> {
+ fn eq(&self, other: &TtHandle<'tt>) -> bool {
+ self.get() == other.get()
+ }
+}
+
+impl<'tt> Clone for TtHandle<'tt> {
+ fn clone(&self) -> Self {
+ match self {
+ TtHandle::TtRef(tt) => TtHandle::TtRef(tt),
+
+ // This variant *must* contain a `mbe::TokenTree::Token`, and not
+ // any other variant of `mbe::TokenTree`.
+ TtHandle::Token(mbe::TokenTree::Token(tok)) => {
+ TtHandle::Token(mbe::TokenTree::Token(tok.clone()))
+ }
+
+ _ => unreachable!(),
+ }
+ }
+}
+
+// A set of `mbe::TokenTree`s, which may include `TokenTree::Match`s
+// (for macro-by-example syntactic variables). It also carries the
+// `maybe_empty` flag; that is true if and only if the matcher can
+// match an empty token sequence.
+//
+// The First set is computed on submatchers like `$($a:expr b),* $(c)* d`,
+// which has corresponding FIRST = {$a:expr, c, d}.
+// Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}.
+//
+// (Notably, we must allow for *-op to occur zero times.)
+#[derive(Clone, Debug)]
+struct TokenSet<'tt> {
+ tokens: Vec<TtHandle<'tt>>,
+ maybe_empty: bool,
+}
+
+impl<'tt> TokenSet<'tt> {
+ // Returns a set for the empty sequence.
+ fn empty() -> Self {
+ TokenSet { tokens: Vec::new(), maybe_empty: true }
+ }
+
+ // Returns the set `{ tok }` for the single-token (and thus
+ // non-empty) sequence [tok].
+ fn singleton(tt: TtHandle<'tt>) -> Self {
+ TokenSet { tokens: vec![tt], maybe_empty: false }
+ }
+
+ // Changes self to be the set `{ tok }`.
+ // Since `tok` is always present, marks self as non-empty.
+ fn replace_with(&mut self, tt: TtHandle<'tt>) {
+ self.tokens.clear();
+ self.tokens.push(tt);
+ self.maybe_empty = false;
+ }
+
+ // Changes self to be the empty set `{}`; meant for use when
+ // the particular token does not matter, but we want to
+ // record that it occurs.
+ fn replace_with_irrelevant(&mut self) {
+ self.tokens.clear();
+ self.maybe_empty = false;
+ }
+
+ // Adds `tok` to the set for `self`, marking sequence as non-empy.
+ fn add_one(&mut self, tt: TtHandle<'tt>) {
+ if !self.tokens.contains(&tt) {
+ self.tokens.push(tt);
+ }
+ self.maybe_empty = false;
+ }
+
+ // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.)
+ fn add_one_maybe(&mut self, tt: TtHandle<'tt>) {
+ if !self.tokens.contains(&tt) {
+ self.tokens.push(tt);
+ }
+ }
+
+ // Adds all elements of `other` to this.
+ //
+ // (Since this is a set, we filter out duplicates.)
+ //
+ // If `other` is potentially empty, then preserves the previous
+ // setting of the empty flag of `self`. If `other` is guaranteed
+ // non-empty, then `self` is marked non-empty.
+ fn add_all(&mut self, other: &Self) {
+ for tt in &other.tokens {
+ if !self.tokens.contains(tt) {
+ self.tokens.push(tt.clone());
+ }
+ }
+ if !other.maybe_empty {
+ self.maybe_empty = false;
+ }
+ }
+}
+
+// Checks that `matcher` is internally consistent and that it
+// can legally be followed by a token `N`, for all `N` in `follow`.
+// (If `follow` is empty, then it imposes no constraint on
+// the `matcher`.)
+//
+// Returns the set of NT tokens that could possibly come last in
+// `matcher`. (If `matcher` matches the empty sequence, then
+// `maybe_empty` will be set to true.)
+//
+// Requires that `first_sets` is pre-computed for `matcher`;
+// see `FirstSets::new`.
+fn check_matcher_core<'tt>(
+ sess: &ParseSess,
+ def: &ast::Item,
+ first_sets: &FirstSets<'tt>,
+ matcher: &'tt [mbe::TokenTree],
+ follow: &TokenSet<'tt>,
+) -> TokenSet<'tt> {
+ use mbe::TokenTree;
+
+ let mut last = TokenSet::empty();
+
+ // 2. For each token and suffix [T, SUFFIX] in M:
+ // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty,
+ // then ensure T can also be followed by any element of FOLLOW.
+ 'each_token: for i in 0..matcher.len() {
+ let token = &matcher[i];
+ let suffix = &matcher[i + 1..];
+
+ let build_suffix_first = || {
+ let mut s = first_sets.first(suffix);
+ if s.maybe_empty {
+ s.add_all(follow);
+ }
+ s
+ };
+
+ // (we build `suffix_first` on demand below; you can tell
+ // which cases are supposed to fall through by looking for the
+ // initialization of this variable.)
+ let suffix_first;
+
+ // First, update `last` so that it corresponds to the set
+ // of NT tokens that might end the sequence `... token`.
+ match *token {
+ TokenTree::Token(..)
+ | TokenTree::MetaVar(..)
+ | TokenTree::MetaVarDecl(..)
+ | TokenTree::MetaVarExpr(..) => {
+ if token_can_be_followed_by_any(token) {
+ // don't need to track tokens that work with any,
+ last.replace_with_irrelevant();
+ // ... and don't need to check tokens that can be
+ // followed by anything against SUFFIX.
+ continue 'each_token;
+ } else {
+ last.replace_with(TtHandle::TtRef(token));
+ suffix_first = build_suffix_first();
+ }
+ }
+ TokenTree::Delimited(span, ref d) => {
+ let my_suffix = TokenSet::singleton(TtHandle::from_token_kind(
+ token::CloseDelim(d.delim),
+ span.close,
+ ));
+ check_matcher_core(sess, def, first_sets, &d.tts, &my_suffix);
+ // don't track non NT tokens
+ last.replace_with_irrelevant();
+
+ // also, we don't need to check delimited sequences
+ // against SUFFIX
+ continue 'each_token;
+ }
+ TokenTree::Sequence(_, ref seq_rep) => {
+ suffix_first = build_suffix_first();
+ // The trick here: when we check the interior, we want
+ // to include the separator (if any) as a potential
+ // (but not guaranteed) element of FOLLOW. So in that
+ // case, we make a temp copy of suffix and stuff
+ // delimiter in there.
+ //
+ // FIXME: Should I first scan suffix_first to see if
+ // delimiter is already in it before I go through the
+ // work of cloning it? But then again, this way I may
+ // get a "tighter" span?
+ let mut new;
+ let my_suffix = if let Some(sep) = &seq_rep.separator {
+ new = suffix_first.clone();
+ new.add_one_maybe(TtHandle::from_token(sep.clone()));
+ &new
+ } else {
+ &suffix_first
+ };
+
+ // At this point, `suffix_first` is built, and
+ // `my_suffix` is some TokenSet that we can use
+ // for checking the interior of `seq_rep`.
+ let next = check_matcher_core(sess, def, first_sets, &seq_rep.tts, my_suffix);
+ if next.maybe_empty {
+ last.add_all(&next);
+ } else {
+ last = next;
+ }
+
+ // the recursive call to check_matcher_core already ran the 'each_last
+ // check below, so we can just keep going forward here.
+ continue 'each_token;
+ }
+ }
+
+ // (`suffix_first` guaranteed initialized once reaching here.)
+
+ // Now `last` holds the complete set of NT tokens that could
+ // end the sequence before SUFFIX. Check that every one works with `suffix`.
+ for tt in &last.tokens {
+ if let &TokenTree::MetaVarDecl(span, name, Some(kind)) = tt.get() {
+ for next_token in &suffix_first.tokens {
+ let next_token = next_token.get();
+
+ // Check if the old pat is used and the next token is `|`
+ // to warn about incompatibility with Rust 2021.
+ // We only emit this lint if we're parsing the original
+ // definition of this macro_rules, not while (re)parsing
+ // the macro when compiling another crate that is using the
+ // macro. (See #86567.)
+ // Macros defined in the current crate have a real node id,
+ // whereas macros from an external crate have a dummy id.
+ if def.id != DUMMY_NODE_ID
+ && matches!(kind, NonterminalKind::PatParam { inferred: true })
+ && matches!(next_token, TokenTree::Token(token) if token.kind == BinOp(token::BinOpToken::Or))
+ {
+ // It is suggestion to use pat_param, for example: $x:pat -> $x:pat_param.
+ let suggestion = quoted_tt_to_string(&TokenTree::MetaVarDecl(
+ span,
+ name,
+ Some(NonterminalKind::PatParam { inferred: false }),
+ ));
+ sess.buffer_lint_with_diagnostic(
+ &RUST_2021_INCOMPATIBLE_OR_PATTERNS,
+ span,
+ ast::CRATE_NODE_ID,
+ "the meaning of the `pat` fragment specifier is changing in Rust 2021, which may affect this macro",
+ BuiltinLintDiagnostics::OrPatternsBackCompat(span, suggestion),
+ );
+ }
+ match is_in_follow(next_token, kind) {
+ IsInFollow::Yes => {}
+ IsInFollow::No(possible) => {
+ let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1
+ {
+ "is"
+ } else {
+ "may be"
+ };
+
+ let sp = next_token.span();
+ let mut err = sess.span_diagnostic.struct_span_err(
+ sp,
+ &format!(
+ "`${name}:{frag}` {may_be} followed by `{next}`, which \
+ is not allowed for `{frag}` fragments",
+ name = name,
+ frag = kind,
+ next = quoted_tt_to_string(next_token),
+ may_be = may_be
+ ),
+ );
+ err.span_label(sp, format!("not allowed after `{}` fragments", kind));
+
+ if kind == NonterminalKind::PatWithOr
+ && sess.edition.rust_2021()
+ && next_token.is_token(&BinOp(token::BinOpToken::Or))
+ {
+ let suggestion = quoted_tt_to_string(&TokenTree::MetaVarDecl(
+ span,
+ name,
+ Some(NonterminalKind::PatParam { inferred: false }),
+ ));
+ err.span_suggestion(
+ span,
+ "try a `pat_param` fragment specifier instead",
+ suggestion,
+ Applicability::MaybeIncorrect,
+ );
+ }
+
+ let msg = "allowed there are: ";
+ match possible {
+ &[] => {}
+ &[t] => {
+ err.note(&format!(
+ "only {} is allowed after `{}` fragments",
+ t, kind,
+ ));
+ }
+ ts => {
+ err.note(&format!(
+ "{}{} or {}",
+ msg,
+ ts[..ts.len() - 1]
+ .iter()
+ .copied()
+ .collect::<Vec<_>>()
+ .join(", "),
+ ts[ts.len() - 1],
+ ));
+ }
+ }
+ err.emit();
+ }
+ }
+ }
+ }
+ }
+ }
+ last
+}
+
+fn token_can_be_followed_by_any(tok: &mbe::TokenTree) -> bool {
+ if let mbe::TokenTree::MetaVarDecl(_, _, Some(kind)) = *tok {
+ frag_can_be_followed_by_any(kind)
+ } else {
+ // (Non NT's can always be followed by anything in matchers.)
+ true
+ }
+}
+
+/// Returns `true` if a fragment of type `frag` can be followed by any sort of
+/// token. We use this (among other things) as a useful approximation
+/// for when `frag` can be followed by a repetition like `$(...)*` or
+/// `$(...)+`. In general, these can be a bit tricky to reason about,
+/// so we adopt a conservative position that says that any fragment
+/// specifier which consumes at most one token tree can be followed by
+/// a fragment specifier (indeed, these fragments can be followed by
+/// ANYTHING without fear of future compatibility hazards).
+fn frag_can_be_followed_by_any(kind: NonterminalKind) -> bool {
+ matches!(
+ kind,
+ NonterminalKind::Item // always terminated by `}` or `;`
+ | NonterminalKind::Block // exactly one token tree
+ | NonterminalKind::Ident // exactly one token tree
+ | NonterminalKind::Literal // exactly one token tree
+ | NonterminalKind::Meta // exactly one token tree
+ | NonterminalKind::Lifetime // exactly one token tree
+ | NonterminalKind::TT // exactly one token tree
+ )
+}
+
+enum IsInFollow {
+ Yes,
+ No(&'static [&'static str]),
+}
+
+/// Returns `true` if `frag` can legally be followed by the token `tok`. For
+/// fragments that can consume an unbounded number of tokens, `tok`
+/// must be within a well-defined follow set. This is intended to
+/// guarantee future compatibility: for example, without this rule, if
+/// we expanded `expr` to include a new binary operator, we might
+/// break macros that were relying on that binary operator as a
+/// separator.
+// when changing this do not forget to update doc/book/macros.md!
+fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow {
+ use mbe::TokenTree;
+
+ if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok {
+ // closing a token tree can never be matched by any fragment;
+ // iow, we always require that `(` and `)` match, etc.
+ IsInFollow::Yes
+ } else {
+ match kind {
+ NonterminalKind::Item => {
+ // since items *must* be followed by either a `;` or a `}`, we can
+ // accept anything after them
+ IsInFollow::Yes
+ }
+ NonterminalKind::Block => {
+ // anything can follow block, the braces provide an easy boundary to
+ // maintain
+ IsInFollow::Yes
+ }
+ NonterminalKind::Stmt | NonterminalKind::Expr => {
+ const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ FatArrow | Comma | Semi => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ },
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ NonterminalKind::PatParam { .. } => {
+ const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ FatArrow | Comma | Eq | BinOp(token::Or) => IsInFollow::Yes,
+ Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ },
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ NonterminalKind::PatWithOr { .. } => {
+ const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`if`", "`in`"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ FatArrow | Comma | Eq => IsInFollow::Yes,
+ Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ },
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ NonterminalKind::Path | NonterminalKind::Ty => {
+ const TOKENS: &[&str] = &[
+ "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`",
+ "`where`",
+ ];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ OpenDelim(Delimiter::Brace)
+ | OpenDelim(Delimiter::Bracket)
+ | Comma
+ | FatArrow
+ | Colon
+ | Eq
+ | Gt
+ | BinOp(token::Shr)
+ | Semi
+ | BinOp(token::Or) => IsInFollow::Yes,
+ Ident(name, false) if name == kw::As || name == kw::Where => {
+ IsInFollow::Yes
+ }
+ _ => IsInFollow::No(TOKENS),
+ },
+ TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Block)) => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ NonterminalKind::Ident | NonterminalKind::Lifetime => {
+ // being a single token, idents and lifetimes are harmless
+ IsInFollow::Yes
+ }
+ NonterminalKind::Literal => {
+ // literals may be of a single token, or two tokens (negative numbers)
+ IsInFollow::Yes
+ }
+ NonterminalKind::Meta | NonterminalKind::TT => {
+ // being either a single token or a delimited sequence, tt is
+ // harmless
+ IsInFollow::Yes
+ }
+ NonterminalKind::Vis => {
+ // Explicitly disallow `priv`, on the off chance it comes back.
+ const TOKENS: &[&str] = &["`,`", "an ident", "a type"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ Comma => IsInFollow::Yes,
+ Ident(name, is_raw) if is_raw || name != kw::Priv => IsInFollow::Yes,
+ _ => {
+ if token.can_begin_type() {
+ IsInFollow::Yes
+ } else {
+ IsInFollow::No(TOKENS)
+ }
+ }
+ },
+ TokenTree::MetaVarDecl(
+ _,
+ _,
+ Some(NonterminalKind::Ident | NonterminalKind::Ty | NonterminalKind::Path),
+ ) => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ }
+ }
+}
+
+fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
+ match *tt {
+ mbe::TokenTree::Token(ref token) => pprust::token_to_string(&token).into(),
+ mbe::TokenTree::MetaVar(_, name) => format!("${}", name),
+ mbe::TokenTree::MetaVarDecl(_, name, Some(kind)) => format!("${}:{}", name, kind),
+ mbe::TokenTree::MetaVarDecl(_, name, None) => format!("${}:", name),
+ _ => panic!(
+ "{}",
+ "unexpected mbe::TokenTree::{Sequence or Delimited} \
+ in follow set checker"
+ ),
+ }
+}
+
+fn parser_from_cx(sess: &ParseSess, tts: TokenStream) -> Parser<'_> {
+ Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS)
+}
+
+/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
+/// other tokens, this is "unexpected token...".
+fn parse_failure_msg(tok: &Token) -> String {
+ match tok.kind {
+ token::Eof => "unexpected end of macro invocation".to_string(),
+ _ => format!("no rules expected the token `{}`", pprust::token_to_string(tok),),
+ }
+}
diff --git a/compiler/rustc_expand/src/mbe/metavar_expr.rs b/compiler/rustc_expand/src/mbe/metavar_expr.rs
new file mode 100644
index 000000000..fc808401a
--- /dev/null
+++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs
@@ -0,0 +1,161 @@
+use rustc_ast::token::{self, Delimiter};
+use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree};
+use rustc_ast::{LitIntType, LitKind};
+use rustc_ast_pretty::pprust;
+use rustc_errors::{Applicability, PResult};
+use rustc_session::parse::ParseSess;
+use rustc_span::symbol::Ident;
+use rustc_span::Span;
+
+/// A meta-variable expression, for expansions based on properties of meta-variables.
+#[derive(Debug, Clone, PartialEq, Encodable, Decodable)]
+pub(crate) enum MetaVarExpr {
+ /// The number of repetitions of an identifier, optionally limited to a number
+ /// of outer-most repetition depths. If the depth limit is `None` then the depth is unlimited.
+ Count(Ident, Option<usize>),
+
+ /// Ignore a meta-variable for repetition without expansion.
+ Ignore(Ident),
+
+ /// The index of the repetition at a particular depth, where 0 is the inner-most
+ /// repetition. The `usize` is the depth.
+ Index(usize),
+
+ /// The length of the repetition at a particular depth, where 0 is the inner-most
+ /// repetition. The `usize` is the depth.
+ Length(usize),
+}
+
+impl MetaVarExpr {
+ /// Attempt to parse a meta-variable expression from a token stream.
+ pub(crate) fn parse<'sess>(
+ input: &TokenStream,
+ outer_span: Span,
+ sess: &'sess ParseSess,
+ ) -> PResult<'sess, MetaVarExpr> {
+ let mut tts = input.trees();
+ let ident = parse_ident(&mut tts, sess, outer_span)?;
+ let Some(TokenTree::Delimited(_, Delimiter::Parenthesis, args)) = tts.next() else {
+ let msg = "meta-variable expression parameter must be wrapped in parentheses";
+ return Err(sess.span_diagnostic.struct_span_err(ident.span, msg));
+ };
+ check_trailing_token(&mut tts, sess)?;
+ let mut iter = args.trees();
+ let rslt = match &*ident.as_str() {
+ "count" => parse_count(&mut iter, sess, ident.span)?,
+ "ignore" => MetaVarExpr::Ignore(parse_ident(&mut iter, sess, ident.span)?),
+ "index" => MetaVarExpr::Index(parse_depth(&mut iter, sess, ident.span)?),
+ "length" => MetaVarExpr::Length(parse_depth(&mut iter, sess, ident.span)?),
+ _ => {
+ let err_msg = "unrecognized meta-variable expression";
+ let mut err = sess.span_diagnostic.struct_span_err(ident.span, err_msg);
+ err.span_suggestion(
+ ident.span,
+ "supported expressions are count, ignore, index and length",
+ "",
+ Applicability::MachineApplicable,
+ );
+ return Err(err);
+ }
+ };
+ check_trailing_token(&mut iter, sess)?;
+ Ok(rslt)
+ }
+
+ pub(crate) fn ident(&self) -> Option<Ident> {
+ match *self {
+ MetaVarExpr::Count(ident, _) | MetaVarExpr::Ignore(ident) => Some(ident),
+ MetaVarExpr::Index(..) | MetaVarExpr::Length(..) => None,
+ }
+ }
+}
+
+// Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}`
+fn check_trailing_token<'sess>(
+ iter: &mut CursorRef<'_>,
+ sess: &'sess ParseSess,
+) -> PResult<'sess, ()> {
+ if let Some(tt) = iter.next() {
+ let mut diag = sess
+ .span_diagnostic
+ .struct_span_err(tt.span(), &format!("unexpected token: {}", pprust::tt_to_string(tt)));
+ diag.span_note(tt.span(), "meta-variable expression must not have trailing tokens");
+ Err(diag)
+ } else {
+ Ok(())
+ }
+}
+
+/// Parse a meta-variable `count` expression: `count(ident[, depth])`
+fn parse_count<'sess>(
+ iter: &mut CursorRef<'_>,
+ sess: &'sess ParseSess,
+ span: Span,
+) -> PResult<'sess, MetaVarExpr> {
+ let ident = parse_ident(iter, sess, span)?;
+ let depth = if try_eat_comma(iter) { Some(parse_depth(iter, sess, span)?) } else { None };
+ Ok(MetaVarExpr::Count(ident, depth))
+}
+
+/// Parses the depth used by index(depth) and length(depth).
+fn parse_depth<'sess>(
+ iter: &mut CursorRef<'_>,
+ sess: &'sess ParseSess,
+ span: Span,
+) -> PResult<'sess, usize> {
+ let Some(tt) = iter.next() else { return Ok(0) };
+ let TokenTree::Token(token::Token {
+ kind: token::TokenKind::Literal(lit), ..
+ }, _) = tt else {
+ return Err(sess.span_diagnostic.struct_span_err(
+ span,
+ "meta-variable expression depth must be a literal"
+ ));
+ };
+ if let Ok(lit_kind) = LitKind::from_lit_token(*lit)
+ && let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind
+ && let Ok(n_usize) = usize::try_from(n_u128)
+ {
+ Ok(n_usize)
+ }
+ else {
+ let msg = "only unsuffixes integer literals are supported in meta-variable expressions";
+ Err(sess.span_diagnostic.struct_span_err(span, msg))
+ }
+}
+
+/// Parses an generic ident
+fn parse_ident<'sess>(
+ iter: &mut CursorRef<'_>,
+ sess: &'sess ParseSess,
+ span: Span,
+) -> PResult<'sess, Ident> {
+ if let Some(tt) = iter.next() && let TokenTree::Token(token, _) = tt {
+ if let Some((elem, false)) = token.ident() {
+ return Ok(elem);
+ }
+ let token_str = pprust::token_to_string(token);
+ let mut err = sess.span_diagnostic.struct_span_err(
+ span,
+ &format!("expected identifier, found `{}`", &token_str)
+ );
+ err.span_suggestion(
+ token.span,
+ &format!("try removing `{}`", &token_str),
+ "",
+ Applicability::MaybeIncorrect,
+ );
+ return Err(err);
+ }
+ Err(sess.span_diagnostic.struct_span_err(span, "expected identifier"))
+}
+
+/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
+/// iterator is not modified and the result is `false`.
+fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool {
+ if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) {
+ let _ = iter.next();
+ return true;
+ }
+ false
+}
diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs
new file mode 100644
index 000000000..ee17d54f6
--- /dev/null
+++ b/compiler/rustc_expand/src/mbe/quoted.rs
@@ -0,0 +1,366 @@
+use crate::mbe::macro_parser::count_metavar_decls;
+use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree};
+
+use rustc_ast::token::{self, Delimiter, Token};
+use rustc_ast::{tokenstream, NodeId};
+use rustc_ast_pretty::pprust;
+use rustc_feature::Features;
+use rustc_session::parse::{feature_err, ParseSess};
+use rustc_span::symbol::{kw, sym, Ident};
+
+use rustc_span::edition::Edition;
+use rustc_span::{Span, SyntaxContext};
+
+const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \
+ `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \
+ `literal`, `path`, `meta`, `tt`, `item` and `vis`";
+
+/// Takes a `tokenstream::TokenStream` and returns a `Vec<self::TokenTree>`. Specifically, this
+/// takes a generic `TokenStream`, such as is used in the rest of the compiler, and returns a
+/// collection of `TokenTree` for use in parsing a macro.
+///
+/// # Parameters
+///
+/// - `input`: a token stream to read from, the contents of which we are parsing.
+/// - `parsing_patterns`: `parse` can be used to parse either the "patterns" or the "body" of a
+/// macro. Both take roughly the same form _except_ that:
+/// - In a pattern, metavars are declared with their "matcher" type. For example `$var:expr` or
+/// `$id:ident`. In this example, `expr` and `ident` are "matchers". They are not present in the
+/// body of a macro rule -- just in the pattern.
+/// - Metavariable expressions are only valid in the "body", not the "pattern".
+/// - `sess`: the parsing session. Any errors will be emitted to this session.
+/// - `node_id`: the NodeId of the macro we are parsing.
+/// - `features`: language features so we can do feature gating.
+///
+/// # Returns
+///
+/// A collection of `self::TokenTree`. There may also be some errors emitted to `sess`.
+pub(super) fn parse(
+ input: tokenstream::TokenStream,
+ parsing_patterns: bool,
+ sess: &ParseSess,
+ node_id: NodeId,
+ features: &Features,
+ edition: Edition,
+) -> Vec<TokenTree> {
+ // Will contain the final collection of `self::TokenTree`
+ let mut result = Vec::new();
+
+ // For each token tree in `input`, parse the token into a `self::TokenTree`, consuming
+ // additional trees if need be.
+ let mut trees = input.into_trees();
+ while let Some(tree) = trees.next() {
+ // Given the parsed tree, if there is a metavar and we are expecting matchers, actually
+ // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`).
+ let tree = parse_tree(tree, &mut trees, parsing_patterns, sess, node_id, features, edition);
+ match tree {
+ TokenTree::MetaVar(start_sp, ident) if parsing_patterns => {
+ let span = match trees.next() {
+ Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span }, _)) => {
+ match trees.next() {
+ Some(tokenstream::TokenTree::Token(token, _)) => match token.ident() {
+ Some((frag, _)) => {
+ let span = token.span.with_lo(start_sp.lo());
+
+ let kind =
+ token::NonterminalKind::from_symbol(frag.name, || {
+ // FIXME(#85708) - once we properly decode a foreign
+ // crate's `SyntaxContext::root`, then we can replace
+ // this with just `span.edition()`. A
+ // `SyntaxContext::root()` from the current crate will
+ // have the edition of the current crate, and a
+ // `SyntaxContext::root()` from a foreign crate will
+ // have the edition of that crate (which we manually
+ // retrieve via the `edition` parameter).
+ if span.ctxt() == SyntaxContext::root() {
+ edition
+ } else {
+ span.edition()
+ }
+ })
+ .unwrap_or_else(
+ || {
+ let msg = format!(
+ "invalid fragment specifier `{}`",
+ frag.name
+ );
+ sess.span_diagnostic
+ .struct_span_err(span, &msg)
+ .help(VALID_FRAGMENT_NAMES_MSG)
+ .emit();
+ token::NonterminalKind::Ident
+ },
+ );
+ result.push(TokenTree::MetaVarDecl(span, ident, Some(kind)));
+ continue;
+ }
+ _ => token.span,
+ },
+ tree => tree.as_ref().map_or(span, tokenstream::TokenTree::span),
+ }
+ }
+ tree => tree.as_ref().map_or(start_sp, tokenstream::TokenTree::span),
+ };
+
+ result.push(TokenTree::MetaVarDecl(span, ident, None));
+ }
+
+ // Not a metavar or no matchers allowed, so just return the tree
+ _ => result.push(tree),
+ }
+ }
+ result
+}
+
+/// Asks for the `macro_metavar_expr` feature if it is not already declared
+fn maybe_emit_macro_metavar_expr_feature(features: &Features, sess: &ParseSess, span: Span) {
+ if !features.macro_metavar_expr {
+ let msg = "meta-variable expressions are unstable";
+ feature_err(&sess, sym::macro_metavar_expr, span, msg).emit();
+ }
+}
+
+/// Takes a `tokenstream::TokenTree` and returns a `self::TokenTree`. Specifically, this takes a
+/// generic `TokenTree`, such as is used in the rest of the compiler, and returns a `TokenTree`
+/// for use in parsing a macro.
+///
+/// Converting the given tree may involve reading more tokens.
+///
+/// # Parameters
+///
+/// - `tree`: the tree we wish to convert.
+/// - `outer_trees`: an iterator over trees. We may need to read more tokens from it in order to finish
+/// converting `tree`
+/// - `parsing_patterns`: same as [parse].
+/// - `sess`: the parsing session. Any errors will be emitted to this session.
+/// - `features`: language features so we can do feature gating.
+fn parse_tree(
+ tree: tokenstream::TokenTree,
+ outer_trees: &mut impl Iterator<Item = tokenstream::TokenTree>,
+ parsing_patterns: bool,
+ sess: &ParseSess,
+ node_id: NodeId,
+ features: &Features,
+ edition: Edition,
+) -> TokenTree {
+ // Depending on what `tree` is, we could be parsing different parts of a macro
+ match tree {
+ // `tree` is a `$` token. Look at the next token in `trees`
+ tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _) => {
+ // FIXME: Handle `Invisible`-delimited groups in a more systematic way
+ // during parsing.
+ let mut next = outer_trees.next();
+ let mut trees: Box<dyn Iterator<Item = tokenstream::TokenTree>>;
+ if let Some(tokenstream::TokenTree::Delimited(_, Delimiter::Invisible, tts)) = next {
+ trees = Box::new(tts.into_trees());
+ next = trees.next();
+ } else {
+ trees = Box::new(outer_trees);
+ }
+
+ match next {
+ // `tree` is followed by a delimited set of token trees.
+ Some(tokenstream::TokenTree::Delimited(delim_span, delim, tts)) => {
+ if parsing_patterns {
+ if delim != Delimiter::Parenthesis {
+ span_dollar_dollar_or_metavar_in_the_lhs_err(
+ sess,
+ &Token { kind: token::OpenDelim(delim), span: delim_span.entire() },
+ );
+ }
+ } else {
+ match delim {
+ Delimiter::Brace => {
+ // The delimiter is `{`. This indicates the beginning
+ // of a meta-variable expression (e.g. `${count(ident)}`).
+ // Try to parse the meta-variable expression.
+ match MetaVarExpr::parse(&tts, delim_span.entire(), sess) {
+ Err(mut err) => {
+ err.emit();
+ // Returns early the same read `$` to avoid spanning
+ // unrelated diagnostics that could be performed afterwards
+ return TokenTree::token(token::Dollar, span);
+ }
+ Ok(elem) => {
+ maybe_emit_macro_metavar_expr_feature(
+ features,
+ sess,
+ delim_span.entire(),
+ );
+ return TokenTree::MetaVarExpr(delim_span, elem);
+ }
+ }
+ }
+ Delimiter::Parenthesis => {}
+ _ => {
+ let tok = pprust::token_kind_to_string(&token::OpenDelim(delim));
+ let msg = format!("expected `(` or `{{`, found `{}`", tok);
+ sess.span_diagnostic.span_err(delim_span.entire(), &msg);
+ }
+ }
+ }
+ // If we didn't find a metavar expression above, then we must have a
+ // repetition sequence in the macro (e.g. `$(pat)*`). Parse the
+ // contents of the sequence itself
+ let sequence = parse(tts, parsing_patterns, sess, node_id, features, edition);
+ // Get the Kleene operator and optional separator
+ let (separator, kleene) =
+ parse_sep_and_kleene_op(&mut trees, delim_span.entire(), sess);
+ // Count the number of captured "names" (i.e., named metavars)
+ let num_captures =
+ if parsing_patterns { count_metavar_decls(&sequence) } else { 0 };
+ TokenTree::Sequence(
+ delim_span,
+ SequenceRepetition { tts: sequence, separator, kleene, num_captures },
+ )
+ }
+
+ // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate`
+ // special metavariable that names the crate of the invocation.
+ Some(tokenstream::TokenTree::Token(token, _)) if token.is_ident() => {
+ let (ident, is_raw) = token.ident().unwrap();
+ let span = ident.span.with_lo(span.lo());
+ if ident.name == kw::Crate && !is_raw {
+ TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span)
+ } else {
+ TokenTree::MetaVar(span, ident)
+ }
+ }
+
+ // `tree` is followed by another `$`. This is an escaped `$`.
+ Some(tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _)) => {
+ if parsing_patterns {
+ span_dollar_dollar_or_metavar_in_the_lhs_err(
+ sess,
+ &Token { kind: token::Dollar, span },
+ );
+ } else {
+ maybe_emit_macro_metavar_expr_feature(features, sess, span);
+ }
+ TokenTree::token(token::Dollar, span)
+ }
+
+ // `tree` is followed by some other token. This is an error.
+ Some(tokenstream::TokenTree::Token(token, _)) => {
+ let msg = format!(
+ "expected identifier, found `{}`",
+ pprust::token_to_string(&token),
+ );
+ sess.span_diagnostic.span_err(token.span, &msg);
+ TokenTree::MetaVar(token.span, Ident::empty())
+ }
+
+ // There are no more tokens. Just return the `$` we already have.
+ None => TokenTree::token(token::Dollar, span),
+ }
+ }
+
+ // `tree` is an arbitrary token. Keep it.
+ tokenstream::TokenTree::Token(token, _) => TokenTree::Token(token),
+
+ // `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to
+ // descend into the delimited set and further parse it.
+ tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited(
+ span,
+ Delimited {
+ delim,
+ tts: parse(tts, parsing_patterns, sess, node_id, features, edition),
+ },
+ ),
+ }
+}
+
+/// Takes a token and returns `Some(KleeneOp)` if the token is `+` `*` or `?`. Otherwise, return
+/// `None`.
+fn kleene_op(token: &Token) -> Option<KleeneOp> {
+ match token.kind {
+ token::BinOp(token::Star) => Some(KleeneOp::ZeroOrMore),
+ token::BinOp(token::Plus) => Some(KleeneOp::OneOrMore),
+ token::Question => Some(KleeneOp::ZeroOrOne),
+ _ => None,
+ }
+}
+
+/// Parse the next token tree of the input looking for a KleeneOp. Returns
+///
+/// - Ok(Ok((op, span))) if the next token tree is a KleeneOp
+/// - Ok(Err(tok, span)) if the next token tree is a token but not a KleeneOp
+/// - Err(span) if the next token tree is not a token
+fn parse_kleene_op(
+ input: &mut impl Iterator<Item = tokenstream::TokenTree>,
+ span: Span,
+) -> Result<Result<(KleeneOp, Span), Token>, Span> {
+ match input.next() {
+ Some(tokenstream::TokenTree::Token(token, _)) => match kleene_op(&token) {
+ Some(op) => Ok(Ok((op, token.span))),
+ None => Ok(Err(token)),
+ },
+ tree => Err(tree.as_ref().map_or(span, tokenstream::TokenTree::span)),
+ }
+}
+
+/// Attempt to parse a single Kleene star, possibly with a separator.
+///
+/// For example, in a pattern such as `$(a),*`, `a` is the pattern to be repeated, `,` is the
+/// separator, and `*` is the Kleene operator. This function is specifically concerned with parsing
+/// the last two tokens of such a pattern: namely, the optional separator and the Kleene operator
+/// itself. Note that here we are parsing the _macro_ itself, rather than trying to match some
+/// stream of tokens in an invocation of a macro.
+///
+/// This function will take some input iterator `input` corresponding to `span` and a parsing
+/// session `sess`. If the next one (or possibly two) tokens in `input` correspond to a Kleene
+/// operator and separator, then a tuple with `(separator, KleeneOp)` is returned. Otherwise, an
+/// error with the appropriate span is emitted to `sess` and a dummy value is returned.
+fn parse_sep_and_kleene_op(
+ input: &mut impl Iterator<Item = tokenstream::TokenTree>,
+ span: Span,
+ sess: &ParseSess,
+) -> (Option<Token>, KleeneToken) {
+ // We basically look at two token trees here, denoted as #1 and #2 below
+ let span = match parse_kleene_op(input, span) {
+ // #1 is a `?`, `+`, or `*` KleeneOp
+ Ok(Ok((op, span))) => return (None, KleeneToken::new(op, span)),
+
+ // #1 is a separator followed by #2, a KleeneOp
+ Ok(Err(token)) => match parse_kleene_op(input, token.span) {
+ // #2 is the `?` Kleene op, which does not take a separator (error)
+ Ok(Ok((KleeneOp::ZeroOrOne, span))) => {
+ // Error!
+ sess.span_diagnostic.span_err(
+ token.span,
+ "the `?` macro repetition operator does not take a separator",
+ );
+
+ // Return a dummy
+ return (None, KleeneToken::new(KleeneOp::ZeroOrMore, span));
+ }
+
+ // #2 is a KleeneOp :D
+ Ok(Ok((op, span))) => return (Some(token), KleeneToken::new(op, span)),
+
+ // #2 is a random token or not a token at all :(
+ Ok(Err(Token { span, .. })) | Err(span) => span,
+ },
+
+ // #1 is not a token
+ Err(span) => span,
+ };
+
+ // If we ever get to this point, we have experienced an "unexpected token" error
+ sess.span_diagnostic.span_err(span, "expected one of: `*`, `+`, or `?`");
+
+ // Return a dummy
+ (None, KleeneToken::new(KleeneOp::ZeroOrMore, span))
+}
+
+// `$$` or a meta-variable is the lhs of a macro but shouldn't.
+//
+// For example, `macro_rules! foo { ( ${length()} ) => {} }`
+fn span_dollar_dollar_or_metavar_in_the_lhs_err<'sess>(sess: &'sess ParseSess, token: &Token) {
+ sess.span_diagnostic
+ .span_err(token.span, &format!("unexpected token: {}", pprust::token_to_string(token)));
+ sess.span_diagnostic.span_note_without_error(
+ token.span,
+ "`$$` and meta-variable expressions are not allowed inside macro parameter definitions",
+ );
+}
diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs
new file mode 100644
index 000000000..e47ea83ac
--- /dev/null
+++ b/compiler/rustc_expand/src/mbe/transcribe.rs
@@ -0,0 +1,580 @@
+use crate::base::ExtCtxt;
+use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch};
+use crate::mbe::{self, MetaVarExpr};
+use rustc_ast::mut_visit::{self, MutVisitor};
+use rustc_ast::token::{self, Delimiter, Token, TokenKind};
+use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree};
+use rustc_data_structures::fx::FxHashMap;
+use rustc_errors::{pluralize, PResult};
+use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed};
+use rustc_span::hygiene::{LocalExpnId, Transparency};
+use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent};
+use rustc_span::Span;
+
+use smallvec::{smallvec, SmallVec};
+use std::mem;
+
+// A Marker adds the given mark to the syntax context.
+struct Marker(LocalExpnId, Transparency);
+
+impl MutVisitor for Marker {
+ const VISIT_TOKENS: bool = true;
+
+ fn visit_span(&mut self, span: &mut Span) {
+ *span = span.apply_mark(self.0.to_expn_id(), self.1)
+ }
+}
+
+/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`).
+enum Frame<'a> {
+ Delimited { tts: &'a [mbe::TokenTree], idx: usize, delim: Delimiter, span: DelimSpan },
+ Sequence { tts: &'a [mbe::TokenTree], idx: usize, sep: Option<Token> },
+}
+
+impl<'a> Frame<'a> {
+ /// Construct a new frame around the delimited set of tokens.
+ fn new(src: &'a mbe::Delimited, span: DelimSpan) -> Frame<'a> {
+ Frame::Delimited { tts: &src.tts, idx: 0, delim: src.delim, span }
+ }
+}
+
+impl<'a> Iterator for Frame<'a> {
+ type Item = &'a mbe::TokenTree;
+
+ fn next(&mut self) -> Option<&'a mbe::TokenTree> {
+ match self {
+ Frame::Delimited { tts, ref mut idx, .. }
+ | Frame::Sequence { tts, ref mut idx, .. } => {
+ let res = tts.get(*idx);
+ *idx += 1;
+ res
+ }
+ }
+ }
+}
+
+/// This can do Macro-By-Example transcription.
+/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the
+/// invocation. We are assuming we already know there is a match.
+/// - `src` is the RHS of the MBE, that is, the "example" we are filling in.
+///
+/// For example,
+///
+/// ```rust
+/// macro_rules! foo {
+/// ($id:ident) => { println!("{}", stringify!($id)); }
+/// }
+///
+/// foo!(bar);
+/// ```
+///
+/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`.
+///
+/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
+///
+/// Along the way, we do some additional error checking.
+pub(super) fn transcribe<'a>(
+ cx: &ExtCtxt<'a>,
+ interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
+ src: &mbe::Delimited,
+ src_span: DelimSpan,
+ transparency: Transparency,
+) -> PResult<'a, TokenStream> {
+ // Nothing for us to transcribe...
+ if src.tts.is_empty() {
+ return Ok(TokenStream::default());
+ }
+
+ // We descend into the RHS (`src`), expanding things as we go. This stack contains the things
+ // we have yet to expand/are still expanding. We start the stack off with the whole RHS.
+ let mut stack: SmallVec<[Frame<'_>; 1]> = smallvec![Frame::new(&src, src_span)];
+
+ // As we descend in the RHS, we will need to be able to match nested sequences of matchers.
+ // `repeats` keeps track of where we are in matching at each level, with the last element being
+ // the most deeply nested sequence. This is used as a stack.
+ let mut repeats = Vec::new();
+
+ // `result` contains resulting token stream from the TokenTree we just finished processing. At
+ // the end, this will contain the full result of transcription, but at arbitrary points during
+ // `transcribe`, `result` will contain subsets of the final result.
+ //
+ // Specifically, as we descend into each TokenTree, we will push the existing results onto the
+ // `result_stack` and clear `results`. We will then produce the results of transcribing the
+ // TokenTree into `results`. Then, as we unwind back out of the `TokenTree`, we will pop the
+ // `result_stack` and append `results` too it to produce the new `results` up to that point.
+ //
+ // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level
+ // again, and we are done transcribing.
+ let mut result: Vec<TokenTree> = Vec::new();
+ let mut result_stack = Vec::new();
+ let mut marker = Marker(cx.current_expansion.id, transparency);
+
+ loop {
+ // Look at the last frame on the stack.
+ // If it still has a TokenTree we have not looked at yet, use that tree.
+ let Some(tree) = stack.last_mut().unwrap().next() else {
+ // This else-case never produces a value for `tree` (it `continue`s or `return`s).
+
+ // Otherwise, if we have just reached the end of a sequence and we can keep repeating,
+ // go back to the beginning of the sequence.
+ if let Frame::Sequence { idx, sep, .. } = stack.last_mut().unwrap() {
+ let (repeat_idx, repeat_len) = repeats.last_mut().unwrap();
+ *repeat_idx += 1;
+ if repeat_idx < repeat_len {
+ *idx = 0;
+ if let Some(sep) = sep {
+ result.push(TokenTree::Token(sep.clone(), Spacing::Alone));
+ }
+ continue;
+ }
+ }
+
+ // We are done with the top of the stack. Pop it. Depending on what it was, we do
+ // different things. Note that the outermost item must be the delimited, wrapped RHS
+ // that was passed in originally to `transcribe`.
+ match stack.pop().unwrap() {
+ // Done with a sequence. Pop from repeats.
+ Frame::Sequence { .. } => {
+ repeats.pop();
+ }
+
+ // We are done processing a Delimited. If this is the top-level delimited, we are
+ // done. Otherwise, we unwind the result_stack to append what we have produced to
+ // any previous results.
+ Frame::Delimited { delim, span, .. } => {
+ if result_stack.is_empty() {
+ // No results left to compute! We are back at the top-level.
+ return Ok(TokenStream::new(result));
+ }
+
+ // Step back into the parent Delimited.
+ let tree = TokenTree::Delimited(span, delim, TokenStream::new(result));
+ result = result_stack.pop().unwrap();
+ result.push(tree);
+ }
+ }
+ continue;
+ };
+
+ // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
+ // `tree` contains the next `TokenTree` to be processed.
+ match tree {
+ // We are descending into a sequence. We first make sure that the matchers in the RHS
+ // and the matches in `interp` have the same shape. Otherwise, either the caller or the
+ // macro writer has made a mistake.
+ seq @ mbe::TokenTree::Sequence(_, delimited) => {
+ match lockstep_iter_size(&seq, interp, &repeats) {
+ LockstepIterSize::Unconstrained => {
+ return Err(cx.struct_span_err(
+ seq.span(), /* blame macro writer */
+ "attempted to repeat an expression containing no syntax variables \
+ matched as repeating at this depth",
+ ));
+ }
+
+ LockstepIterSize::Contradiction(msg) => {
+ // FIXME: this really ought to be caught at macro definition time... It
+ // happens when two meta-variables are used in the same repetition in a
+ // sequence, but they come from different sequence matchers and repeat
+ // different amounts.
+ return Err(cx.struct_span_err(seq.span(), &msg));
+ }
+
+ LockstepIterSize::Constraint(len, _) => {
+ // We do this to avoid an extra clone above. We know that this is a
+ // sequence already.
+ let mbe::TokenTree::Sequence(sp, seq) = seq else {
+ unreachable!()
+ };
+
+ // Is the repetition empty?
+ if len == 0 {
+ if seq.kleene.op == mbe::KleeneOp::OneOrMore {
+ // FIXME: this really ought to be caught at macro definition
+ // time... It happens when the Kleene operator in the matcher and
+ // the body for the same meta-variable do not match.
+ return Err(cx.struct_span_err(
+ sp.entire(),
+ "this must repeat at least once",
+ ));
+ }
+ } else {
+ // 0 is the initial counter (we have done 0 repetitions so far). `len`
+ // is the total number of repetitions we should generate.
+ repeats.push((0, len));
+
+ // The first time we encounter the sequence we push it to the stack. It
+ // then gets reused (see the beginning of the loop) until we are done
+ // repeating.
+ stack.push(Frame::Sequence {
+ idx: 0,
+ sep: seq.separator.clone(),
+ tts: &delimited.tts,
+ });
+ }
+ }
+ }
+ }
+
+ // Replace the meta-var with the matched token tree from the invocation.
+ mbe::TokenTree::MetaVar(mut sp, mut original_ident) => {
+ // Find the matched nonterminal from the macro invocation, and use it to replace
+ // the meta-var.
+ let ident = MacroRulesNormalizedIdent::new(original_ident);
+ if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
+ match cur_matched {
+ MatchedTokenTree(ref tt) => {
+ // `tt`s are emitted into the output stream directly as "raw tokens",
+ // without wrapping them into groups.
+ let token = tt.clone();
+ result.push(token);
+ }
+ MatchedNonterminal(ref nt) => {
+ // Other variables are emitted into the output stream as groups with
+ // `Delimiter::Invisible` to maintain parsing priorities.
+ // `Interpolated` is currently used for such groups in rustc parser.
+ marker.visit_span(&mut sp);
+ let token = TokenTree::token_alone(token::Interpolated(nt.clone()), sp);
+ result.push(token);
+ }
+ MatchedSeq(..) => {
+ // We were unable to descend far enough. This is an error.
+ return Err(cx.struct_span_err(
+ sp, /* blame the macro writer */
+ &format!("variable '{}' is still repeating at this depth", ident),
+ ));
+ }
+ }
+ } else {
+ // If we aren't able to match the meta-var, we push it back into the result but
+ // with modified syntax context. (I believe this supports nested macros).
+ marker.visit_span(&mut sp);
+ marker.visit_ident(&mut original_ident);
+ result.push(TokenTree::token_alone(token::Dollar, sp));
+ result.push(TokenTree::Token(
+ Token::from_ast_ident(original_ident),
+ Spacing::Alone,
+ ));
+ }
+ }
+
+ // Replace meta-variable expressions with the result of their expansion.
+ mbe::TokenTree::MetaVarExpr(sp, expr) => {
+ transcribe_metavar_expr(cx, expr, interp, &mut marker, &repeats, &mut result, &sp)?;
+ }
+
+ // If we are entering a new delimiter, we push its contents to the `stack` to be
+ // processed, and we push all of the currently produced results to the `result_stack`.
+ // We will produce all of the results of the inside of the `Delimited` and then we will
+ // jump back out of the Delimited, pop the result_stack and add the new results back to
+ // the previous results (from outside the Delimited).
+ mbe::TokenTree::Delimited(mut span, delimited) => {
+ mut_visit::visit_delim_span(&mut span, &mut marker);
+ stack.push(Frame::Delimited {
+ tts: &delimited.tts,
+ delim: delimited.delim,
+ idx: 0,
+ span,
+ });
+ result_stack.push(mem::take(&mut result));
+ }
+
+ // Nothing much to do here. Just push the token to the result, being careful to
+ // preserve syntax context.
+ mbe::TokenTree::Token(token) => {
+ let mut token = token.clone();
+ mut_visit::visit_token(&mut token, &mut marker);
+ let tt = TokenTree::Token(token, Spacing::Alone);
+ result.push(tt);
+ }
+
+ // There should be no meta-var declarations in the invocation of a macro.
+ mbe::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl"),
+ }
+ }
+}
+
+/// Lookup the meta-var named `ident` and return the matched token tree from the invocation using
+/// the set of matches `interpolations`.
+///
+/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
+/// into the right place in nested matchers. If we attempt to descend too far, the macro writer has
+/// made a mistake, and we return `None`.
+fn lookup_cur_matched<'a>(
+ ident: MacroRulesNormalizedIdent,
+ interpolations: &'a FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
+ repeats: &[(usize, usize)],
+) -> Option<&'a NamedMatch> {
+ interpolations.get(&ident).map(|matched| {
+ let mut matched = matched;
+ for &(idx, _) in repeats {
+ match matched {
+ MatchedTokenTree(_) | MatchedNonterminal(_) => break,
+ MatchedSeq(ref ads) => matched = ads.get(idx).unwrap(),
+ }
+ }
+
+ matched
+ })
+}
+
+/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make
+/// sure that the size of each sequence and all of its nested sequences are the same as the sizes
+/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody
+/// has made a mistake (either the macro writer or caller).
+#[derive(Clone)]
+enum LockstepIterSize {
+ /// No constraints on length of matcher. This is true for any TokenTree variants except a
+ /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`).
+ Unconstrained,
+
+ /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the
+ /// meta-var are returned.
+ Constraint(usize, MacroRulesNormalizedIdent),
+
+ /// Two `Constraint`s on the same sequence had different lengths. This is an error.
+ Contradiction(String),
+}
+
+impl LockstepIterSize {
+ /// Find incompatibilities in matcher/invocation sizes.
+ /// - `Unconstrained` is compatible with everything.
+ /// - `Contradiction` is incompatible with everything.
+ /// - `Constraint(len)` is only compatible with other constraints of the same length.
+ fn with(self, other: LockstepIterSize) -> LockstepIterSize {
+ match self {
+ LockstepIterSize::Unconstrained => other,
+ LockstepIterSize::Contradiction(_) => self,
+ LockstepIterSize::Constraint(l_len, ref l_id) => match other {
+ LockstepIterSize::Unconstrained => self,
+ LockstepIterSize::Contradiction(_) => other,
+ LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self,
+ LockstepIterSize::Constraint(r_len, r_id) => {
+ let msg = format!(
+ "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}",
+ l_id,
+ l_len,
+ pluralize!(l_len),
+ r_id,
+ r_len,
+ pluralize!(r_len),
+ );
+ LockstepIterSize::Contradiction(msg)
+ }
+ },
+ }
+ }
+}
+
+/// Given a `tree`, make sure that all sequences have the same length as the matches for the
+/// appropriate meta-vars in `interpolations`.
+///
+/// Note that if `repeats` does not match the exact correct depth of a meta-var,
+/// `lookup_cur_matched` will return `None`, which is why this still works even in the presence of
+/// multiple nested matcher sequences.
+///
+/// Example: `$($($x $y)+*);+` -- we need to make sure that `x` and `y` repeat the same amount as
+/// each other at the given depth when the macro was invoked. If they don't it might mean they were
+/// declared at unequal depths or there was a compile bug. For example, if we have 3 repetitions of
+/// the outer sequence and 4 repetitions of the inner sequence for `x`, we should have the same for
+/// `y`; otherwise, we can't transcribe them both at the given depth.
+fn lockstep_iter_size(
+ tree: &mbe::TokenTree,
+ interpolations: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
+ repeats: &[(usize, usize)],
+) -> LockstepIterSize {
+ use mbe::TokenTree;
+ match *tree {
+ TokenTree::Delimited(_, ref delimited) => {
+ delimited.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
+ size.with(lockstep_iter_size(tt, interpolations, repeats))
+ })
+ }
+ TokenTree::Sequence(_, ref seq) => {
+ seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
+ size.with(lockstep_iter_size(tt, interpolations, repeats))
+ })
+ }
+ TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => {
+ let name = MacroRulesNormalizedIdent::new(name);
+ match lookup_cur_matched(name, interpolations, repeats) {
+ Some(matched) => match matched {
+ MatchedTokenTree(_) | MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
+ MatchedSeq(ref ads) => LockstepIterSize::Constraint(ads.len(), name),
+ },
+ _ => LockstepIterSize::Unconstrained,
+ }
+ }
+ TokenTree::MetaVarExpr(_, ref expr) => {
+ let default_rslt = LockstepIterSize::Unconstrained;
+ let Some(ident) = expr.ident() else { return default_rslt; };
+ let name = MacroRulesNormalizedIdent::new(ident);
+ match lookup_cur_matched(name, interpolations, repeats) {
+ Some(MatchedSeq(ref ads)) => {
+ default_rslt.with(LockstepIterSize::Constraint(ads.len(), name))
+ }
+ _ => default_rslt,
+ }
+ }
+ TokenTree::Token(..) => LockstepIterSize::Unconstrained,
+ }
+}
+
+/// Used solely by the `count` meta-variable expression, counts the outer-most repetitions at a
+/// given optional nested depth.
+///
+/// For example, a macro parameter of `$( { $( $foo:ident ),* } )*` called with `{ a, b } { c }`:
+///
+/// * `[ $( ${count(foo)} ),* ]` will return [2, 1] with a, b = 2 and c = 1
+/// * `[ $( ${count(foo, 0)} ),* ]` will be the same as `[ $( ${count(foo)} ),* ]`
+/// * `[ $( ${count(foo, 1)} ),* ]` will return an error because `${count(foo, 1)}` is
+/// declared inside a single repetition and the index `1` implies two nested repetitions.
+fn count_repetitions<'a>(
+ cx: &ExtCtxt<'a>,
+ depth_opt: Option<usize>,
+ mut matched: &NamedMatch,
+ repeats: &[(usize, usize)],
+ sp: &DelimSpan,
+) -> PResult<'a, usize> {
+ // Recursively count the number of matches in `matched` at given depth
+ // (or at the top-level of `matched` if no depth is given).
+ fn count<'a>(
+ cx: &ExtCtxt<'a>,
+ declared_lhs_depth: usize,
+ depth_opt: Option<usize>,
+ matched: &NamedMatch,
+ sp: &DelimSpan,
+ ) -> PResult<'a, usize> {
+ match matched {
+ MatchedTokenTree(_) | MatchedNonterminal(_) => {
+ if declared_lhs_depth == 0 {
+ return Err(cx.struct_span_err(
+ sp.entire(),
+ "`count` can not be placed inside the inner-most repetition",
+ ));
+ }
+ match depth_opt {
+ None => Ok(1),
+ Some(_) => Err(out_of_bounds_err(cx, declared_lhs_depth, sp.entire(), "count")),
+ }
+ }
+ MatchedSeq(ref named_matches) => {
+ let new_declared_lhs_depth = declared_lhs_depth + 1;
+ match depth_opt {
+ None => named_matches
+ .iter()
+ .map(|elem| count(cx, new_declared_lhs_depth, None, elem, sp))
+ .sum(),
+ Some(0) => Ok(named_matches.len()),
+ Some(depth) => named_matches
+ .iter()
+ .map(|elem| count(cx, new_declared_lhs_depth, Some(depth - 1), elem, sp))
+ .sum(),
+ }
+ }
+ }
+ }
+ // `repeats` records all of the nested levels at which we are currently
+ // matching meta-variables. The meta-var-expr `count($x)` only counts
+ // matches that occur in this "subtree" of the `NamedMatch` where we
+ // are currently transcribing, so we need to descend to that subtree
+ // before we start counting. `matched` contains the various levels of the
+ // tree as we descend, and its final value is the subtree we are currently at.
+ for &(idx, _) in repeats {
+ if let MatchedSeq(ref ads) = matched {
+ matched = &ads[idx];
+ }
+ }
+ count(cx, 0, depth_opt, matched, sp)
+}
+
+/// Returns a `NamedMatch` item declared on the LHS given an arbitrary [Ident]
+fn matched_from_ident<'ctx, 'interp, 'rslt>(
+ cx: &ExtCtxt<'ctx>,
+ ident: Ident,
+ interp: &'interp FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
+) -> PResult<'ctx, &'rslt NamedMatch>
+where
+ 'interp: 'rslt,
+{
+ let span = ident.span;
+ let key = MacroRulesNormalizedIdent::new(ident);
+ interp.get(&key).ok_or_else(|| {
+ cx.struct_span_err(
+ span,
+ &format!("variable `{}` is not recognized in meta-variable expression", key),
+ )
+ })
+}
+
+/// Used by meta-variable expressions when an user input is out of the actual declared bounds. For
+/// example, index(999999) in an repetition of only three elements.
+fn out_of_bounds_err<'a>(
+ cx: &ExtCtxt<'a>,
+ max: usize,
+ span: Span,
+ ty: &str,
+) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
+ let msg = if max == 0 {
+ format!(
+ "meta-variable expression `{ty}` with depth parameter \
+ must be called inside of a macro repetition"
+ )
+ } else {
+ format!(
+ "depth parameter on meta-variable expression `{ty}` \
+ must be less than {max}"
+ )
+ };
+ cx.struct_span_err(span, &msg)
+}
+
+fn transcribe_metavar_expr<'a>(
+ cx: &ExtCtxt<'a>,
+ expr: &MetaVarExpr,
+ interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
+ marker: &mut Marker,
+ repeats: &[(usize, usize)],
+ result: &mut Vec<TokenTree>,
+ sp: &DelimSpan,
+) -> PResult<'a, ()> {
+ let mut visited_span = || {
+ let mut span = sp.entire();
+ marker.visit_span(&mut span);
+ span
+ };
+ match *expr {
+ MetaVarExpr::Count(original_ident, depth_opt) => {
+ let matched = matched_from_ident(cx, original_ident, interp)?;
+ let count = count_repetitions(cx, depth_opt, matched, &repeats, sp)?;
+ let tt = TokenTree::token_alone(
+ TokenKind::lit(token::Integer, sym::integer(count), None),
+ visited_span(),
+ );
+ result.push(tt);
+ }
+ MetaVarExpr::Ignore(original_ident) => {
+ // Used to ensure that `original_ident` is present in the LHS
+ let _ = matched_from_ident(cx, original_ident, interp)?;
+ }
+ MetaVarExpr::Index(depth) => match repeats.iter().nth_back(depth) {
+ Some((index, _)) => {
+ result.push(TokenTree::token_alone(
+ TokenKind::lit(token::Integer, sym::integer(*index), None),
+ visited_span(),
+ ));
+ }
+ None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "index")),
+ },
+ MetaVarExpr::Length(depth) => match repeats.iter().nth_back(depth) {
+ Some((_, length)) => {
+ result.push(TokenTree::token_alone(
+ TokenKind::lit(token::Integer, sym::integer(*length), None),
+ visited_span(),
+ ));
+ }
+ None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "length")),
+ },
+ }
+ Ok(())
+}