diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_expand | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_expand')
21 files changed, 11730 insertions, 0 deletions
diff --git a/compiler/rustc_expand/Cargo.toml b/compiler/rustc_expand/Cargo.toml new file mode 100644 index 000000000..4ee7b6c42 --- /dev/null +++ b/compiler/rustc_expand/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "rustc_expand" +version = "0.0.0" +edition = "2021" +build = false + +[lib] +doctest = false + +[dependencies] +rustc_serialize = { path = "../rustc_serialize" } +tracing = "0.1" +rustc_span = { path = "../rustc_span" } +rustc_ast_pretty = { path = "../rustc_ast_pretty" } +rustc_ast_passes = { path = "../rustc_ast_passes" } +rustc_attr = { path = "../rustc_attr" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_errors = { path = "../rustc_errors" } +rustc_feature = { path = "../rustc_feature" } +rustc_lint_defs = { path = "../rustc_lint_defs" } +rustc_macros = { path = "../rustc_macros" } +rustc_lexer = { path = "../rustc_lexer" } +rustc_parse = { path = "../rustc_parse" } +rustc_session = { path = "../rustc_session" } +smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } +rustc_ast = { path = "../rustc_ast" } +crossbeam-channel = "0.5.0" diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs new file mode 100644 index 000000000..6e093811f --- /dev/null +++ b/compiler/rustc_expand/src/base.rs @@ -0,0 +1,1444 @@ +use crate::expand::{self, AstFragment, Invocation}; +use crate::module::DirOwnership; + +use rustc_ast::attr::MarkedAttrs; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Nonterminal}; +use rustc_ast::tokenstream::TokenStream; +use rustc_ast::visit::{AssocCtxt, Visitor}; +use rustc_ast::{self as ast, Attribute, HasAttrs, Item, NodeId, PatKind}; +use rustc_attr::{self as attr, Deprecation, Stability}; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; +use rustc_data_structures::sync::{self, Lrc}; +use rustc_errors::{Applicability, DiagnosticBuilder, ErrorGuaranteed, MultiSpan, PResult}; +use rustc_lint_defs::builtin::PROC_MACRO_BACK_COMPAT; +use 
rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiagnostics}; +use rustc_parse::{self, parser, MACRO_ARGUMENTS}; +use rustc_session::{parse::ParseSess, Limit, Session, SessionDiagnostic}; +use rustc_span::def_id::{CrateNum, DefId, LocalDefId}; +use rustc_span::edition::Edition; +use rustc_span::hygiene::{AstPass, ExpnData, ExpnKind, LocalExpnId}; +use rustc_span::source_map::SourceMap; +use rustc_span::symbol::{kw, sym, Ident, Symbol}; +use rustc_span::{FileName, Span, DUMMY_SP}; +use smallvec::{smallvec, SmallVec}; + +use std::default::Default; +use std::iter; +use std::path::PathBuf; +use std::rc::Rc; + +pub(crate) use rustc_span::hygiene::MacroKind; + +// When adding new variants, make sure to +// adjust the `visit_*` / `flat_map_*` calls in `InvocationCollector` +// to use `assign_id!` +#[derive(Debug, Clone)] +pub enum Annotatable { + Item(P<ast::Item>), + TraitItem(P<ast::AssocItem>), + ImplItem(P<ast::AssocItem>), + ForeignItem(P<ast::ForeignItem>), + Stmt(P<ast::Stmt>), + Expr(P<ast::Expr>), + Arm(ast::Arm), + ExprField(ast::ExprField), + PatField(ast::PatField), + GenericParam(ast::GenericParam), + Param(ast::Param), + FieldDef(ast::FieldDef), + Variant(ast::Variant), + Crate(ast::Crate), +} + +impl Annotatable { + pub fn span(&self) -> Span { + match *self { + Annotatable::Item(ref item) => item.span, + Annotatable::TraitItem(ref trait_item) => trait_item.span, + Annotatable::ImplItem(ref impl_item) => impl_item.span, + Annotatable::ForeignItem(ref foreign_item) => foreign_item.span, + Annotatable::Stmt(ref stmt) => stmt.span, + Annotatable::Expr(ref expr) => expr.span, + Annotatable::Arm(ref arm) => arm.span, + Annotatable::ExprField(ref field) => field.span, + Annotatable::PatField(ref fp) => fp.pat.span, + Annotatable::GenericParam(ref gp) => gp.ident.span, + Annotatable::Param(ref p) => p.span, + Annotatable::FieldDef(ref sf) => sf.span, + Annotatable::Variant(ref v) => v.span, + Annotatable::Crate(ref c) => c.spans.inner_span, + } + } + + pub fn 
visit_attrs(&mut self, f: impl FnOnce(&mut Vec<Attribute>)) { + match self { + Annotatable::Item(item) => item.visit_attrs(f), + Annotatable::TraitItem(trait_item) => trait_item.visit_attrs(f), + Annotatable::ImplItem(impl_item) => impl_item.visit_attrs(f), + Annotatable::ForeignItem(foreign_item) => foreign_item.visit_attrs(f), + Annotatable::Stmt(stmt) => stmt.visit_attrs(f), + Annotatable::Expr(expr) => expr.visit_attrs(f), + Annotatable::Arm(arm) => arm.visit_attrs(f), + Annotatable::ExprField(field) => field.visit_attrs(f), + Annotatable::PatField(fp) => fp.visit_attrs(f), + Annotatable::GenericParam(gp) => gp.visit_attrs(f), + Annotatable::Param(p) => p.visit_attrs(f), + Annotatable::FieldDef(sf) => sf.visit_attrs(f), + Annotatable::Variant(v) => v.visit_attrs(f), + Annotatable::Crate(c) => c.visit_attrs(f), + } + } + + pub fn visit_with<'a, V: Visitor<'a>>(&'a self, visitor: &mut V) { + match self { + Annotatable::Item(item) => visitor.visit_item(item), + Annotatable::TraitItem(item) => visitor.visit_assoc_item(item, AssocCtxt::Trait), + Annotatable::ImplItem(item) => visitor.visit_assoc_item(item, AssocCtxt::Impl), + Annotatable::ForeignItem(foreign_item) => visitor.visit_foreign_item(foreign_item), + Annotatable::Stmt(stmt) => visitor.visit_stmt(stmt), + Annotatable::Expr(expr) => visitor.visit_expr(expr), + Annotatable::Arm(arm) => visitor.visit_arm(arm), + Annotatable::ExprField(field) => visitor.visit_expr_field(field), + Annotatable::PatField(fp) => visitor.visit_pat_field(fp), + Annotatable::GenericParam(gp) => visitor.visit_generic_param(gp), + Annotatable::Param(p) => visitor.visit_param(p), + Annotatable::FieldDef(sf) => visitor.visit_field_def(sf), + Annotatable::Variant(v) => visitor.visit_variant(v), + Annotatable::Crate(c) => visitor.visit_crate(c), + } + } + + pub fn to_tokens(&self) -> TokenStream { + match self { + Annotatable::Item(node) => TokenStream::from_ast(node), + Annotatable::TraitItem(node) | Annotatable::ImplItem(node) => { + 
TokenStream::from_ast(node) + } + Annotatable::ForeignItem(node) => TokenStream::from_ast(node), + Annotatable::Stmt(node) => { + assert!(!matches!(node.kind, ast::StmtKind::Empty)); + TokenStream::from_ast(node) + } + Annotatable::Expr(node) => TokenStream::from_ast(node), + Annotatable::Arm(..) + | Annotatable::ExprField(..) + | Annotatable::PatField(..) + | Annotatable::GenericParam(..) + | Annotatable::Param(..) + | Annotatable::FieldDef(..) + | Annotatable::Variant(..) + | Annotatable::Crate(..) => panic!("unexpected annotatable"), + } + } + + pub fn expect_item(self) -> P<ast::Item> { + match self { + Annotatable::Item(i) => i, + _ => panic!("expected Item"), + } + } + + pub fn expect_trait_item(self) -> P<ast::AssocItem> { + match self { + Annotatable::TraitItem(i) => i, + _ => panic!("expected Item"), + } + } + + pub fn expect_impl_item(self) -> P<ast::AssocItem> { + match self { + Annotatable::ImplItem(i) => i, + _ => panic!("expected Item"), + } + } + + pub fn expect_foreign_item(self) -> P<ast::ForeignItem> { + match self { + Annotatable::ForeignItem(i) => i, + _ => panic!("expected foreign item"), + } + } + + pub fn expect_stmt(self) -> ast::Stmt { + match self { + Annotatable::Stmt(stmt) => stmt.into_inner(), + _ => panic!("expected statement"), + } + } + + pub fn expect_expr(self) -> P<ast::Expr> { + match self { + Annotatable::Expr(expr) => expr, + _ => panic!("expected expression"), + } + } + + pub fn expect_arm(self) -> ast::Arm { + match self { + Annotatable::Arm(arm) => arm, + _ => panic!("expected match arm"), + } + } + + pub fn expect_expr_field(self) -> ast::ExprField { + match self { + Annotatable::ExprField(field) => field, + _ => panic!("expected field"), + } + } + + pub fn expect_pat_field(self) -> ast::PatField { + match self { + Annotatable::PatField(fp) => fp, + _ => panic!("expected field pattern"), + } + } + + pub fn expect_generic_param(self) -> ast::GenericParam { + match self { + Annotatable::GenericParam(gp) => gp, + _ => 
panic!("expected generic parameter"), + } + } + + pub fn expect_param(self) -> ast::Param { + match self { + Annotatable::Param(param) => param, + _ => panic!("expected parameter"), + } + } + + pub fn expect_field_def(self) -> ast::FieldDef { + match self { + Annotatable::FieldDef(sf) => sf, + _ => panic!("expected struct field"), + } + } + + pub fn expect_variant(self) -> ast::Variant { + match self { + Annotatable::Variant(v) => v, + _ => panic!("expected variant"), + } + } + + pub fn expect_crate(self) -> ast::Crate { + match self { + Annotatable::Crate(krate) => krate, + _ => panic!("expected krate"), + } + } +} + +/// Result of an expansion that may need to be retried. +/// Consider using this for non-`MultiItemModifier` expanders as well. +pub enum ExpandResult<T, U> { + /// Expansion produced a result (possibly dummy). + Ready(T), + /// Expansion could not produce a result and needs to be retried. + Retry(U), +} + +// `meta_item` is the attribute, and `item` is the item being modified. 
+pub trait MultiItemModifier { + fn expand( + &self, + ecx: &mut ExtCtxt<'_>, + span: Span, + meta_item: &ast::MetaItem, + item: Annotatable, + ) -> ExpandResult<Vec<Annotatable>, Annotatable>; +} + +impl<F> MultiItemModifier for F +where + F: Fn(&mut ExtCtxt<'_>, Span, &ast::MetaItem, Annotatable) -> Vec<Annotatable>, +{ + fn expand( + &self, + ecx: &mut ExtCtxt<'_>, + span: Span, + meta_item: &ast::MetaItem, + item: Annotatable, + ) -> ExpandResult<Vec<Annotatable>, Annotatable> { + ExpandResult::Ready(self(ecx, span, meta_item, item)) + } +} + +pub trait BangProcMacro { + fn expand<'cx>( + &self, + ecx: &'cx mut ExtCtxt<'_>, + span: Span, + ts: TokenStream, + ) -> Result<TokenStream, ErrorGuaranteed>; +} + +impl<F> BangProcMacro for F +where + F: Fn(TokenStream) -> TokenStream, +{ + fn expand<'cx>( + &self, + _ecx: &'cx mut ExtCtxt<'_>, + _span: Span, + ts: TokenStream, + ) -> Result<TokenStream, ErrorGuaranteed> { + // FIXME setup implicit context in TLS before calling self. + Ok(self(ts)) + } +} + +pub trait AttrProcMacro { + fn expand<'cx>( + &self, + ecx: &'cx mut ExtCtxt<'_>, + span: Span, + annotation: TokenStream, + annotated: TokenStream, + ) -> Result<TokenStream, ErrorGuaranteed>; +} + +impl<F> AttrProcMacro for F +where + F: Fn(TokenStream, TokenStream) -> TokenStream, +{ + fn expand<'cx>( + &self, + _ecx: &'cx mut ExtCtxt<'_>, + _span: Span, + annotation: TokenStream, + annotated: TokenStream, + ) -> Result<TokenStream, ErrorGuaranteed> { + // FIXME setup implicit context in TLS before calling self. 
+ Ok(self(annotation, annotated)) + } +} + +/// Represents a thing that maps token trees to Macro Results +pub trait TTMacroExpander { + fn expand<'cx>( + &self, + ecx: &'cx mut ExtCtxt<'_>, + span: Span, + input: TokenStream, + ) -> Box<dyn MacResult + 'cx>; +} + +pub type MacroExpanderFn = + for<'cx> fn(&'cx mut ExtCtxt<'_>, Span, TokenStream) -> Box<dyn MacResult + 'cx>; + +impl<F> TTMacroExpander for F +where + F: for<'cx> Fn(&'cx mut ExtCtxt<'_>, Span, TokenStream) -> Box<dyn MacResult + 'cx>, +{ + fn expand<'cx>( + &self, + ecx: &'cx mut ExtCtxt<'_>, + span: Span, + input: TokenStream, + ) -> Box<dyn MacResult + 'cx> { + self(ecx, span, input) + } +} + +// Use a macro because forwarding to a simple function has type system issues +macro_rules! make_stmts_default { + ($me:expr) => { + $me.make_expr().map(|e| { + smallvec![ast::Stmt { + id: ast::DUMMY_NODE_ID, + span: e.span, + kind: ast::StmtKind::Expr(e), + }] + }) + }; +} + +/// The result of a macro expansion. The return values of the various +/// methods are spliced into the AST at the callsite of the macro. +pub trait MacResult { + /// Creates an expression. + fn make_expr(self: Box<Self>) -> Option<P<ast::Expr>> { + None + } + + /// Creates zero or more items. + fn make_items(self: Box<Self>) -> Option<SmallVec<[P<ast::Item>; 1]>> { + None + } + + /// Creates zero or more impl items. + fn make_impl_items(self: Box<Self>) -> Option<SmallVec<[P<ast::AssocItem>; 1]>> { + None + } + + /// Creates zero or more trait items. + fn make_trait_items(self: Box<Self>) -> Option<SmallVec<[P<ast::AssocItem>; 1]>> { + None + } + + /// Creates zero or more items in an `extern {}` block + fn make_foreign_items(self: Box<Self>) -> Option<SmallVec<[P<ast::ForeignItem>; 1]>> { + None + } + + /// Creates a pattern. + fn make_pat(self: Box<Self>) -> Option<P<ast::Pat>> { + None + } + + /// Creates zero or more statements. 
+ /// + /// By default this attempts to create an expression statement, + /// returning None if that fails. + fn make_stmts(self: Box<Self>) -> Option<SmallVec<[ast::Stmt; 1]>> { + make_stmts_default!(self) + } + + fn make_ty(self: Box<Self>) -> Option<P<ast::Ty>> { + None + } + + fn make_arms(self: Box<Self>) -> Option<SmallVec<[ast::Arm; 1]>> { + None + } + + fn make_expr_fields(self: Box<Self>) -> Option<SmallVec<[ast::ExprField; 1]>> { + None + } + + fn make_pat_fields(self: Box<Self>) -> Option<SmallVec<[ast::PatField; 1]>> { + None + } + + fn make_generic_params(self: Box<Self>) -> Option<SmallVec<[ast::GenericParam; 1]>> { + None + } + + fn make_params(self: Box<Self>) -> Option<SmallVec<[ast::Param; 1]>> { + None + } + + fn make_field_defs(self: Box<Self>) -> Option<SmallVec<[ast::FieldDef; 1]>> { + None + } + + fn make_variants(self: Box<Self>) -> Option<SmallVec<[ast::Variant; 1]>> { + None + } + + fn make_crate(self: Box<Self>) -> Option<ast::Crate> { + // Fn-like macros cannot produce a crate. + unreachable!() + } +} + +macro_rules! make_MacEager { + ( $( $fld:ident: $t:ty, )* ) => { + /// `MacResult` implementation for the common case where you've already + /// built each form of AST that you might return. + #[derive(Default)] + pub struct MacEager { + $( + pub $fld: Option<$t>, + )* + } + + impl MacEager { + $( + pub fn $fld(v: $t) -> Box<dyn MacResult> { + Box::new(MacEager { + $fld: Some(v), + ..Default::default() + }) + } + )* + } + } +} + +make_MacEager! 
{ + expr: P<ast::Expr>, + pat: P<ast::Pat>, + items: SmallVec<[P<ast::Item>; 1]>, + impl_items: SmallVec<[P<ast::AssocItem>; 1]>, + trait_items: SmallVec<[P<ast::AssocItem>; 1]>, + foreign_items: SmallVec<[P<ast::ForeignItem>; 1]>, + stmts: SmallVec<[ast::Stmt; 1]>, + ty: P<ast::Ty>, +} + +impl MacResult for MacEager { + fn make_expr(self: Box<Self>) -> Option<P<ast::Expr>> { + self.expr + } + + fn make_items(self: Box<Self>) -> Option<SmallVec<[P<ast::Item>; 1]>> { + self.items + } + + fn make_impl_items(self: Box<Self>) -> Option<SmallVec<[P<ast::AssocItem>; 1]>> { + self.impl_items + } + + fn make_trait_items(self: Box<Self>) -> Option<SmallVec<[P<ast::AssocItem>; 1]>> { + self.trait_items + } + + fn make_foreign_items(self: Box<Self>) -> Option<SmallVec<[P<ast::ForeignItem>; 1]>> { + self.foreign_items + } + + fn make_stmts(self: Box<Self>) -> Option<SmallVec<[ast::Stmt; 1]>> { + match self.stmts.as_ref().map_or(0, |s| s.len()) { + 0 => make_stmts_default!(self), + _ => self.stmts, + } + } + + fn make_pat(self: Box<Self>) -> Option<P<ast::Pat>> { + if let Some(p) = self.pat { + return Some(p); + } + if let Some(e) = self.expr { + if let ast::ExprKind::Lit(_) = e.kind { + return Some(P(ast::Pat { + id: ast::DUMMY_NODE_ID, + span: e.span, + kind: PatKind::Lit(e), + tokens: None, + })); + } + } + None + } + + fn make_ty(self: Box<Self>) -> Option<P<ast::Ty>> { + self.ty + } +} + +/// Fill-in macro expansion result, to allow compilation to continue +/// after hitting errors. +#[derive(Copy, Clone)] +pub struct DummyResult { + is_error: bool, + span: Span, +} + +impl DummyResult { + /// Creates a default MacResult that can be anything. + /// + /// Use this as a return value after hitting any errors and + /// calling `span_err`. + pub fn any(span: Span) -> Box<dyn MacResult + 'static> { + Box::new(DummyResult { is_error: true, span }) + } + + /// Same as `any`, but must be a valid fragment, not error. 
+ pub fn any_valid(span: Span) -> Box<dyn MacResult + 'static> { + Box::new(DummyResult { is_error: false, span }) + } + + /// A plain dummy expression. + pub fn raw_expr(sp: Span, is_error: bool) -> P<ast::Expr> { + P(ast::Expr { + id: ast::DUMMY_NODE_ID, + kind: if is_error { ast::ExprKind::Err } else { ast::ExprKind::Tup(Vec::new()) }, + span: sp, + attrs: ast::AttrVec::new(), + tokens: None, + }) + } + + /// A plain dummy pattern. + pub fn raw_pat(sp: Span) -> ast::Pat { + ast::Pat { id: ast::DUMMY_NODE_ID, kind: PatKind::Wild, span: sp, tokens: None } + } + + /// A plain dummy type. + pub fn raw_ty(sp: Span, is_error: bool) -> P<ast::Ty> { + P(ast::Ty { + id: ast::DUMMY_NODE_ID, + kind: if is_error { ast::TyKind::Err } else { ast::TyKind::Tup(Vec::new()) }, + span: sp, + tokens: None, + }) + } +} + +impl MacResult for DummyResult { + fn make_expr(self: Box<DummyResult>) -> Option<P<ast::Expr>> { + Some(DummyResult::raw_expr(self.span, self.is_error)) + } + + fn make_pat(self: Box<DummyResult>) -> Option<P<ast::Pat>> { + Some(P(DummyResult::raw_pat(self.span))) + } + + fn make_items(self: Box<DummyResult>) -> Option<SmallVec<[P<ast::Item>; 1]>> { + Some(SmallVec::new()) + } + + fn make_impl_items(self: Box<DummyResult>) -> Option<SmallVec<[P<ast::AssocItem>; 1]>> { + Some(SmallVec::new()) + } + + fn make_trait_items(self: Box<DummyResult>) -> Option<SmallVec<[P<ast::AssocItem>; 1]>> { + Some(SmallVec::new()) + } + + fn make_foreign_items(self: Box<Self>) -> Option<SmallVec<[P<ast::ForeignItem>; 1]>> { + Some(SmallVec::new()) + } + + fn make_stmts(self: Box<DummyResult>) -> Option<SmallVec<[ast::Stmt; 1]>> { + Some(smallvec![ast::Stmt { + id: ast::DUMMY_NODE_ID, + kind: ast::StmtKind::Expr(DummyResult::raw_expr(self.span, self.is_error)), + span: self.span, + }]) + } + + fn make_ty(self: Box<DummyResult>) -> Option<P<ast::Ty>> { + Some(DummyResult::raw_ty(self.span, self.is_error)) + } + + fn make_arms(self: Box<DummyResult>) -> Option<SmallVec<[ast::Arm; 1]>> { 
+ Some(SmallVec::new()) + } + + fn make_expr_fields(self: Box<DummyResult>) -> Option<SmallVec<[ast::ExprField; 1]>> { + Some(SmallVec::new()) + } + + fn make_pat_fields(self: Box<DummyResult>) -> Option<SmallVec<[ast::PatField; 1]>> { + Some(SmallVec::new()) + } + + fn make_generic_params(self: Box<DummyResult>) -> Option<SmallVec<[ast::GenericParam; 1]>> { + Some(SmallVec::new()) + } + + fn make_params(self: Box<DummyResult>) -> Option<SmallVec<[ast::Param; 1]>> { + Some(SmallVec::new()) + } + + fn make_field_defs(self: Box<DummyResult>) -> Option<SmallVec<[ast::FieldDef; 1]>> { + Some(SmallVec::new()) + } + + fn make_variants(self: Box<DummyResult>) -> Option<SmallVec<[ast::Variant; 1]>> { + Some(SmallVec::new()) + } +} + +/// A syntax extension kind. +pub enum SyntaxExtensionKind { + /// A token-based function-like macro. + Bang( + /// An expander with signature TokenStream -> TokenStream. + Box<dyn BangProcMacro + sync::Sync + sync::Send>, + ), + + /// An AST-based function-like macro. + LegacyBang( + /// An expander with signature TokenStream -> AST. + Box<dyn TTMacroExpander + sync::Sync + sync::Send>, + ), + + /// A token-based attribute macro. + Attr( + /// An expander with signature (TokenStream, TokenStream) -> TokenStream. + /// The first TokenSteam is the attribute itself, the second is the annotated item. + /// The produced TokenSteam replaces the input TokenSteam. + Box<dyn AttrProcMacro + sync::Sync + sync::Send>, + ), + + /// An AST-based attribute macro. + LegacyAttr( + /// An expander with signature (AST, AST) -> AST. + /// The first AST fragment is the attribute itself, the second is the annotated item. + /// The produced AST fragment replaces the input AST fragment. + Box<dyn MultiItemModifier + sync::Sync + sync::Send>, + ), + + /// A trivial attribute "macro" that does nothing, + /// only keeps the attribute and marks it as inert, + /// thus making it ineligible for further expansion. + NonMacroAttr, + + /// A token-based derive macro. 
+ Derive( + /// An expander with signature TokenStream -> TokenStream (not yet). + /// The produced TokenSteam is appended to the input TokenSteam. + Box<dyn MultiItemModifier + sync::Sync + sync::Send>, + ), + + /// An AST-based derive macro. + LegacyDerive( + /// An expander with signature AST -> AST. + /// The produced AST fragment is appended to the input AST fragment. + Box<dyn MultiItemModifier + sync::Sync + sync::Send>, + ), +} + +/// A struct representing a macro definition in "lowered" form ready for expansion. +pub struct SyntaxExtension { + /// A syntax extension kind. + pub kind: SyntaxExtensionKind, + /// Span of the macro definition. + pub span: Span, + /// List of unstable features that are treated as stable inside this macro. + pub allow_internal_unstable: Option<Lrc<[Symbol]>>, + /// Suppresses the `unsafe_code` lint for code produced by this macro. + pub allow_internal_unsafe: bool, + /// Enables the macro helper hack (`ident!(...)` -> `$crate::ident!(...)`) for this macro. + pub local_inner_macros: bool, + /// The macro's stability info. + pub stability: Option<Stability>, + /// The macro's deprecation info. + pub deprecation: Option<Deprecation>, + /// Names of helper attributes registered by this macro. + pub helper_attrs: Vec<Symbol>, + /// Edition of the crate in which this macro is defined. + pub edition: Edition, + /// Built-in macros have a couple of special properties like availability + /// in `#[no_implicit_prelude]` modules, so we have to keep this flag. + pub builtin_name: Option<Symbol>, +} + +impl SyntaxExtension { + /// Returns which kind of macro calls this syntax extension. + pub fn macro_kind(&self) -> MacroKind { + match self.kind { + SyntaxExtensionKind::Bang(..) | SyntaxExtensionKind::LegacyBang(..) => MacroKind::Bang, + SyntaxExtensionKind::Attr(..) + | SyntaxExtensionKind::LegacyAttr(..) + | SyntaxExtensionKind::NonMacroAttr => MacroKind::Attr, + SyntaxExtensionKind::Derive(..) | SyntaxExtensionKind::LegacyDerive(..) 
=> { + MacroKind::Derive + } + } + } + + /// Constructs a syntax extension with default properties. + pub fn default(kind: SyntaxExtensionKind, edition: Edition) -> SyntaxExtension { + SyntaxExtension { + span: DUMMY_SP, + allow_internal_unstable: None, + allow_internal_unsafe: false, + local_inner_macros: false, + stability: None, + deprecation: None, + helper_attrs: Vec::new(), + edition, + builtin_name: None, + kind, + } + } + + /// Constructs a syntax extension with the given properties + /// and other properties converted from attributes. + pub fn new( + sess: &Session, + kind: SyntaxExtensionKind, + span: Span, + helper_attrs: Vec<Symbol>, + edition: Edition, + name: Symbol, + attrs: &[ast::Attribute], + ) -> SyntaxExtension { + let allow_internal_unstable = + attr::allow_internal_unstable(sess, &attrs).collect::<Vec<Symbol>>(); + + let mut local_inner_macros = false; + if let Some(macro_export) = sess.find_by_name(attrs, sym::macro_export) { + if let Some(l) = macro_export.meta_item_list() { + local_inner_macros = attr::list_contains_name(&l, sym::local_inner_macros); + } + } + + let (builtin_name, helper_attrs) = sess + .find_by_name(attrs, sym::rustc_builtin_macro) + .map(|attr| { + // Override `helper_attrs` passed above if it's a built-in macro, + // marking `proc_macro_derive` macros as built-in is not a realistic use case. 
+ parse_macro_name_and_helper_attrs(sess.diagnostic(), attr, "built-in").map_or_else( + || (Some(name), Vec::new()), + |(name, helper_attrs)| (Some(name), helper_attrs), + ) + }) + .unwrap_or_else(|| (None, helper_attrs)); + let (stability, const_stability) = attr::find_stability(&sess, attrs, span); + if let Some((_, sp)) = const_stability { + sess.parse_sess + .span_diagnostic + .struct_span_err(sp, "macros cannot have const stability attributes") + .span_label(sp, "invalid const stability attribute") + .span_label( + sess.source_map().guess_head_span(span), + "const stability attribute affects this macro", + ) + .emit(); + } + + SyntaxExtension { + kind, + span, + allow_internal_unstable: (!allow_internal_unstable.is_empty()) + .then(|| allow_internal_unstable.into()), + allow_internal_unsafe: sess.contains_name(attrs, sym::allow_internal_unsafe), + local_inner_macros, + stability: stability.map(|(s, _)| s), + deprecation: attr::find_deprecation(&sess, attrs).map(|(d, _)| d), + helper_attrs, + edition, + builtin_name, + } + } + + pub fn dummy_bang(edition: Edition) -> SyntaxExtension { + fn expander<'cx>( + _: &'cx mut ExtCtxt<'_>, + span: Span, + _: TokenStream, + ) -> Box<dyn MacResult + 'cx> { + DummyResult::any(span) + } + SyntaxExtension::default(SyntaxExtensionKind::LegacyBang(Box::new(expander)), edition) + } + + pub fn dummy_derive(edition: Edition) -> SyntaxExtension { + fn expander( + _: &mut ExtCtxt<'_>, + _: Span, + _: &ast::MetaItem, + _: Annotatable, + ) -> Vec<Annotatable> { + Vec::new() + } + SyntaxExtension::default(SyntaxExtensionKind::Derive(Box::new(expander)), edition) + } + + pub fn non_macro_attr(edition: Edition) -> SyntaxExtension { + SyntaxExtension::default(SyntaxExtensionKind::NonMacroAttr, edition) + } + + pub fn expn_data( + &self, + parent: LocalExpnId, + call_site: Span, + descr: Symbol, + macro_def_id: Option<DefId>, + parent_module: Option<DefId>, + ) -> ExpnData { + ExpnData::new( + ExpnKind::Macro(self.macro_kind(), descr), + 
parent.to_expn_id(), + call_site, + self.span, + self.allow_internal_unstable.clone(), + self.allow_internal_unsafe, + self.local_inner_macros, + self.edition, + macro_def_id, + parent_module, + ) + } +} + +/// Error type that denotes indeterminacy. +pub struct Indeterminate; + +pub type DeriveResolutions = Vec<(ast::Path, Annotatable, Option<Lrc<SyntaxExtension>>)>; + +pub trait ResolverExpand { + fn next_node_id(&mut self) -> NodeId; + fn invocation_parent(&self, id: LocalExpnId) -> LocalDefId; + + fn resolve_dollar_crates(&mut self); + fn visit_ast_fragment_with_placeholders( + &mut self, + expn_id: LocalExpnId, + fragment: &AstFragment, + ); + fn register_builtin_macro(&mut self, name: Symbol, ext: SyntaxExtensionKind); + + fn expansion_for_ast_pass( + &mut self, + call_site: Span, + pass: AstPass, + features: &[Symbol], + parent_module_id: Option<NodeId>, + ) -> LocalExpnId; + + fn resolve_imports(&mut self); + + fn resolve_macro_invocation( + &mut self, + invoc: &Invocation, + eager_expansion_root: LocalExpnId, + force: bool, + ) -> Result<Lrc<SyntaxExtension>, Indeterminate>; + + fn record_macro_rule_usage(&mut self, mac_id: NodeId, rule_index: usize); + + fn check_unused_macros(&mut self); + + // Resolver interfaces for specific built-in macros. + /// Does `#[derive(...)]` attribute with the given `ExpnId` have built-in `Copy` inside it? + fn has_derive_copy(&self, expn_id: LocalExpnId) -> bool; + /// Resolve paths inside the `#[derive(...)]` attribute with the given `ExpnId`. + fn resolve_derives( + &mut self, + expn_id: LocalExpnId, + force: bool, + derive_paths: &dyn Fn() -> DeriveResolutions, + ) -> Result<(), Indeterminate>; + /// Take resolutions for paths inside the `#[derive(...)]` attribute with the given `ExpnId` + /// back from resolver. + fn take_derive_resolutions(&mut self, expn_id: LocalExpnId) -> Option<DeriveResolutions>; + /// Path resolution logic for `#[cfg_accessible(path)]`. 
+ fn cfg_accessible( + &mut self, + expn_id: LocalExpnId, + path: &ast::Path, + ) -> Result<bool, Indeterminate>; + + /// Decodes the proc-macro quoted span in the specified crate, with the specified id. + /// No caching is performed. + fn get_proc_macro_quoted_span(&self, krate: CrateNum, id: usize) -> Span; + + /// The order of items in the HIR is unrelated to the order of + /// items in the AST. However, we generate proc macro harnesses + /// based on the AST order, and later refer to these harnesses + /// from the HIR. This field keeps track of the order in which + /// we generated proc macros harnesses, so that we can map + /// HIR proc macros items back to their harness items. + fn declare_proc_macro(&mut self, id: NodeId); + + /// Tools registered with `#![register_tool]` and used by tool attributes and lints. + fn registered_tools(&self) -> &FxHashSet<Ident>; +} + +pub trait LintStoreExpand { + fn pre_expansion_lint( + &self, + sess: &Session, + registered_tools: &FxHashSet<Ident>, + node_id: NodeId, + attrs: &[Attribute], + items: &[P<Item>], + name: &str, + ); +} + +type LintStoreExpandDyn<'a> = Option<&'a (dyn LintStoreExpand + 'a)>; + +#[derive(Clone, Default)] +pub struct ModuleData { + /// Path to the module starting from the crate name, like `my_crate::foo::bar`. + pub mod_path: Vec<Ident>, + /// Stack of paths to files loaded by out-of-line module items, + /// used to detect and report recursive module inclusions. + pub file_path_stack: Vec<PathBuf>, + /// Directory to search child module files in, + /// often (but not necessarily) the parent of the top file path on the `file_path_stack`. 
+ pub dir_path: PathBuf, +} + +impl ModuleData { + pub fn with_dir_path(&self, dir_path: PathBuf) -> ModuleData { + ModuleData { + mod_path: self.mod_path.clone(), + file_path_stack: self.file_path_stack.clone(), + dir_path, + } + } +} + +#[derive(Clone)] +pub struct ExpansionData { + pub id: LocalExpnId, + pub depth: usize, + pub module: Rc<ModuleData>, + pub dir_ownership: DirOwnership, + pub prior_type_ascription: Option<(Span, bool)>, + /// Some parent node that is close to this macro call + pub lint_node_id: NodeId, + pub is_trailing_mac: bool, +} + +/// One of these is made during expansion and incrementally updated as we go; +/// when a macro expansion occurs, the resulting nodes have the `backtrace() +/// -> expn_data` of their expansion context stored into their span. +pub struct ExtCtxt<'a> { + pub sess: &'a Session, + pub ecfg: expand::ExpansionConfig<'a>, + pub reduced_recursion_limit: Option<Limit>, + pub root_path: PathBuf, + pub resolver: &'a mut dyn ResolverExpand, + pub current_expansion: ExpansionData, + /// Error recovery mode entered when expansion is stuck + /// (or during eager expansion, but that's a hack). + pub force_mode: bool, + pub expansions: FxIndexMap<Span, Vec<String>>, + /// Used for running pre-expansion lints on freshly loaded modules. + pub(super) lint_store: LintStoreExpandDyn<'a>, + /// Used for storing lints generated during expansion, like `NAMED_ARGUMENTS_USED_POSITIONALLY` + pub buffered_early_lint: Vec<BufferedEarlyLint>, + /// When we 'expand' an inert attribute, we leave it + /// in the AST, but insert it here so that we know + /// not to expand it again. 
+ pub(super) expanded_inert_attrs: MarkedAttrs, +} + +impl<'a> ExtCtxt<'a> { + pub fn new( + sess: &'a Session, + ecfg: expand::ExpansionConfig<'a>, + resolver: &'a mut dyn ResolverExpand, + lint_store: LintStoreExpandDyn<'a>, + ) -> ExtCtxt<'a> { + ExtCtxt { + sess, + ecfg, + reduced_recursion_limit: None, + resolver, + lint_store, + root_path: PathBuf::new(), + current_expansion: ExpansionData { + id: LocalExpnId::ROOT, + depth: 0, + module: Default::default(), + dir_ownership: DirOwnership::Owned { relative: None }, + prior_type_ascription: None, + lint_node_id: ast::CRATE_NODE_ID, + is_trailing_mac: false, + }, + force_mode: false, + expansions: FxIndexMap::default(), + expanded_inert_attrs: MarkedAttrs::new(), + buffered_early_lint: vec![], + } + } + + /// Returns a `Folder` for deeply expanding all macros in an AST node. + pub fn expander<'b>(&'b mut self) -> expand::MacroExpander<'b, 'a> { + expand::MacroExpander::new(self, false) + } + + /// Returns a `Folder` that deeply expands all macros and assigns all `NodeId`s in an AST node. + /// Once `NodeId`s are assigned, the node may not be expanded, removed, or otherwise modified. + pub fn monotonic_expander<'b>(&'b mut self) -> expand::MacroExpander<'b, 'a> { + expand::MacroExpander::new(self, true) + } + pub fn new_parser_from_tts(&self, stream: TokenStream) -> parser::Parser<'a> { + rustc_parse::stream_to_parser(&self.sess.parse_sess, stream, MACRO_ARGUMENTS) + } + pub fn source_map(&self) -> &'a SourceMap { + self.sess.parse_sess.source_map() + } + pub fn parse_sess(&self) -> &'a ParseSess { + &self.sess.parse_sess + } + pub fn call_site(&self) -> Span { + self.current_expansion.id.expn_data().call_site + } + + /// Returns the current expansion kind's description. 
+ pub(crate) fn expansion_descr(&self) -> String { + let expn_data = self.current_expansion.id.expn_data(); + expn_data.kind.descr() + } + + /// Equivalent of `Span::def_site` from the proc macro API, + /// except that the location is taken from the span passed as an argument. + pub fn with_def_site_ctxt(&self, span: Span) -> Span { + span.with_def_site_ctxt(self.current_expansion.id.to_expn_id()) + } + + /// Equivalent of `Span::call_site` from the proc macro API, + /// except that the location is taken from the span passed as an argument. + pub fn with_call_site_ctxt(&self, span: Span) -> Span { + span.with_call_site_ctxt(self.current_expansion.id.to_expn_id()) + } + + /// Equivalent of `Span::mixed_site` from the proc macro API, + /// except that the location is taken from the span passed as an argument. + pub fn with_mixed_site_ctxt(&self, span: Span) -> Span { + span.with_mixed_site_ctxt(self.current_expansion.id.to_expn_id()) + } + + /// Returns span for the macro which originally caused the current expansion to happen. + /// + /// Stops backtracing at include! boundary. + pub fn expansion_cause(&self) -> Option<Span> { + self.current_expansion.id.expansion_cause() + } + + #[rustc_lint_diagnostics] + pub fn struct_span_err<S: Into<MultiSpan>>( + &self, + sp: S, + msg: &str, + ) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + self.sess.parse_sess.span_diagnostic.struct_span_err(sp, msg) + } + + pub fn create_err( + &self, + err: impl SessionDiagnostic<'a>, + ) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + self.sess.create_err(err) + } + + pub fn emit_err(&self, err: impl SessionDiagnostic<'a>) -> ErrorGuaranteed { + self.sess.emit_err(err) + } + + /// Emit `msg` attached to `sp`, without immediately stopping + /// compilation. + /// + /// Compilation will be stopped in the near future (at the end of + /// the macro expansion phase). 
+ #[rustc_lint_diagnostics] + pub fn span_err<S: Into<MultiSpan>>(&self, sp: S, msg: &str) { + self.sess.parse_sess.span_diagnostic.span_err(sp, msg); + } + #[rustc_lint_diagnostics] + pub fn span_warn<S: Into<MultiSpan>>(&self, sp: S, msg: &str) { + self.sess.parse_sess.span_diagnostic.span_warn(sp, msg); + } + pub fn span_bug<S: Into<MultiSpan>>(&self, sp: S, msg: &str) -> ! { + self.sess.parse_sess.span_diagnostic.span_bug(sp, msg); + } + pub fn trace_macros_diag(&mut self) { + for (sp, notes) in self.expansions.iter() { + let mut db = self.sess.parse_sess.span_diagnostic.span_note_diag(*sp, "trace_macro"); + for note in notes { + db.note(note); + } + db.emit(); + } + // Fixme: does this result in errors? + self.expansions.clear(); + } + pub fn bug(&self, msg: &str) -> ! { + self.sess.parse_sess.span_diagnostic.bug(msg); + } + pub fn trace_macros(&self) -> bool { + self.ecfg.trace_mac + } + pub fn set_trace_macros(&mut self, x: bool) { + self.ecfg.trace_mac = x + } + pub fn std_path(&self, components: &[Symbol]) -> Vec<Ident> { + let def_site = self.with_def_site_ctxt(DUMMY_SP); + iter::once(Ident::new(kw::DollarCrate, def_site)) + .chain(components.iter().map(|&s| Ident::with_dummy_span(s))) + .collect() + } + pub fn def_site_path(&self, components: &[Symbol]) -> Vec<Ident> { + let def_site = self.with_def_site_ctxt(DUMMY_SP); + components.iter().map(|&s| Ident::new(s, def_site)).collect() + } + + pub fn check_unused_macros(&mut self) { + self.resolver.check_unused_macros(); + } +} + +/// Resolves a `path` mentioned inside Rust code, returning an absolute path. +/// +/// This unifies the logic used for resolving `include_X!`. +pub fn resolve_path( + parse_sess: &ParseSess, + path: impl Into<PathBuf>, + span: Span, +) -> PResult<'_, PathBuf> { + let path = path.into(); + + // Relative paths are resolved relative to the file in which they are found + // after macro expansion (that is, they are unhygienic). 
+ if !path.is_absolute() { + let callsite = span.source_callsite(); + let mut result = match parse_sess.source_map().span_to_filename(callsite) { + FileName::Real(name) => name + .into_local_path() + .expect("attempting to resolve a file path in an external file"), + FileName::DocTest(path, _) => path, + other => { + return Err(parse_sess.span_diagnostic.struct_span_err( + span, + &format!( + "cannot resolve relative path in non-file source `{}`", + parse_sess.source_map().filename_for_diagnostics(&other) + ), + )); + } + }; + result.pop(); + result.push(path); + Ok(result) + } else { + Ok(path) + } +} + +/// Extracts a string literal from the macro expanded version of `expr`, +/// returning a diagnostic error of `err_msg` if `expr` is not a string literal. +/// The returned bool indicates whether an applicable suggestion has already been +/// added to the diagnostic to avoid emitting multiple suggestions. `Err(None)` +/// indicates that an ast error was encountered. +pub fn expr_to_spanned_string<'a>( + cx: &'a mut ExtCtxt<'_>, + expr: P<ast::Expr>, + err_msg: &str, +) -> Result<(Symbol, ast::StrStyle, Span), Option<(DiagnosticBuilder<'a, ErrorGuaranteed>, bool)>> { + // Perform eager expansion on the expression. + // We want to be able to handle e.g., `concat!("foo", "bar")`. 
+ let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); + + Err(match expr.kind { + ast::ExprKind::Lit(ref l) => match l.kind { + ast::LitKind::Str(s, style) => return Ok((s, style, expr.span)), + ast::LitKind::ByteStr(_) => { + let mut err = cx.struct_span_err(l.span, err_msg); + err.span_suggestion( + expr.span.shrink_to_lo(), + "consider removing the leading `b`", + "", + Applicability::MaybeIncorrect, + ); + Some((err, true)) + } + ast::LitKind::Err(_) => None, + _ => Some((cx.struct_span_err(l.span, err_msg), false)), + }, + ast::ExprKind::Err => None, + _ => Some((cx.struct_span_err(expr.span, err_msg), false)), + }) +} + +/// Extracts a string literal from the macro expanded version of `expr`, +/// emitting `err_msg` if `expr` is not a string literal. This does not stop +/// compilation on error, merely emits a non-fatal error and returns `None`. +pub fn expr_to_string( + cx: &mut ExtCtxt<'_>, + expr: P<ast::Expr>, + err_msg: &str, +) -> Option<(Symbol, ast::StrStyle)> { + expr_to_spanned_string(cx, expr, err_msg) + .map_err(|err| { + err.map(|(mut err, _)| { + err.emit(); + }) + }) + .ok() + .map(|(symbol, style, _)| (symbol, style)) +} + +/// Non-fatally assert that `tts` is empty. Note that this function +/// returns even when `tts` is non-empty, macros that *need* to stop +/// compilation should call +/// `cx.parse_sess.span_diagnostic.abort_if_errors()` (this should be +/// done as rarely as possible). +pub fn check_zero_tts(cx: &ExtCtxt<'_>, sp: Span, tts: TokenStream, name: &str) { + if !tts.is_empty() { + cx.span_err(sp, &format!("{} takes no arguments", name)); + } +} + +/// Parse an expression. On error, emit it, advancing to `Eof`, and return `None`. 
+pub fn parse_expr(p: &mut parser::Parser<'_>) -> Option<P<ast::Expr>> { + match p.parse_expr() { + Ok(e) => return Some(e), + Err(mut err) => { + err.emit(); + } + } + while p.token != token::Eof { + p.bump(); + } + None +} + +/// Interpreting `tts` as a comma-separated sequence of expressions, +/// expect exactly one string literal, or emit an error and return `None`. +pub fn get_single_str_from_tts( + cx: &mut ExtCtxt<'_>, + sp: Span, + tts: TokenStream, + name: &str, +) -> Option<Symbol> { + let mut p = cx.new_parser_from_tts(tts); + if p.token == token::Eof { + cx.span_err(sp, &format!("{} takes 1 argument", name)); + return None; + } + let ret = parse_expr(&mut p)?; + let _ = p.eat(&token::Comma); + + if p.token != token::Eof { + cx.span_err(sp, &format!("{} takes 1 argument", name)); + } + expr_to_string(cx, ret, "argument must be a string literal").map(|(s, _)| s) +} + +/// Extracts comma-separated expressions from `tts`. +/// On error, emit it, and return `None`. +pub fn get_exprs_from_tts( + cx: &mut ExtCtxt<'_>, + sp: Span, + tts: TokenStream, +) -> Option<Vec<P<ast::Expr>>> { + let mut p = cx.new_parser_from_tts(tts); + let mut es = Vec::new(); + while p.token != token::Eof { + let expr = parse_expr(&mut p)?; + + // Perform eager expansion on the expression. + // We want to be able to handle e.g., `concat!("foo", "bar")`. 
+ let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); + + es.push(expr); + if p.eat(&token::Comma) { + continue; + } + if p.token != token::Eof { + cx.span_err(sp, "expected token: `,`"); + return None; + } + } + Some(es) +} + +pub fn parse_macro_name_and_helper_attrs( + diag: &rustc_errors::Handler, + attr: &Attribute, + descr: &str, +) -> Option<(Symbol, Vec<Symbol>)> { + // Once we've located the `#[proc_macro_derive]` attribute, verify + // that it's of the form `#[proc_macro_derive(Foo)]` or + // `#[proc_macro_derive(Foo, attributes(A, ..))]` + let list = attr.meta_item_list()?; + if list.len() != 1 && list.len() != 2 { + diag.span_err(attr.span, "attribute must have either one or two arguments"); + return None; + } + let Some(trait_attr) = list[0].meta_item() else { + diag.span_err(list[0].span(), "not a meta item"); + return None; + }; + let trait_ident = match trait_attr.ident() { + Some(trait_ident) if trait_attr.is_word() => trait_ident, + _ => { + diag.span_err(trait_attr.span, "must only be one word"); + return None; + } + }; + + if !trait_ident.name.can_be_raw() { + diag.span_err( + trait_attr.span, + &format!("`{}` cannot be a name of {} macro", trait_ident, descr), + ); + } + + let attributes_attr = list.get(1); + let proc_attrs: Vec<_> = if let Some(attr) = attributes_attr { + if !attr.has_name(sym::attributes) { + diag.span_err(attr.span(), "second argument must be `attributes`"); + } + attr.meta_item_list() + .unwrap_or_else(|| { + diag.span_err(attr.span(), "attribute must be of form: `attributes(foo, bar)`"); + &[] + }) + .iter() + .filter_map(|attr| { + let Some(attr) = attr.meta_item() else { + diag.span_err(attr.span(), "not a meta item"); + return None; + }; + + let ident = match attr.ident() { + Some(ident) if attr.is_word() => ident, + _ => { + diag.span_err(attr.span, "must only be one word"); + return None; + } + }; + if !ident.name.can_be_raw() { + diag.span_err( + attr.span, + &format!("`{}` cannot be a 
name of derive helper attribute", ident), + ); + } + + Some(ident.name) + }) + .collect() + } else { + Vec::new() + }; + + Some((trait_ident.name, proc_attrs)) +} + +/// This nonterminal looks like some specific enums from +/// `proc-macro-hack` and `procedural-masquerade` crates. +/// We need to maintain some special pretty-printing behavior for them due to incorrect +/// asserts in old versions of those crates and their wide use in the ecosystem. +/// See issue #73345 for more details. +/// FIXME(#73933): Remove this eventually. +fn pretty_printing_compatibility_hack(item: &Item, sess: &ParseSess) -> bool { + let name = item.ident.name; + if name == sym::ProceduralMasqueradeDummyType { + if let ast::ItemKind::Enum(enum_def, _) = &item.kind { + if let [variant] = &*enum_def.variants { + if variant.ident.name == sym::Input { + sess.buffer_lint_with_diagnostic( + &PROC_MACRO_BACK_COMPAT, + item.ident.span, + ast::CRATE_NODE_ID, + "using `procedural-masquerade` crate", + BuiltinLintDiagnostics::ProcMacroBackCompat( + "The `procedural-masquerade` crate has been unnecessary since Rust 1.30.0. 
\ + Versions of this crate below 0.1.7 will eventually stop compiling.".to_string()) + ); + return true; + } + } + } + } + false +} + +pub(crate) fn ann_pretty_printing_compatibility_hack(ann: &Annotatable, sess: &ParseSess) -> bool { + let item = match ann { + Annotatable::Item(item) => item, + Annotatable::Stmt(stmt) => match &stmt.kind { + ast::StmtKind::Item(item) => item, + _ => return false, + }, + _ => return false, + }; + pretty_printing_compatibility_hack(item, sess) +} + +pub(crate) fn nt_pretty_printing_compatibility_hack(nt: &Nonterminal, sess: &ParseSess) -> bool { + let item = match nt { + Nonterminal::NtItem(item) => item, + Nonterminal::NtStmt(stmt) => match &stmt.kind { + ast::StmtKind::Item(item) => item, + _ => return false, + }, + _ => return false, + }; + pretty_printing_compatibility_hack(item, sess) +} diff --git a/compiler/rustc_expand/src/build.rs b/compiler/rustc_expand/src/build.rs new file mode 100644 index 000000000..fa3e2a4a5 --- /dev/null +++ b/compiler/rustc_expand/src/build.rs @@ -0,0 +1,616 @@ +use crate::base::ExtCtxt; + +use rustc_ast::attr; +use rustc_ast::ptr::P; +use rustc_ast::{self as ast, AttrVec, BlockCheckMode, Expr, LocalKind, PatKind, UnOp}; +use rustc_span::source_map::Spanned; +use rustc_span::symbol::{kw, sym, Ident, Symbol}; + +use rustc_span::Span; + +impl<'a> ExtCtxt<'a> { + pub fn path(&self, span: Span, strs: Vec<Ident>) -> ast::Path { + self.path_all(span, false, strs, vec![]) + } + pub fn path_ident(&self, span: Span, id: Ident) -> ast::Path { + self.path(span, vec![id]) + } + pub fn path_global(&self, span: Span, strs: Vec<Ident>) -> ast::Path { + self.path_all(span, true, strs, vec![]) + } + pub fn path_all( + &self, + span: Span, + global: bool, + mut idents: Vec<Ident>, + args: Vec<ast::GenericArg>, + ) -> ast::Path { + assert!(!idents.is_empty()); + let add_root = global && !idents[0].is_path_segment_keyword(); + let mut segments = Vec::with_capacity(idents.len() + add_root as usize); + if add_root { + 
segments.push(ast::PathSegment::path_root(span)); + } + let last_ident = idents.pop().unwrap(); + segments.extend( + idents.into_iter().map(|ident| ast::PathSegment::from_ident(ident.with_span_pos(span))), + ); + let args = if !args.is_empty() { + let args = args.into_iter().map(ast::AngleBracketedArg::Arg).collect(); + ast::AngleBracketedArgs { args, span }.into() + } else { + None + }; + segments.push(ast::PathSegment { + ident: last_ident.with_span_pos(span), + id: ast::DUMMY_NODE_ID, + args, + }); + ast::Path { span, segments, tokens: None } + } + + pub fn ty_mt(&self, ty: P<ast::Ty>, mutbl: ast::Mutability) -> ast::MutTy { + ast::MutTy { ty, mutbl } + } + + pub fn ty(&self, span: Span, kind: ast::TyKind) -> P<ast::Ty> { + P(ast::Ty { id: ast::DUMMY_NODE_ID, span, kind, tokens: None }) + } + + pub fn ty_infer(&self, span: Span) -> P<ast::Ty> { + self.ty(span, ast::TyKind::Infer) + } + + pub fn ty_path(&self, path: ast::Path) -> P<ast::Ty> { + self.ty(path.span, ast::TyKind::Path(None, path)) + } + + // Might need to take bounds as an argument in the future, if you ever want + // to generate a bounded existential trait type. 
+ pub fn ty_ident(&self, span: Span, ident: Ident) -> P<ast::Ty> { + self.ty_path(self.path_ident(span, ident)) + } + + pub fn anon_const(&self, span: Span, kind: ast::ExprKind) -> ast::AnonConst { + ast::AnonConst { + id: ast::DUMMY_NODE_ID, + value: P(ast::Expr { + id: ast::DUMMY_NODE_ID, + kind, + span, + attrs: AttrVec::new(), + tokens: None, + }), + } + } + + pub fn const_ident(&self, span: Span, ident: Ident) -> ast::AnonConst { + self.anon_const(span, ast::ExprKind::Path(None, self.path_ident(span, ident))) + } + + pub fn ty_rptr( + &self, + span: Span, + ty: P<ast::Ty>, + lifetime: Option<ast::Lifetime>, + mutbl: ast::Mutability, + ) -> P<ast::Ty> { + self.ty(span, ast::TyKind::Rptr(lifetime, self.ty_mt(ty, mutbl))) + } + + pub fn ty_ptr(&self, span: Span, ty: P<ast::Ty>, mutbl: ast::Mutability) -> P<ast::Ty> { + self.ty(span, ast::TyKind::Ptr(self.ty_mt(ty, mutbl))) + } + + pub fn typaram( + &self, + span: Span, + ident: Ident, + attrs: Vec<ast::Attribute>, + bounds: ast::GenericBounds, + default: Option<P<ast::Ty>>, + ) -> ast::GenericParam { + ast::GenericParam { + ident: ident.with_span_pos(span), + id: ast::DUMMY_NODE_ID, + attrs: attrs.into(), + bounds, + kind: ast::GenericParamKind::Type { default }, + is_placeholder: false, + colon_span: None, + } + } + + pub fn trait_ref(&self, path: ast::Path) -> ast::TraitRef { + ast::TraitRef { path, ref_id: ast::DUMMY_NODE_ID } + } + + pub fn poly_trait_ref(&self, span: Span, path: ast::Path) -> ast::PolyTraitRef { + ast::PolyTraitRef { + bound_generic_params: Vec::new(), + trait_ref: self.trait_ref(path), + span, + } + } + + pub fn trait_bound(&self, path: ast::Path) -> ast::GenericBound { + ast::GenericBound::Trait( + self.poly_trait_ref(path.span, path), + ast::TraitBoundModifier::None, + ) + } + + pub fn lifetime(&self, span: Span, ident: Ident) -> ast::Lifetime { + ast::Lifetime { id: ast::DUMMY_NODE_ID, ident: ident.with_span_pos(span) } + } + + pub fn lifetime_static(&self, span: Span) -> ast::Lifetime { 
+ self.lifetime(span, Ident::new(kw::StaticLifetime, span)) + } + + pub fn stmt_expr(&self, expr: P<ast::Expr>) -> ast::Stmt { + ast::Stmt { id: ast::DUMMY_NODE_ID, span: expr.span, kind: ast::StmtKind::Expr(expr) } + } + + pub fn stmt_let_pat(&self, sp: Span, pat: P<ast::Pat>, ex: P<ast::Expr>) -> ast::Stmt { + let local = P(ast::Local { + pat, + ty: None, + id: ast::DUMMY_NODE_ID, + kind: LocalKind::Init(ex), + span: sp, + attrs: AttrVec::new(), + tokens: None, + }); + self.stmt_local(local, sp) + } + + pub fn stmt_let(&self, sp: Span, mutbl: bool, ident: Ident, ex: P<ast::Expr>) -> ast::Stmt { + self.stmt_let_ty(sp, mutbl, ident, None, ex) + } + + pub fn stmt_let_ty( + &self, + sp: Span, + mutbl: bool, + ident: Ident, + ty: Option<P<ast::Ty>>, + ex: P<ast::Expr>, + ) -> ast::Stmt { + let pat = if mutbl { + let binding_mode = ast::BindingMode::ByValue(ast::Mutability::Mut); + self.pat_ident_binding_mode(sp, ident, binding_mode) + } else { + self.pat_ident(sp, ident) + }; + let local = P(ast::Local { + pat, + ty, + id: ast::DUMMY_NODE_ID, + kind: LocalKind::Init(ex), + span: sp, + attrs: AttrVec::new(), + tokens: None, + }); + self.stmt_local(local, sp) + } + + // Generates `let _: Type;`, which is usually used for type assertions. 
+ pub fn stmt_let_type_only(&self, span: Span, ty: P<ast::Ty>) -> ast::Stmt { + let local = P(ast::Local { + pat: self.pat_wild(span), + ty: Some(ty), + id: ast::DUMMY_NODE_ID, + kind: LocalKind::Decl, + span, + attrs: AttrVec::new(), + tokens: None, + }); + self.stmt_local(local, span) + } + + pub fn stmt_local(&self, local: P<ast::Local>, span: Span) -> ast::Stmt { + ast::Stmt { id: ast::DUMMY_NODE_ID, kind: ast::StmtKind::Local(local), span } + } + + pub fn stmt_item(&self, sp: Span, item: P<ast::Item>) -> ast::Stmt { + ast::Stmt { id: ast::DUMMY_NODE_ID, kind: ast::StmtKind::Item(item), span: sp } + } + + pub fn block_expr(&self, expr: P<ast::Expr>) -> P<ast::Block> { + self.block( + expr.span, + vec![ast::Stmt { + id: ast::DUMMY_NODE_ID, + span: expr.span, + kind: ast::StmtKind::Expr(expr), + }], + ) + } + pub fn block(&self, span: Span, stmts: Vec<ast::Stmt>) -> P<ast::Block> { + P(ast::Block { + stmts, + id: ast::DUMMY_NODE_ID, + rules: BlockCheckMode::Default, + span, + tokens: None, + could_be_bare_literal: false, + }) + } + + pub fn expr(&self, span: Span, kind: ast::ExprKind) -> P<ast::Expr> { + P(ast::Expr { id: ast::DUMMY_NODE_ID, kind, span, attrs: AttrVec::new(), tokens: None }) + } + + pub fn expr_path(&self, path: ast::Path) -> P<ast::Expr> { + self.expr(path.span, ast::ExprKind::Path(None, path)) + } + + pub fn expr_ident(&self, span: Span, id: Ident) -> P<ast::Expr> { + self.expr_path(self.path_ident(span, id)) + } + pub fn expr_self(&self, span: Span) -> P<ast::Expr> { + self.expr_ident(span, Ident::with_dummy_span(kw::SelfLower)) + } + + pub fn expr_binary( + &self, + sp: Span, + op: ast::BinOpKind, + lhs: P<ast::Expr>, + rhs: P<ast::Expr>, + ) -> P<ast::Expr> { + self.expr(sp, ast::ExprKind::Binary(Spanned { node: op, span: sp }, lhs, rhs)) + } + + pub fn expr_deref(&self, sp: Span, e: P<ast::Expr>) -> P<ast::Expr> { + self.expr(sp, ast::ExprKind::Unary(UnOp::Deref, e)) + } + + pub fn expr_addr_of(&self, sp: Span, e: P<ast::Expr>) -> 
P<ast::Expr> { + self.expr(sp, ast::ExprKind::AddrOf(ast::BorrowKind::Ref, ast::Mutability::Not, e)) + } + + pub fn expr_call( + &self, + span: Span, + expr: P<ast::Expr>, + args: Vec<P<ast::Expr>>, + ) -> P<ast::Expr> { + self.expr(span, ast::ExprKind::Call(expr, args)) + } + pub fn expr_call_ident(&self, span: Span, id: Ident, args: Vec<P<ast::Expr>>) -> P<ast::Expr> { + self.expr(span, ast::ExprKind::Call(self.expr_ident(span, id), args)) + } + pub fn expr_call_global( + &self, + sp: Span, + fn_path: Vec<Ident>, + args: Vec<P<ast::Expr>>, + ) -> P<ast::Expr> { + let pathexpr = self.expr_path(self.path_global(sp, fn_path)); + self.expr_call(sp, pathexpr, args) + } + pub fn expr_block(&self, b: P<ast::Block>) -> P<ast::Expr> { + self.expr(b.span, ast::ExprKind::Block(b, None)) + } + pub fn field_imm(&self, span: Span, ident: Ident, e: P<ast::Expr>) -> ast::ExprField { + ast::ExprField { + ident: ident.with_span_pos(span), + expr: e, + span, + is_shorthand: false, + attrs: AttrVec::new(), + id: ast::DUMMY_NODE_ID, + is_placeholder: false, + } + } + pub fn expr_struct( + &self, + span: Span, + path: ast::Path, + fields: Vec<ast::ExprField>, + ) -> P<ast::Expr> { + self.expr( + span, + ast::ExprKind::Struct(P(ast::StructExpr { + qself: None, + path, + fields, + rest: ast::StructRest::None, + })), + ) + } + pub fn expr_struct_ident( + &self, + span: Span, + id: Ident, + fields: Vec<ast::ExprField>, + ) -> P<ast::Expr> { + self.expr_struct(span, self.path_ident(span, id), fields) + } + + pub fn expr_lit(&self, span: Span, lit_kind: ast::LitKind) -> P<ast::Expr> { + let lit = ast::Lit::from_lit_kind(lit_kind, span); + self.expr(span, ast::ExprKind::Lit(lit)) + } + pub fn expr_usize(&self, span: Span, i: usize) -> P<ast::Expr> { + self.expr_lit( + span, + ast::LitKind::Int(i as u128, ast::LitIntType::Unsigned(ast::UintTy::Usize)), + ) + } + pub fn expr_u32(&self, sp: Span, u: u32) -> P<ast::Expr> { + self.expr_lit(sp, ast::LitKind::Int(u as u128, 
ast::LitIntType::Unsigned(ast::UintTy::U32))) + } + pub fn expr_bool(&self, sp: Span, value: bool) -> P<ast::Expr> { + self.expr_lit(sp, ast::LitKind::Bool(value)) + } + + /// `[expr1, expr2, ...]` + pub fn expr_array(&self, sp: Span, exprs: Vec<P<ast::Expr>>) -> P<ast::Expr> { + self.expr(sp, ast::ExprKind::Array(exprs)) + } + + /// `&[expr1, expr2, ...]` + pub fn expr_array_ref(&self, sp: Span, exprs: Vec<P<ast::Expr>>) -> P<ast::Expr> { + self.expr_addr_of(sp, self.expr_array(sp, exprs)) + } + + pub fn expr_str(&self, sp: Span, s: Symbol) -> P<ast::Expr> { + self.expr_lit(sp, ast::LitKind::Str(s, ast::StrStyle::Cooked)) + } + + pub fn expr_cast(&self, sp: Span, expr: P<ast::Expr>, ty: P<ast::Ty>) -> P<ast::Expr> { + self.expr(sp, ast::ExprKind::Cast(expr, ty)) + } + + pub fn expr_some(&self, sp: Span, expr: P<ast::Expr>) -> P<ast::Expr> { + let some = self.std_path(&[sym::option, sym::Option, sym::Some]); + self.expr_call_global(sp, some, vec![expr]) + } + + pub fn expr_none(&self, sp: Span) -> P<ast::Expr> { + let none = self.std_path(&[sym::option, sym::Option, sym::None]); + self.expr_path(self.path_global(sp, none)) + } + pub fn expr_tuple(&self, sp: Span, exprs: Vec<P<ast::Expr>>) -> P<ast::Expr> { + self.expr(sp, ast::ExprKind::Tup(exprs)) + } + + pub fn expr_fail(&self, span: Span, msg: Symbol) -> P<ast::Expr> { + self.expr_call_global( + span, + [sym::std, sym::rt, sym::begin_panic].iter().map(|s| Ident::new(*s, span)).collect(), + vec![self.expr_str(span, msg)], + ) + } + + pub fn expr_unreachable(&self, span: Span) -> P<ast::Expr> { + self.expr_fail(span, Symbol::intern("internal error: entered unreachable code")) + } + + pub fn expr_ok(&self, sp: Span, expr: P<ast::Expr>) -> P<ast::Expr> { + let ok = self.std_path(&[sym::result, sym::Result, sym::Ok]); + self.expr_call_global(sp, ok, vec![expr]) + } + + pub fn expr_try(&self, sp: Span, head: P<ast::Expr>) -> P<ast::Expr> { + let ok = self.std_path(&[sym::result, sym::Result, sym::Ok]); + let ok_path = 
self.path_global(sp, ok); + let err = self.std_path(&[sym::result, sym::Result, sym::Err]); + let err_path = self.path_global(sp, err); + + let binding_variable = Ident::new(sym::__try_var, sp); + let binding_pat = self.pat_ident(sp, binding_variable); + let binding_expr = self.expr_ident(sp, binding_variable); + + // `Ok(__try_var)` pattern + let ok_pat = self.pat_tuple_struct(sp, ok_path, vec![binding_pat.clone()]); + + // `Err(__try_var)` (pattern and expression respectively) + let err_pat = self.pat_tuple_struct(sp, err_path.clone(), vec![binding_pat]); + let err_inner_expr = + self.expr_call(sp, self.expr_path(err_path), vec![binding_expr.clone()]); + // `return Err(__try_var)` + let err_expr = self.expr(sp, ast::ExprKind::Ret(Some(err_inner_expr))); + + // `Ok(__try_var) => __try_var` + let ok_arm = self.arm(sp, ok_pat, binding_expr); + // `Err(__try_var) => return Err(__try_var)` + let err_arm = self.arm(sp, err_pat, err_expr); + + // `match head { Ok() => ..., Err() => ... }` + self.expr_match(sp, head, vec![ok_arm, err_arm]) + } + + pub fn pat(&self, span: Span, kind: PatKind) -> P<ast::Pat> { + P(ast::Pat { id: ast::DUMMY_NODE_ID, kind, span, tokens: None }) + } + pub fn pat_wild(&self, span: Span) -> P<ast::Pat> { + self.pat(span, PatKind::Wild) + } + pub fn pat_lit(&self, span: Span, expr: P<ast::Expr>) -> P<ast::Pat> { + self.pat(span, PatKind::Lit(expr)) + } + pub fn pat_ident(&self, span: Span, ident: Ident) -> P<ast::Pat> { + let binding_mode = ast::BindingMode::ByValue(ast::Mutability::Not); + self.pat_ident_binding_mode(span, ident, binding_mode) + } + + pub fn pat_ident_binding_mode( + &self, + span: Span, + ident: Ident, + bm: ast::BindingMode, + ) -> P<ast::Pat> { + let pat = PatKind::Ident(bm, ident.with_span_pos(span), None); + self.pat(span, pat) + } + pub fn pat_path(&self, span: Span, path: ast::Path) -> P<ast::Pat> { + self.pat(span, PatKind::Path(None, path)) + } + pub fn pat_tuple_struct( + &self, + span: Span, + path: ast::Path, + 
subpats: Vec<P<ast::Pat>>, + ) -> P<ast::Pat> { + self.pat(span, PatKind::TupleStruct(None, path, subpats)) + } + pub fn pat_struct( + &self, + span: Span, + path: ast::Path, + field_pats: Vec<ast::PatField>, + ) -> P<ast::Pat> { + self.pat(span, PatKind::Struct(None, path, field_pats, false)) + } + pub fn pat_tuple(&self, span: Span, pats: Vec<P<ast::Pat>>) -> P<ast::Pat> { + self.pat(span, PatKind::Tuple(pats)) + } + + pub fn pat_some(&self, span: Span, pat: P<ast::Pat>) -> P<ast::Pat> { + let some = self.std_path(&[sym::option, sym::Option, sym::Some]); + let path = self.path_global(span, some); + self.pat_tuple_struct(span, path, vec![pat]) + } + + pub fn arm(&self, span: Span, pat: P<ast::Pat>, expr: P<ast::Expr>) -> ast::Arm { + ast::Arm { + attrs: AttrVec::new(), + pat, + guard: None, + body: expr, + span, + id: ast::DUMMY_NODE_ID, + is_placeholder: false, + } + } + + pub fn arm_unreachable(&self, span: Span) -> ast::Arm { + self.arm(span, self.pat_wild(span), self.expr_unreachable(span)) + } + + pub fn expr_match(&self, span: Span, arg: P<ast::Expr>, arms: Vec<ast::Arm>) -> P<Expr> { + self.expr(span, ast::ExprKind::Match(arg, arms)) + } + + pub fn expr_if( + &self, + span: Span, + cond: P<ast::Expr>, + then: P<ast::Expr>, + els: Option<P<ast::Expr>>, + ) -> P<ast::Expr> { + let els = els.map(|x| self.expr_block(self.block_expr(x))); + self.expr(span, ast::ExprKind::If(cond, self.block_expr(then), els)) + } + + pub fn lambda(&self, span: Span, ids: Vec<Ident>, body: P<ast::Expr>) -> P<ast::Expr> { + let fn_decl = self.fn_decl( + ids.iter().map(|id| self.param(span, *id, self.ty(span, ast::TyKind::Infer))).collect(), + ast::FnRetTy::Default(span), + ); + + // FIXME -- We are using `span` as the span of the `|...|` + // part of the lambda, but it probably (maybe?) corresponds to + // the entire lambda body. Probably we should extend the API + // here, but that's not entirely clear. 
+ self.expr( + span, + ast::ExprKind::Closure( + ast::ClosureBinder::NotPresent, + ast::CaptureBy::Ref, + ast::Async::No, + ast::Movability::Movable, + fn_decl, + body, + span, + ), + ) + } + + pub fn lambda0(&self, span: Span, body: P<ast::Expr>) -> P<ast::Expr> { + self.lambda(span, Vec::new(), body) + } + + pub fn lambda1(&self, span: Span, body: P<ast::Expr>, ident: Ident) -> P<ast::Expr> { + self.lambda(span, vec![ident], body) + } + + pub fn lambda_stmts_1(&self, span: Span, stmts: Vec<ast::Stmt>, ident: Ident) -> P<ast::Expr> { + self.lambda1(span, self.expr_block(self.block(span, stmts)), ident) + } + + pub fn param(&self, span: Span, ident: Ident, ty: P<ast::Ty>) -> ast::Param { + let arg_pat = self.pat_ident(span, ident); + ast::Param { + attrs: AttrVec::default(), + id: ast::DUMMY_NODE_ID, + pat: arg_pat, + span, + ty, + is_placeholder: false, + } + } + + // `self` is unused but keep it as method for the convenience use. + pub fn fn_decl(&self, inputs: Vec<ast::Param>, output: ast::FnRetTy) -> P<ast::FnDecl> { + P(ast::FnDecl { inputs, output }) + } + + pub fn item( + &self, + span: Span, + name: Ident, + attrs: Vec<ast::Attribute>, + kind: ast::ItemKind, + ) -> P<ast::Item> { + // FIXME: Would be nice if our generated code didn't violate + // Rust coding conventions + P(ast::Item { + ident: name, + attrs, + id: ast::DUMMY_NODE_ID, + kind, + vis: ast::Visibility { + span: span.shrink_to_lo(), + kind: ast::VisibilityKind::Inherited, + tokens: None, + }, + span, + tokens: None, + }) + } + + pub fn item_static( + &self, + span: Span, + name: Ident, + ty: P<ast::Ty>, + mutbl: ast::Mutability, + expr: P<ast::Expr>, + ) -> P<ast::Item> { + self.item(span, name, Vec::new(), ast::ItemKind::Static(ty, mutbl, Some(expr))) + } + + pub fn item_const( + &self, + span: Span, + name: Ident, + ty: P<ast::Ty>, + expr: P<ast::Expr>, + ) -> P<ast::Item> { + let def = ast::Defaultness::Final; + self.item(span, name, Vec::new(), ast::ItemKind::Const(def, ty, Some(expr))) + } 
+ + pub fn attribute(&self, mi: ast::MetaItem) -> ast::Attribute { + attr::mk_attr_outer(mi) + } + + pub fn meta_word(&self, sp: Span, w: Symbol) -> ast::MetaItem { + attr::mk_word_item(Ident::new(w, sp)) + } +} diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs new file mode 100644 index 000000000..3e1acf438 --- /dev/null +++ b/compiler/rustc_expand/src/config.rs @@ -0,0 +1,535 @@ +//! Conditional compilation stripping. + +use rustc_ast::ptr::P; +use rustc_ast::token::{Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::{AttrAnnotatedTokenStream, AttrAnnotatedTokenTree}; +use rustc_ast::tokenstream::{DelimSpan, Spacing}; +use rustc_ast::tokenstream::{LazyTokenStream, TokenTree}; +use rustc_ast::NodeId; +use rustc_ast::{self as ast, AttrStyle, Attribute, HasAttrs, HasTokens, MetaItem}; +use rustc_attr as attr; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::map_in_place::MapInPlace; +use rustc_errors::{error_code, struct_span_err, Applicability, Handler}; +use rustc_feature::{Feature, Features, State as FeatureState}; +use rustc_feature::{ + ACCEPTED_FEATURES, ACTIVE_FEATURES, REMOVED_FEATURES, STABLE_REMOVED_FEATURES, +}; +use rustc_parse::validate_attr; +use rustc_session::parse::feature_err; +use rustc_session::Session; +use rustc_span::edition::{Edition, ALL_EDITIONS}; +use rustc_span::symbol::{sym, Symbol}; +use rustc_span::{Span, DUMMY_SP}; + +/// A folder that strips out items that do not belong in the current configuration. +pub struct StripUnconfigured<'a> { + pub sess: &'a Session, + pub features: Option<&'a Features>, + /// If `true`, perform cfg-stripping on attached tokens. 
+ /// This is only used for the input to derive macros, + /// which needs eager expansion of `cfg` and `cfg_attr` + pub config_tokens: bool, + pub lint_node_id: NodeId, +} + +fn get_features( + sess: &Session, + span_handler: &Handler, + krate_attrs: &[ast::Attribute], +) -> Features { + fn feature_removed(span_handler: &Handler, span: Span, reason: Option<&str>) { + let mut err = struct_span_err!(span_handler, span, E0557, "feature has been removed"); + err.span_label(span, "feature has been removed"); + if let Some(reason) = reason { + err.note(reason); + } + err.emit(); + } + + fn active_features_up_to(edition: Edition) -> impl Iterator<Item = &'static Feature> { + ACTIVE_FEATURES.iter().filter(move |feature| { + if let Some(feature_edition) = feature.edition { + feature_edition <= edition + } else { + false + } + }) + } + + let mut features = Features::default(); + let mut edition_enabled_features = FxHashMap::default(); + let crate_edition = sess.edition(); + + for &edition in ALL_EDITIONS { + if edition <= crate_edition { + // The `crate_edition` implies its respective umbrella feature-gate + // (i.e., `#![feature(rust_20XX_preview)]` isn't needed on edition 20XX). + edition_enabled_features.insert(edition.feature_name(), edition); + } + } + + for feature in active_features_up_to(crate_edition) { + feature.set(&mut features, DUMMY_SP); + edition_enabled_features.insert(feature.name, crate_edition); + } + + // Process the edition umbrella feature-gates first, to ensure + // `edition_enabled_features` is completed before it's queried. 
+ for attr in krate_attrs { + if !attr.has_name(sym::feature) { + continue; + } + + let Some(list) = attr.meta_item_list() else { + continue; + }; + + for mi in list { + if !mi.is_word() { + continue; + } + + let name = mi.name_or_empty(); + + let edition = ALL_EDITIONS.iter().find(|e| name == e.feature_name()).copied(); + if let Some(edition) = edition { + if edition <= crate_edition { + continue; + } + + for feature in active_features_up_to(edition) { + // FIXME(Manishearth) there is currently no way to set + // lib features by edition + feature.set(&mut features, DUMMY_SP); + edition_enabled_features.insert(feature.name, edition); + } + } + } + } + + for attr in krate_attrs { + if !attr.has_name(sym::feature) { + continue; + } + + let Some(list) = attr.meta_item_list() else { + continue; + }; + + let bad_input = |span| { + struct_span_err!(span_handler, span, E0556, "malformed `feature` attribute input") + }; + + for mi in list { + let name = match mi.ident() { + Some(ident) if mi.is_word() => ident.name, + Some(ident) => { + bad_input(mi.span()) + .span_suggestion( + mi.span(), + "expected just one word", + ident.name, + Applicability::MaybeIncorrect, + ) + .emit(); + continue; + } + None => { + bad_input(mi.span()).span_label(mi.span(), "expected just one word").emit(); + continue; + } + }; + + if let Some(edition) = edition_enabled_features.get(&name) { + let msg = + &format!("the feature `{}` is included in the Rust {} edition", name, edition); + span_handler.struct_span_warn_with_code(mi.span(), msg, error_code!(E0705)).emit(); + continue; + } + + if ALL_EDITIONS.iter().any(|e| name == e.feature_name()) { + // Handled in the separate loop above. + continue; + } + + let removed = REMOVED_FEATURES.iter().find(|f| name == f.name); + let stable_removed = STABLE_REMOVED_FEATURES.iter().find(|f| name == f.name); + if let Some(Feature { state, .. 
}) = removed.or(stable_removed) { + if let FeatureState::Removed { reason } | FeatureState::Stabilized { reason } = + state + { + feature_removed(span_handler, mi.span(), *reason); + continue; + } + } + + if let Some(Feature { since, .. }) = ACCEPTED_FEATURES.iter().find(|f| name == f.name) { + let since = Some(Symbol::intern(since)); + features.declared_lang_features.push((name, mi.span(), since)); + features.active_features.insert(name); + continue; + } + + if let Some(allowed) = sess.opts.unstable_opts.allow_features.as_ref() { + if allowed.iter().all(|f| name.as_str() != f) { + struct_span_err!( + span_handler, + mi.span(), + E0725, + "the feature `{}` is not in the list of allowed features", + name + ) + .emit(); + continue; + } + } + + if let Some(f) = ACTIVE_FEATURES.iter().find(|f| name == f.name) { + f.set(&mut features, mi.span()); + features.declared_lang_features.push((name, mi.span(), None)); + features.active_features.insert(name); + continue; + } + + features.declared_lib_features.push((name, mi.span())); + features.active_features.insert(name); + } + } + + features +} + +// `cfg_attr`-process the crate's attributes and compute the crate's features. +pub fn features( + sess: &Session, + mut krate: ast::Crate, + lint_node_id: NodeId, +) -> (ast::Crate, Features) { + let mut strip_unconfigured = + StripUnconfigured { sess, features: None, config_tokens: false, lint_node_id }; + + let unconfigured_attrs = krate.attrs.clone(); + let diag = &sess.parse_sess.span_diagnostic; + let err_count = diag.err_count(); + let features = match strip_unconfigured.configure_krate_attrs(krate.attrs) { + None => { + // The entire crate is unconfigured. + krate.attrs = Vec::new(); + krate.items = Vec::new(); + Features::default() + } + Some(attrs) => { + krate.attrs = attrs; + let features = get_features(sess, diag, &krate.attrs); + if err_count == diag.err_count() { + // Avoid reconfiguring malformed `cfg_attr`s. 
+ strip_unconfigured.features = Some(&features); + // Run configuration again, this time with features available + // so that we can perform feature-gating. + strip_unconfigured.configure_krate_attrs(unconfigured_attrs); + } + features + } + }; + (krate, features) +} + +#[macro_export] +macro_rules! configure { + ($this:ident, $node:ident) => { + match $this.configure($node) { + Some(node) => node, + None => return Default::default(), + } + }; +} + +impl<'a> StripUnconfigured<'a> { + pub fn configure<T: HasAttrs + HasTokens>(&self, mut node: T) -> Option<T> { + self.process_cfg_attrs(&mut node); + if self.in_cfg(node.attrs()) { + self.try_configure_tokens(&mut node); + Some(node) + } else { + None + } + } + + fn try_configure_tokens<T: HasTokens>(&self, node: &mut T) { + if self.config_tokens { + if let Some(Some(tokens)) = node.tokens_mut() { + let attr_annotated_tokens = tokens.create_token_stream(); + *tokens = LazyTokenStream::new(self.configure_tokens(&attr_annotated_tokens)); + } + } + } + + fn configure_krate_attrs(&self, mut attrs: Vec<ast::Attribute>) -> Option<Vec<ast::Attribute>> { + attrs.flat_map_in_place(|attr| self.process_cfg_attr(attr)); + if self.in_cfg(&attrs) { Some(attrs) } else { None } + } + + /// Performs cfg-expansion on `stream`, producing a new `AttrAnnotatedTokenStream`. + /// This is only used during the invocation of `derive` proc-macros, + /// which require that we cfg-expand their entire input. 
+ /// Normal cfg-expansion operates on parsed AST nodes via the `configure` method + fn configure_tokens(&self, stream: &AttrAnnotatedTokenStream) -> AttrAnnotatedTokenStream { + fn can_skip(stream: &AttrAnnotatedTokenStream) -> bool { + stream.0.iter().all(|(tree, _spacing)| match tree { + AttrAnnotatedTokenTree::Attributes(_) => false, + AttrAnnotatedTokenTree::Token(_) => true, + AttrAnnotatedTokenTree::Delimited(_, _, inner) => can_skip(inner), + }) + } + + if can_skip(stream) { + return stream.clone(); + } + + let trees: Vec<_> = stream + .0 + .iter() + .flat_map(|(tree, spacing)| match tree.clone() { + AttrAnnotatedTokenTree::Attributes(mut data) => { + let mut attrs: Vec<_> = std::mem::take(&mut data.attrs).into(); + attrs.flat_map_in_place(|attr| self.process_cfg_attr(attr)); + data.attrs = attrs.into(); + + if self.in_cfg(&data.attrs) { + data.tokens = LazyTokenStream::new( + self.configure_tokens(&data.tokens.create_token_stream()), + ); + Some((AttrAnnotatedTokenTree::Attributes(data), *spacing)).into_iter() + } else { + None.into_iter() + } + } + AttrAnnotatedTokenTree::Delimited(sp, delim, mut inner) => { + inner = self.configure_tokens(&inner); + Some((AttrAnnotatedTokenTree::Delimited(sp, delim, inner), *spacing)) + .into_iter() + } + AttrAnnotatedTokenTree::Token(ref token) if let TokenKind::Interpolated(ref nt) = token.kind => { + panic!( + "Nonterminal should have been flattened at {:?}: {:?}", + token.span, nt + ); + } + AttrAnnotatedTokenTree::Token(token) => { + Some((AttrAnnotatedTokenTree::Token(token), *spacing)).into_iter() + } + }) + .collect(); + AttrAnnotatedTokenStream::new(trees) + } + + /// Parse and expand all `cfg_attr` attributes into a list of attributes + /// that are within each `cfg_attr` that has a true configuration predicate. + /// + /// Gives compiler warnings if any `cfg_attr` does not contain any + /// attributes and is in the original source code. Gives compiler errors if + /// the syntax of any `cfg_attr` is incorrect. 
+ fn process_cfg_attrs<T: HasAttrs>(&self, node: &mut T) { + node.visit_attrs(|attrs| { + attrs.flat_map_in_place(|attr| self.process_cfg_attr(attr)); + }); + } + + fn process_cfg_attr(&self, attr: Attribute) -> Vec<Attribute> { + if attr.has_name(sym::cfg_attr) { self.expand_cfg_attr(attr, true) } else { vec![attr] } + } + + /// Parse and expand a single `cfg_attr` attribute into a list of attributes + /// when the configuration predicate is true, or otherwise expand into an + /// empty list of attributes. + /// + /// Gives a compiler warning when the `cfg_attr` contains no attributes and + /// is in the original source file. Gives a compiler error if the syntax of + /// the attribute is incorrect. + pub(crate) fn expand_cfg_attr(&self, attr: Attribute, recursive: bool) -> Vec<Attribute> { + let Some((cfg_predicate, expanded_attrs)) = + rustc_parse::parse_cfg_attr(&attr, &self.sess.parse_sess) else { + return vec![]; + }; + + // Lint on zero attributes in source. + if expanded_attrs.is_empty() { + self.sess.parse_sess.buffer_lint( + rustc_lint_defs::builtin::UNUSED_ATTRIBUTES, + attr.span, + ast::CRATE_NODE_ID, + "`#[cfg_attr]` does not expand to any attributes", + ); + } + + if !attr::cfg_matches( + &cfg_predicate, + &self.sess.parse_sess, + self.lint_node_id, + self.features, + ) { + return vec![]; + } + + if recursive { + // We call `process_cfg_attr` recursively in case there's a + // `cfg_attr` inside of another `cfg_attr`. E.g. + // `#[cfg_attr(false, cfg_attr(true, some_attr))]`. 
+ expanded_attrs + .into_iter() + .flat_map(|item| self.process_cfg_attr(self.expand_cfg_attr_item(&attr, item))) + .collect() + } else { + expanded_attrs.into_iter().map(|item| self.expand_cfg_attr_item(&attr, item)).collect() + } + } + + fn expand_cfg_attr_item( + &self, + attr: &Attribute, + (item, item_span): (ast::AttrItem, Span), + ) -> Attribute { + let orig_tokens = attr.tokens().to_tokenstream(); + + // We are taking an attribute of the form `#[cfg_attr(pred, attr)]` + // and producing an attribute of the form `#[attr]`. We + // have captured tokens for `attr` itself, but we need to + // synthesize tokens for the wrapper `#` and `[]`, which + // we do below. + + // Use the `#` in `#[cfg_attr(pred, attr)]` as the `#` token + // for `attr` when we expand it to `#[attr]` + let mut orig_trees = orig_tokens.into_trees(); + let TokenTree::Token(pound_token @ Token { kind: TokenKind::Pound, .. }, _) = orig_trees.next().unwrap() else { + panic!("Bad tokens for attribute {:?}", attr); + }; + let pound_span = pound_token.span; + + let mut trees = vec![(AttrAnnotatedTokenTree::Token(pound_token), Spacing::Alone)]; + if attr.style == AttrStyle::Inner { + // For inner attributes, we do the same thing for the `!` in `#![some_attr]` + let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. }, _) = orig_trees.next().unwrap() else { + panic!("Bad tokens for attribute {:?}", attr); + }; + trees.push((AttrAnnotatedTokenTree::Token(bang_token), Spacing::Alone)); + } + // We don't really have a good span to use for the synthesized `[]` + // in `#[attr]`, so just use the span of the `#` token. 
+ let bracket_group = AttrAnnotatedTokenTree::Delimited( + DelimSpan::from_single(pound_span), + Delimiter::Bracket, + item.tokens + .as_ref() + .unwrap_or_else(|| panic!("Missing tokens for {:?}", item)) + .create_token_stream(), + ); + trees.push((bracket_group, Spacing::Alone)); + let tokens = Some(LazyTokenStream::new(AttrAnnotatedTokenStream::new(trees))); + let attr = attr::mk_attr_from_item(item, tokens, attr.style, item_span); + if attr.has_name(sym::crate_type) { + self.sess.parse_sess.buffer_lint( + rustc_lint_defs::builtin::DEPRECATED_CFG_ATTR_CRATE_TYPE_NAME, + attr.span, + ast::CRATE_NODE_ID, + "`crate_type` within an `#![cfg_attr] attribute is deprecated`", + ); + } + if attr.has_name(sym::crate_name) { + self.sess.parse_sess.buffer_lint( + rustc_lint_defs::builtin::DEPRECATED_CFG_ATTR_CRATE_TYPE_NAME, + attr.span, + ast::CRATE_NODE_ID, + "`crate_name` within an `#![cfg_attr] attribute is deprecated`", + ); + } + attr + } + + /// Determines if a node with the given attributes should be included in this configuration. 
+ fn in_cfg(&self, attrs: &[Attribute]) -> bool { + attrs.iter().all(|attr| !is_cfg(attr) || self.cfg_true(attr)) + } + + pub(crate) fn cfg_true(&self, attr: &Attribute) -> bool { + let meta_item = match validate_attr::parse_meta(&self.sess.parse_sess, attr) { + Ok(meta_item) => meta_item, + Err(mut err) => { + err.emit(); + return true; + } + }; + parse_cfg(&meta_item, &self.sess).map_or(true, |meta_item| { + attr::cfg_matches(&meta_item, &self.sess.parse_sess, self.lint_node_id, self.features) + }) + } + + /// If attributes are not allowed on expressions, emit an error for `attr` + pub(crate) fn maybe_emit_expr_attr_err(&self, attr: &Attribute) { + if !self.features.map_or(true, |features| features.stmt_expr_attributes) { + let mut err = feature_err( + &self.sess.parse_sess, + sym::stmt_expr_attributes, + attr.span, + "attributes on expressions are experimental", + ); + + if attr.is_doc_comment() { + err.help("`///` is for documentation comments. For a plain comment, use `//`."); + } + + err.emit(); + } + } + + pub fn configure_expr(&self, expr: &mut P<ast::Expr>) { + for attr in expr.attrs.iter() { + self.maybe_emit_expr_attr_err(attr); + } + + // If an expr is valid to cfg away it will have been removed by the + // outer stmt or expression folder before descending in here. + // Anything else is always required, and thus has to error out + // in case of a cfg attr. 
+ // + // N.B., this is intentionally not part of the visit_expr() function + // in order for filter_map_expr() to be able to avoid this check + if let Some(attr) = expr.attrs().iter().find(|a| is_cfg(*a)) { + let msg = "removing an expression is not supported in this position"; + self.sess.parse_sess.span_diagnostic.span_err(attr.span, msg); + } + + self.process_cfg_attrs(expr); + self.try_configure_tokens(&mut *expr); + } +} + +pub fn parse_cfg<'a>(meta_item: &'a MetaItem, sess: &Session) -> Option<&'a MetaItem> { + let error = |span, msg, suggestion: &str| { + let mut err = sess.parse_sess.span_diagnostic.struct_span_err(span, msg); + if !suggestion.is_empty() { + err.span_suggestion( + span, + "expected syntax is", + suggestion, + Applicability::HasPlaceholders, + ); + } + err.emit(); + None + }; + let span = meta_item.span; + match meta_item.meta_item_list() { + None => error(span, "`cfg` is not followed by parentheses", "cfg(/* predicate */)"), + Some([]) => error(span, "`cfg` predicate is not specified", ""), + Some([_, .., l]) => error(l.span(), "multiple `cfg` predicates are specified", ""), + Some([single]) => match single.meta_item() { + Some(meta_item) => Some(meta_item), + None => error(single.span(), "`cfg` predicate key cannot be a literal", ""), + }, + } +} + +fn is_cfg(attr: &Attribute) -> bool { + attr.has_name(sym::cfg) +} diff --git a/compiler/rustc_expand/src/expand.rs b/compiler/rustc_expand/src/expand.rs new file mode 100644 index 000000000..93eeca5b2 --- /dev/null +++ b/compiler/rustc_expand/src/expand.rs @@ -0,0 +1,1888 @@ +use crate::base::*; +use crate::config::StripUnconfigured; +use crate::hygiene::SyntaxContext; +use crate::mbe::macro_rules::annotate_err_with_kind; +use crate::module::{mod_dir_path, parse_external_mod, DirOwnership, ParsedExternalMod}; +use crate::placeholders::{placeholder, PlaceholderExpander}; + +use rustc_ast as ast; +use rustc_ast::mut_visit::*; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Delimiter}; 
+use rustc_ast::tokenstream::TokenStream; +use rustc_ast::visit::{self, AssocCtxt, Visitor}; +use rustc_ast::{AssocItemKind, AstNodeWrapper, AttrStyle, ExprKind, ForeignItemKind}; +use rustc_ast::{HasAttrs, HasNodeId}; +use rustc_ast::{Inline, ItemKind, MacArgs, MacStmtStyle, MetaItemKind, ModKind}; +use rustc_ast::{NestedMetaItem, NodeId, PatKind, StmtKind, TyKind}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::map_in_place::MapInPlace; +use rustc_data_structures::sync::Lrc; +use rustc_errors::{Applicability, PResult}; +use rustc_feature::Features; +use rustc_parse::parser::{ + AttemptLocalParseRecovery, CommaRecoveryMode, ForceCollect, Parser, RecoverColon, RecoverComma, +}; +use rustc_parse::validate_attr; +use rustc_session::lint::builtin::{UNUSED_ATTRIBUTES, UNUSED_DOC_COMMENTS}; +use rustc_session::lint::BuiltinLintDiagnostics; +use rustc_session::parse::{feature_err, ParseSess}; +use rustc_session::Limit; +use rustc_span::symbol::{sym, Ident}; +use rustc_span::{FileName, LocalExpnId, Span}; + +use smallvec::SmallVec; +use std::ops::Deref; +use std::path::PathBuf; +use std::rc::Rc; +use std::{iter, mem}; + +macro_rules! ast_fragments { + ( + $($Kind:ident($AstTy:ty) { + $kind_name:expr; + $(one fn $mut_visit_ast:ident; fn $visit_ast:ident;)? + $(many fn $flat_map_ast_elt:ident; fn $visit_ast_elt:ident($($args:tt)*);)? + fn $make_ast:ident; + })* + ) => { + /// A fragment of AST that can be produced by a single macro expansion. + /// Can also serve as an input and intermediate result for macro expansion operations. + pub enum AstFragment { + OptExpr(Option<P<ast::Expr>>), + $($Kind($AstTy),)* + } + + /// "Discriminant" of an AST fragment. 
+ #[derive(Copy, Clone, PartialEq, Eq)] + pub enum AstFragmentKind { + OptExpr, + $($Kind,)* + } + + impl AstFragmentKind { + pub fn name(self) -> &'static str { + match self { + AstFragmentKind::OptExpr => "expression", + $(AstFragmentKind::$Kind => $kind_name,)* + } + } + + fn make_from<'a>(self, result: Box<dyn MacResult + 'a>) -> Option<AstFragment> { + match self { + AstFragmentKind::OptExpr => + result.make_expr().map(Some).map(AstFragment::OptExpr), + $(AstFragmentKind::$Kind => result.$make_ast().map(AstFragment::$Kind),)* + } + } + } + + impl AstFragment { + pub fn add_placeholders(&mut self, placeholders: &[NodeId]) { + if placeholders.is_empty() { + return; + } + match self { + $($(AstFragment::$Kind(ast) => ast.extend(placeholders.iter().flat_map(|id| { + ${ignore(flat_map_ast_elt)} + placeholder(AstFragmentKind::$Kind, *id, None).$make_ast() + })),)?)* + _ => panic!("unexpected AST fragment kind") + } + } + + pub fn make_opt_expr(self) -> Option<P<ast::Expr>> { + match self { + AstFragment::OptExpr(expr) => expr, + _ => panic!("AstFragment::make_* called on the wrong kind of fragment"), + } + } + + $(pub fn $make_ast(self) -> $AstTy { + match self { + AstFragment::$Kind(ast) => ast, + _ => panic!("AstFragment::make_* called on the wrong kind of fragment"), + } + })* + + fn make_ast<T: InvocationCollectorNode>(self) -> T::OutputTy { + T::fragment_to_output(self) + } + + pub fn mut_visit_with<F: MutVisitor>(&mut self, vis: &mut F) { + match self { + AstFragment::OptExpr(opt_expr) => { + visit_clobber(opt_expr, |opt_expr| { + if let Some(expr) = opt_expr { + vis.filter_map_expr(expr) + } else { + None + } + }); + } + $($(AstFragment::$Kind(ast) => vis.$mut_visit_ast(ast),)?)* + $($(AstFragment::$Kind(ast) => + ast.flat_map_in_place(|ast| vis.$flat_map_ast_elt(ast)),)?)* + } + } + + pub fn visit_with<'a, V: Visitor<'a>>(&'a self, visitor: &mut V) { + match *self { + AstFragment::OptExpr(Some(ref expr)) => visitor.visit_expr(expr), + 
AstFragment::OptExpr(None) => {} + $($(AstFragment::$Kind(ref ast) => visitor.$visit_ast(ast),)?)* + $($(AstFragment::$Kind(ref ast) => for ast_elt in &ast[..] { + visitor.$visit_ast_elt(ast_elt, $($args)*); + })?)* + } + } + } + + impl<'a> MacResult for crate::mbe::macro_rules::ParserAnyMacro<'a> { + $(fn $make_ast(self: Box<crate::mbe::macro_rules::ParserAnyMacro<'a>>) + -> Option<$AstTy> { + Some(self.make(AstFragmentKind::$Kind).$make_ast()) + })* + } + } +} + +ast_fragments! { + Expr(P<ast::Expr>) { "expression"; one fn visit_expr; fn visit_expr; fn make_expr; } + Pat(P<ast::Pat>) { "pattern"; one fn visit_pat; fn visit_pat; fn make_pat; } + Ty(P<ast::Ty>) { "type"; one fn visit_ty; fn visit_ty; fn make_ty; } + Stmts(SmallVec<[ast::Stmt; 1]>) { + "statement"; many fn flat_map_stmt; fn visit_stmt(); fn make_stmts; + } + Items(SmallVec<[P<ast::Item>; 1]>) { + "item"; many fn flat_map_item; fn visit_item(); fn make_items; + } + TraitItems(SmallVec<[P<ast::AssocItem>; 1]>) { + "trait item"; + many fn flat_map_trait_item; + fn visit_assoc_item(AssocCtxt::Trait); + fn make_trait_items; + } + ImplItems(SmallVec<[P<ast::AssocItem>; 1]>) { + "impl item"; + many fn flat_map_impl_item; + fn visit_assoc_item(AssocCtxt::Impl); + fn make_impl_items; + } + ForeignItems(SmallVec<[P<ast::ForeignItem>; 1]>) { + "foreign item"; + many fn flat_map_foreign_item; + fn visit_foreign_item(); + fn make_foreign_items; + } + Arms(SmallVec<[ast::Arm; 1]>) { + "match arm"; many fn flat_map_arm; fn visit_arm(); fn make_arms; + } + ExprFields(SmallVec<[ast::ExprField; 1]>) { + "field expression"; many fn flat_map_expr_field; fn visit_expr_field(); fn make_expr_fields; + } + PatFields(SmallVec<[ast::PatField; 1]>) { + "field pattern"; + many fn flat_map_pat_field; + fn visit_pat_field(); + fn make_pat_fields; + } + GenericParams(SmallVec<[ast::GenericParam; 1]>) { + "generic parameter"; + many fn flat_map_generic_param; + fn visit_generic_param(); + fn make_generic_params; + } + 
Params(SmallVec<[ast::Param; 1]>) { + "function parameter"; many fn flat_map_param; fn visit_param(); fn make_params; + } + FieldDefs(SmallVec<[ast::FieldDef; 1]>) { + "field"; + many fn flat_map_field_def; + fn visit_field_def(); + fn make_field_defs; + } + Variants(SmallVec<[ast::Variant; 1]>) { + "variant"; many fn flat_map_variant; fn visit_variant(); fn make_variants; + } + Crate(ast::Crate) { "crate"; one fn visit_crate; fn visit_crate; fn make_crate; } +} + +pub enum SupportsMacroExpansion { + No, + Yes { supports_inner_attrs: bool }, +} + +impl AstFragmentKind { + pub(crate) fn dummy(self, span: Span) -> AstFragment { + self.make_from(DummyResult::any(span)).expect("couldn't create a dummy AST fragment") + } + + pub fn supports_macro_expansion(self) -> SupportsMacroExpansion { + match self { + AstFragmentKind::OptExpr + | AstFragmentKind::Expr + | AstFragmentKind::Stmts + | AstFragmentKind::Ty + | AstFragmentKind::Pat => SupportsMacroExpansion::Yes { supports_inner_attrs: false }, + AstFragmentKind::Items + | AstFragmentKind::TraitItems + | AstFragmentKind::ImplItems + | AstFragmentKind::ForeignItems + | AstFragmentKind::Crate => SupportsMacroExpansion::Yes { supports_inner_attrs: true }, + AstFragmentKind::Arms + | AstFragmentKind::ExprFields + | AstFragmentKind::PatFields + | AstFragmentKind::GenericParams + | AstFragmentKind::Params + | AstFragmentKind::FieldDefs + | AstFragmentKind::Variants => SupportsMacroExpansion::No, + } + } + + fn expect_from_annotatables<I: IntoIterator<Item = Annotatable>>( + self, + items: I, + ) -> AstFragment { + let mut items = items.into_iter(); + match self { + AstFragmentKind::Arms => { + AstFragment::Arms(items.map(Annotatable::expect_arm).collect()) + } + AstFragmentKind::ExprFields => { + AstFragment::ExprFields(items.map(Annotatable::expect_expr_field).collect()) + } + AstFragmentKind::PatFields => { + AstFragment::PatFields(items.map(Annotatable::expect_pat_field).collect()) + } + AstFragmentKind::GenericParams => { 
+ AstFragment::GenericParams(items.map(Annotatable::expect_generic_param).collect()) + } + AstFragmentKind::Params => { + AstFragment::Params(items.map(Annotatable::expect_param).collect()) + } + AstFragmentKind::FieldDefs => { + AstFragment::FieldDefs(items.map(Annotatable::expect_field_def).collect()) + } + AstFragmentKind::Variants => { + AstFragment::Variants(items.map(Annotatable::expect_variant).collect()) + } + AstFragmentKind::Items => { + AstFragment::Items(items.map(Annotatable::expect_item).collect()) + } + AstFragmentKind::ImplItems => { + AstFragment::ImplItems(items.map(Annotatable::expect_impl_item).collect()) + } + AstFragmentKind::TraitItems => { + AstFragment::TraitItems(items.map(Annotatable::expect_trait_item).collect()) + } + AstFragmentKind::ForeignItems => { + AstFragment::ForeignItems(items.map(Annotatable::expect_foreign_item).collect()) + } + AstFragmentKind::Stmts => { + AstFragment::Stmts(items.map(Annotatable::expect_stmt).collect()) + } + AstFragmentKind::Expr => AstFragment::Expr( + items.next().expect("expected exactly one expression").expect_expr(), + ), + AstFragmentKind::OptExpr => { + AstFragment::OptExpr(items.next().map(Annotatable::expect_expr)) + } + AstFragmentKind::Crate => { + AstFragment::Crate(items.next().expect("expected exactly one crate").expect_crate()) + } + AstFragmentKind::Pat | AstFragmentKind::Ty => { + panic!("patterns and types aren't annotatable") + } + } + } +} + +pub struct Invocation { + pub kind: InvocationKind, + pub fragment_kind: AstFragmentKind, + pub expansion_data: ExpansionData, +} + +pub enum InvocationKind { + Bang { + mac: ast::MacCall, + span: Span, + }, + Attr { + attr: ast::Attribute, + // Re-insertion position for inert attributes. + pos: usize, + item: Annotatable, + // Required for resolving derive helper attributes. 
+ derives: Vec<ast::Path>, + }, + Derive { + path: ast::Path, + item: Annotatable, + }, +} + +impl InvocationKind { + fn placeholder_visibility(&self) -> Option<ast::Visibility> { + // HACK: For unnamed fields placeholders should have the same visibility as the actual + // fields because for tuple structs/variants resolve determines visibilities of their + // constructor using these field visibilities before attributes on them are expanded. + // The assumption is that the attribute expansion cannot change field visibilities, + // and it holds because only inert attributes are supported in this position. + match self { + InvocationKind::Attr { item: Annotatable::FieldDef(field), .. } + | InvocationKind::Derive { item: Annotatable::FieldDef(field), .. } + if field.ident.is_none() => + { + Some(field.vis.clone()) + } + _ => None, + } + } +} + +impl Invocation { + pub fn span(&self) -> Span { + match &self.kind { + InvocationKind::Bang { span, .. } => *span, + InvocationKind::Attr { attr, .. } => attr.span, + InvocationKind::Derive { path, .. } => path.span, + } + } +} + +pub struct MacroExpander<'a, 'b> { + pub cx: &'a mut ExtCtxt<'b>, + monotonic: bool, // cf. 
`cx.monotonic_expander()` +} + +impl<'a, 'b> MacroExpander<'a, 'b> { + pub fn new(cx: &'a mut ExtCtxt<'b>, monotonic: bool) -> Self { + MacroExpander { cx, monotonic } + } + + pub fn expand_crate(&mut self, krate: ast::Crate) -> ast::Crate { + let file_path = match self.cx.source_map().span_to_filename(krate.spans.inner_span) { + FileName::Real(name) => name + .into_local_path() + .expect("attempting to resolve a file path in an external file"), + other => PathBuf::from(other.prefer_local().to_string()), + }; + let dir_path = file_path.parent().unwrap_or(&file_path).to_owned(); + self.cx.root_path = dir_path.clone(); + self.cx.current_expansion.module = Rc::new(ModuleData { + mod_path: vec![Ident::from_str(&self.cx.ecfg.crate_name)], + file_path_stack: vec![file_path], + dir_path, + }); + let krate = self.fully_expand_fragment(AstFragment::Crate(krate)).make_crate(); + assert_eq!(krate.id, ast::CRATE_NODE_ID); + self.cx.trace_macros_diag(); + krate + } + + // Recursively expand all macro invocations in this AST fragment. + pub fn fully_expand_fragment(&mut self, input_fragment: AstFragment) -> AstFragment { + let orig_expansion_data = self.cx.current_expansion.clone(); + let orig_force_mode = self.cx.force_mode; + + // Collect all macro invocations and replace them with placeholders. + let (mut fragment_with_placeholders, mut invocations) = + self.collect_invocations(input_fragment, &[]); + + // Optimization: if we resolve all imports now, + // we'll be able to immediately resolve most of imported macros. + self.resolve_imports(); + + // Resolve paths in all invocations and produce output expanded fragments for them, but + // do not insert them into our input AST fragment yet, only store in `expanded_fragments`. + // The output fragments also go through expansion recursively until no invocations are left. + // Unresolved macros produce dummy outputs as a recovery measure. 
+ invocations.reverse(); + let mut expanded_fragments = Vec::new(); + let mut undetermined_invocations = Vec::new(); + let (mut progress, mut force) = (false, !self.monotonic); + loop { + let Some((invoc, ext)) = invocations.pop() else { + self.resolve_imports(); + if undetermined_invocations.is_empty() { + break; + } + invocations = mem::take(&mut undetermined_invocations); + force = !mem::replace(&mut progress, false); + if force && self.monotonic { + self.cx.sess.delay_span_bug( + invocations.last().unwrap().0.span(), + "expansion entered force mode without producing any errors", + ); + } + continue; + }; + + let ext = match ext { + Some(ext) => ext, + None => { + let eager_expansion_root = if self.monotonic { + invoc.expansion_data.id + } else { + orig_expansion_data.id + }; + match self.cx.resolver.resolve_macro_invocation( + &invoc, + eager_expansion_root, + force, + ) { + Ok(ext) => ext, + Err(Indeterminate) => { + // Cannot resolve, will retry this invocation later. + undetermined_invocations.push((invoc, None)); + continue; + } + } + } + }; + + let ExpansionData { depth, id: expn_id, .. } = invoc.expansion_data; + let depth = depth - orig_expansion_data.depth; + self.cx.current_expansion = invoc.expansion_data.clone(); + self.cx.force_mode = force; + + let fragment_kind = invoc.fragment_kind; + let (expanded_fragment, new_invocations) = match self.expand_invoc(invoc, &ext.kind) { + ExpandResult::Ready(fragment) => { + let mut derive_invocations = Vec::new(); + let derive_placeholders = self + .cx + .resolver + .take_derive_resolutions(expn_id) + .map(|derives| { + derive_invocations.reserve(derives.len()); + derives + .into_iter() + .map(|(path, item, _exts)| { + // FIXME: Consider using the derive resolutions (`_exts`) + // instead of enqueuing the derives to be resolved again later. 
+ let expn_id = LocalExpnId::fresh_empty(); + derive_invocations.push(( + Invocation { + kind: InvocationKind::Derive { path, item }, + fragment_kind, + expansion_data: ExpansionData { + id: expn_id, + ..self.cx.current_expansion.clone() + }, + }, + None, + )); + NodeId::placeholder_from_expn_id(expn_id) + }) + .collect::<Vec<_>>() + }) + .unwrap_or_default(); + + let (fragment, collected_invocations) = + self.collect_invocations(fragment, &derive_placeholders); + // We choose to expand any derive invocations associated with this macro invocation + // *before* any macro invocations collected from the output fragment + derive_invocations.extend(collected_invocations); + (fragment, derive_invocations) + } + ExpandResult::Retry(invoc) => { + if force { + self.cx.span_bug( + invoc.span(), + "expansion entered force mode but is still stuck", + ); + } else { + // Cannot expand, will retry this invocation later. + undetermined_invocations.push((invoc, Some(ext))); + continue; + } + } + }; + + progress = true; + if expanded_fragments.len() < depth { + expanded_fragments.push(Vec::new()); + } + expanded_fragments[depth - 1].push((expn_id, expanded_fragment)); + invocations.extend(new_invocations.into_iter().rev()); + } + + self.cx.current_expansion = orig_expansion_data; + self.cx.force_mode = orig_force_mode; + + // Finally incorporate all the expanded macros into the input AST fragment. 
+ let mut placeholder_expander = PlaceholderExpander::default(); + while let Some(expanded_fragments) = expanded_fragments.pop() { + for (expn_id, expanded_fragment) in expanded_fragments.into_iter().rev() { + placeholder_expander + .add(NodeId::placeholder_from_expn_id(expn_id), expanded_fragment); + } + } + fragment_with_placeholders.mut_visit_with(&mut placeholder_expander); + fragment_with_placeholders + } + + fn resolve_imports(&mut self) { + if self.monotonic { + self.cx.resolver.resolve_imports(); + } + } + + /// Collects all macro invocations reachable at this time in this AST fragment, and replace + /// them with "placeholders" - dummy macro invocations with specially crafted `NodeId`s. + /// Then call into resolver that builds a skeleton ("reduced graph") of the fragment and + /// prepares data for resolving paths of macro invocations. + fn collect_invocations( + &mut self, + mut fragment: AstFragment, + extra_placeholders: &[NodeId], + ) -> (AstFragment, Vec<(Invocation, Option<Lrc<SyntaxExtension>>)>) { + // Resolve `$crate`s in the fragment for pretty-printing. + self.cx.resolver.resolve_dollar_crates(); + + let mut invocations = { + let mut collector = InvocationCollector { + // Non-derive macro invocations cannot see the results of cfg expansion - they + // will either be removed along with the item, or invoked before the cfg/cfg_attr + // attribute is expanded. 
Therefore, we don't need to configure the tokens + // Derive macros *can* see the results of cfg-expansion - they are handled + // specially in `fully_expand_fragment` + cx: self.cx, + invocations: Vec::new(), + monotonic: self.monotonic, + }; + fragment.mut_visit_with(&mut collector); + fragment.add_placeholders(extra_placeholders); + collector.invocations + }; + + if self.monotonic { + self.cx + .resolver + .visit_ast_fragment_with_placeholders(self.cx.current_expansion.id, &fragment); + + if self.cx.sess.opts.unstable_opts.incremental_relative_spans { + for (invoc, _) in invocations.iter_mut() { + let expn_id = invoc.expansion_data.id; + let parent_def = self.cx.resolver.invocation_parent(expn_id); + let span = match &mut invoc.kind { + InvocationKind::Bang { ref mut span, .. } => span, + InvocationKind::Attr { attr, .. } => &mut attr.span, + InvocationKind::Derive { path, .. } => &mut path.span, + }; + *span = span.with_parent(Some(parent_def)); + } + } + } + + (fragment, invocations) + } + + fn error_recursion_limit_reached(&mut self) { + let expn_data = self.cx.current_expansion.id.expn_data(); + let suggested_limit = match self.cx.ecfg.recursion_limit { + Limit(0) => Limit(2), + limit => limit * 2, + }; + self.cx + .struct_span_err( + expn_data.call_site, + &format!("recursion limit reached while expanding `{}`", expn_data.kind.descr()), + ) + .help(&format!( + "consider increasing the recursion limit by adding a \ + `#![recursion_limit = \"{}\"]` attribute to your crate (`{}`)", + suggested_limit, self.cx.ecfg.crate_name, + )) + .emit(); + self.cx.trace_macros_diag(); + } + + /// A macro's expansion does not fit in this fragment kind. + /// For example, a non-type macro in a type position. 
+ fn error_wrong_fragment_kind(&mut self, kind: AstFragmentKind, mac: &ast::MacCall, span: Span) { + let msg = format!( + "non-{kind} macro in {kind} position: {path}", + kind = kind.name(), + path = pprust::path_to_string(&mac.path), + ); + self.cx.span_err(span, &msg); + self.cx.trace_macros_diag(); + } + + fn expand_invoc( + &mut self, + invoc: Invocation, + ext: &SyntaxExtensionKind, + ) -> ExpandResult<AstFragment, Invocation> { + let recursion_limit = + self.cx.reduced_recursion_limit.unwrap_or(self.cx.ecfg.recursion_limit); + if !recursion_limit.value_within_limit(self.cx.current_expansion.depth) { + if self.cx.reduced_recursion_limit.is_none() { + self.error_recursion_limit_reached(); + } + + // Reduce the recursion limit by half each time it triggers. + self.cx.reduced_recursion_limit = Some(recursion_limit / 2); + + return ExpandResult::Ready(invoc.fragment_kind.dummy(invoc.span())); + } + + let (fragment_kind, span) = (invoc.fragment_kind, invoc.span()); + ExpandResult::Ready(match invoc.kind { + InvocationKind::Bang { mac, .. 
} => match ext { + SyntaxExtensionKind::Bang(expander) => { + let Ok(tok_result) = expander.expand(self.cx, span, mac.args.inner_tokens()) else { + return ExpandResult::Ready(fragment_kind.dummy(span)); + }; + self.parse_ast_fragment(tok_result, fragment_kind, &mac.path, span) + } + SyntaxExtensionKind::LegacyBang(expander) => { + let prev = self.cx.current_expansion.prior_type_ascription; + self.cx.current_expansion.prior_type_ascription = mac.prior_type_ascription; + let tok_result = expander.expand(self.cx, span, mac.args.inner_tokens()); + let result = if let Some(result) = fragment_kind.make_from(tok_result) { + result + } else { + self.error_wrong_fragment_kind(fragment_kind, &mac, span); + fragment_kind.dummy(span) + }; + self.cx.current_expansion.prior_type_ascription = prev; + result + } + _ => unreachable!(), + }, + InvocationKind::Attr { attr, pos, mut item, derives } => match ext { + SyntaxExtensionKind::Attr(expander) => { + self.gate_proc_macro_input(&item); + self.gate_proc_macro_attr_item(span, &item); + let tokens = match &item { + // FIXME: Collect tokens and use them instead of generating + // fake ones. These are unstable, so it needs to be + // fixed prior to stabilization + // Fake tokens when we are invoking an inner attribute, and + // we are invoking it on an out-of-line module or crate. + Annotatable::Crate(krate) => rustc_parse::fake_token_stream_for_crate( + &self.cx.sess.parse_sess, + krate, + ), + Annotatable::Item(item_inner) + if matches!(attr.style, AttrStyle::Inner) + && matches!( + item_inner.kind, + ItemKind::Mod( + _, + ModKind::Unloaded | ModKind::Loaded(_, Inline::No, _), + ) + ) => + { + rustc_parse::fake_token_stream_for_item( + &self.cx.sess.parse_sess, + item_inner, + ) + } + _ => item.to_tokens(), + }; + let attr_item = attr.unwrap_normal_item(); + if let MacArgs::Eq(..) 
= attr_item.args { + self.cx.span_err(span, "key-value macro attributes are not supported"); + } + let inner_tokens = attr_item.args.inner_tokens(); + let Ok(tok_result) = expander.expand(self.cx, span, inner_tokens, tokens) else { + return ExpandResult::Ready(fragment_kind.dummy(span)); + }; + self.parse_ast_fragment(tok_result, fragment_kind, &attr_item.path, span) + } + SyntaxExtensionKind::LegacyAttr(expander) => { + match validate_attr::parse_meta(&self.cx.sess.parse_sess, &attr) { + Ok(meta) => { + let items = match expander.expand(self.cx, span, &meta, item) { + ExpandResult::Ready(items) => items, + ExpandResult::Retry(item) => { + // Reassemble the original invocation for retrying. + return ExpandResult::Retry(Invocation { + kind: InvocationKind::Attr { attr, pos, item, derives }, + ..invoc + }); + } + }; + if fragment_kind == AstFragmentKind::Expr && items.is_empty() { + let msg = + "removing an expression is not supported in this position"; + self.cx.span_err(span, msg); + fragment_kind.dummy(span) + } else { + fragment_kind.expect_from_annotatables(items) + } + } + Err(mut err) => { + err.emit(); + fragment_kind.dummy(span) + } + } + } + SyntaxExtensionKind::NonMacroAttr => { + self.cx.expanded_inert_attrs.mark(&attr); + item.visit_attrs(|attrs| attrs.insert(pos, attr)); + fragment_kind.expect_from_annotatables(iter::once(item)) + } + _ => unreachable!(), + }, + InvocationKind::Derive { path, item } => match ext { + SyntaxExtensionKind::Derive(expander) + | SyntaxExtensionKind::LegacyDerive(expander) => { + if let SyntaxExtensionKind::Derive(..) = ext { + self.gate_proc_macro_input(&item); + } + let meta = ast::MetaItem { kind: MetaItemKind::Word, span, path }; + let items = match expander.expand(self.cx, span, &meta, item) { + ExpandResult::Ready(items) => items, + ExpandResult::Retry(item) => { + // Reassemble the original invocation for retrying. 
+ return ExpandResult::Retry(Invocation { + kind: InvocationKind::Derive { path: meta.path, item }, + ..invoc + }); + } + }; + fragment_kind.expect_from_annotatables(items) + } + _ => unreachable!(), + }, + }) + } + + fn gate_proc_macro_attr_item(&self, span: Span, item: &Annotatable) { + let kind = match item { + Annotatable::Item(_) + | Annotatable::TraitItem(_) + | Annotatable::ImplItem(_) + | Annotatable::ForeignItem(_) + | Annotatable::Crate(..) => return, + Annotatable::Stmt(stmt) => { + // Attributes are stable on item statements, + // but unstable on all other kinds of statements + if stmt.is_item() { + return; + } + "statements" + } + Annotatable::Expr(_) => "expressions", + Annotatable::Arm(..) + | Annotatable::ExprField(..) + | Annotatable::PatField(..) + | Annotatable::GenericParam(..) + | Annotatable::Param(..) + | Annotatable::FieldDef(..) + | Annotatable::Variant(..) => panic!("unexpected annotatable"), + }; + if self.cx.ecfg.proc_macro_hygiene() { + return; + } + feature_err( + &self.cx.sess.parse_sess, + sym::proc_macro_hygiene, + span, + &format!("custom attributes cannot be applied to {}", kind), + ) + .emit(); + } + + fn gate_proc_macro_input(&self, annotatable: &Annotatable) { + struct GateProcMacroInput<'a> { + parse_sess: &'a ParseSess, + } + + impl<'ast, 'a> Visitor<'ast> for GateProcMacroInput<'a> { + fn visit_item(&mut self, item: &'ast ast::Item) { + match &item.kind { + ItemKind::Mod(_, mod_kind) + if !matches!(mod_kind, ModKind::Loaded(_, Inline::Yes, _)) => + { + feature_err( + self.parse_sess, + sym::proc_macro_hygiene, + item.span, + "non-inline modules in proc macro input are unstable", + ) + .emit(); + } + _ => {} + } + + visit::walk_item(self, item); + } + } + + if !self.cx.ecfg.proc_macro_hygiene() { + annotatable + .visit_with(&mut GateProcMacroInput { parse_sess: &self.cx.sess.parse_sess }); + } + } + + fn parse_ast_fragment( + &mut self, + toks: TokenStream, + kind: AstFragmentKind, + path: &ast::Path, + span: Span, + ) -> 
AstFragment { + let mut parser = self.cx.new_parser_from_tts(toks); + match parse_ast_fragment(&mut parser, kind) { + Ok(fragment) => { + ensure_complete_parse(&mut parser, path, kind.name(), span); + fragment + } + Err(mut err) => { + if err.span.is_dummy() { + err.set_span(span); + } + annotate_err_with_kind(&mut err, kind, span); + err.emit(); + self.cx.trace_macros_diag(); + kind.dummy(span) + } + } + } +} + +pub fn parse_ast_fragment<'a>( + this: &mut Parser<'a>, + kind: AstFragmentKind, +) -> PResult<'a, AstFragment> { + Ok(match kind { + AstFragmentKind::Items => { + let mut items = SmallVec::new(); + while let Some(item) = this.parse_item(ForceCollect::No)? { + items.push(item); + } + AstFragment::Items(items) + } + AstFragmentKind::TraitItems => { + let mut items = SmallVec::new(); + while let Some(item) = this.parse_trait_item(ForceCollect::No)? { + items.extend(item); + } + AstFragment::TraitItems(items) + } + AstFragmentKind::ImplItems => { + let mut items = SmallVec::new(); + while let Some(item) = this.parse_impl_item(ForceCollect::No)? { + items.extend(item); + } + AstFragment::ImplItems(items) + } + AstFragmentKind::ForeignItems => { + let mut items = SmallVec::new(); + while let Some(item) = this.parse_foreign_item(ForceCollect::No)? { + items.extend(item); + } + AstFragment::ForeignItems(items) + } + AstFragmentKind::Stmts => { + let mut stmts = SmallVec::new(); + // Won't make progress on a `}`. + while this.token != token::Eof && this.token != token::CloseDelim(Delimiter::Brace) { + if let Some(stmt) = this.parse_full_stmt(AttemptLocalParseRecovery::Yes)? 
{ + stmts.push(stmt); + } + } + AstFragment::Stmts(stmts) + } + AstFragmentKind::Expr => AstFragment::Expr(this.parse_expr()?), + AstFragmentKind::OptExpr => { + if this.token != token::Eof { + AstFragment::OptExpr(Some(this.parse_expr()?)) + } else { + AstFragment::OptExpr(None) + } + } + AstFragmentKind::Ty => AstFragment::Ty(this.parse_ty()?), + AstFragmentKind::Pat => AstFragment::Pat(this.parse_pat_allow_top_alt( + None, + RecoverComma::No, + RecoverColon::Yes, + CommaRecoveryMode::LikelyTuple, + )?), + AstFragmentKind::Crate => AstFragment::Crate(this.parse_crate_mod()?), + AstFragmentKind::Arms + | AstFragmentKind::ExprFields + | AstFragmentKind::PatFields + | AstFragmentKind::GenericParams + | AstFragmentKind::Params + | AstFragmentKind::FieldDefs + | AstFragmentKind::Variants => panic!("unexpected AST fragment kind"), + }) +} + +pub fn ensure_complete_parse<'a>( + this: &mut Parser<'a>, + macro_path: &ast::Path, + kind_name: &str, + span: Span, +) { + if this.token != token::Eof { + let token = pprust::token_to_string(&this.token); + let msg = format!("macro expansion ignores token `{}` and any following", token); + // Avoid emitting backtrace info twice. + let def_site_span = this.token.span.with_ctxt(SyntaxContext::root()); + let mut err = this.struct_span_err(def_site_span, &msg); + err.span_label(span, "caused by the macro expansion here"); + let msg = format!( + "the usage of `{}!` is likely invalid in {} context", + pprust::path_to_string(macro_path), + kind_name, + ); + err.note(&msg); + let semi_span = this.sess.source_map().next_point(span); + + let semi_full_span = semi_span.to(this.sess.source_map().next_point(semi_span)); + match this.sess.source_map().span_to_snippet(semi_full_span) { + Ok(ref snippet) if &snippet[..] 
!= ";" && kind_name == "expression" => { + err.span_suggestion( + semi_span, + "you might be missing a semicolon here", + ";", + Applicability::MaybeIncorrect, + ); + } + _ => {} + } + err.emit(); + } +} + +/// Wraps a call to `noop_visit_*` / `noop_flat_map_*` +/// for an AST node that supports attributes +/// (see the `Annotatable` enum) +/// This method assigns a `NodeId`, and sets that `NodeId` +/// as our current 'lint node id'. If a macro call is found +/// inside this AST node, we will use this AST node's `NodeId` +/// to emit lints associated with that macro (allowing +/// `#[allow]` / `#[deny]` to be applied close to +/// the macro invocation). +/// +/// Do *not* call this for a macro AST node +/// (e.g. `ExprKind::MacCall`) - we cannot emit lints +/// at these AST nodes, since they are removed and +/// replaced with the result of macro expansion. +/// +/// All other `NodeId`s are assigned by `visit_id`. +/// * `self` is the 'self' parameter for the current method, +/// * `id` is a mutable reference to the `NodeId` field +/// of the current AST node. +/// * `closure` is a closure that executes the +/// `noop_visit_*` / `noop_flat_map_*` method +/// for the current AST node. +macro_rules! assign_id { + ($self:ident, $id:expr, $closure:expr) => {{ + let old_id = $self.cx.current_expansion.lint_node_id; + if $self.monotonic { + debug_assert_eq!(*$id, ast::DUMMY_NODE_ID); + let new_id = $self.cx.resolver.next_node_id(); + *$id = new_id; + $self.cx.current_expansion.lint_node_id = new_id; + } + let ret = ($closure)(); + $self.cx.current_expansion.lint_node_id = old_id; + ret + }}; +} + +enum AddSemicolon { + Yes, + No, +} + +/// A trait implemented for all `AstFragment` nodes and providing all pieces +/// of functionality used by `InvocationCollector`. 
+trait InvocationCollectorNode: HasAttrs + HasNodeId + Sized { + type OutputTy = SmallVec<[Self; 1]>; + type AttrsTy: Deref<Target = [ast::Attribute]> = Vec<ast::Attribute>; + const KIND: AstFragmentKind; + fn to_annotatable(self) -> Annotatable; + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy; + fn descr() -> &'static str { + unreachable!() + } + fn noop_flat_map<V: MutVisitor>(self, _visitor: &mut V) -> Self::OutputTy { + unreachable!() + } + fn noop_visit<V: MutVisitor>(&mut self, _visitor: &mut V) { + unreachable!() + } + fn is_mac_call(&self) -> bool { + false + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + unreachable!() + } + fn pre_flat_map_node_collect_attr(_cfg: &StripUnconfigured<'_>, _attr: &ast::Attribute) {} + fn post_flat_map_node_collect_bang(_output: &mut Self::OutputTy, _add_semicolon: AddSemicolon) { + } + fn wrap_flat_map_node_noop_flat_map( + node: Self, + collector: &mut InvocationCollector<'_, '_>, + noop_flat_map: impl FnOnce(Self, &mut InvocationCollector<'_, '_>) -> Self::OutputTy, + ) -> Result<Self::OutputTy, Self> { + Ok(noop_flat_map(node, collector)) + } +} + +impl InvocationCollectorNode for P<ast::Item> { + const KIND: AstFragmentKind = AstFragmentKind::Items; + fn to_annotatable(self) -> Annotatable { + Annotatable::Item(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_items() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_item(self, visitor) + } + fn is_mac_call(&self) -> bool { + matches!(self.kind, ItemKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let node = self.into_inner(); + match node.kind { + ItemKind::MacCall(mac) => (mac, node.attrs, AddSemicolon::No), + _ => unreachable!(), + } + } + fn wrap_flat_map_node_noop_flat_map( + mut node: Self, + collector: &mut InvocationCollector<'_, '_>, + noop_flat_map: impl FnOnce(Self, &mut 
InvocationCollector<'_, '_>) -> Self::OutputTy, + ) -> Result<Self::OutputTy, Self> { + if !matches!(node.kind, ItemKind::Mod(..)) { + return Ok(noop_flat_map(node, collector)); + } + + // Work around borrow checker not seeing through `P`'s deref. + let (ident, span, mut attrs) = (node.ident, node.span, mem::take(&mut node.attrs)); + let ItemKind::Mod(_, mod_kind) = &mut node.kind else { + unreachable!() + }; + + let ecx = &mut collector.cx; + let (file_path, dir_path, dir_ownership) = match mod_kind { + ModKind::Loaded(_, inline, _) => { + // Inline `mod foo { ... }`, but we still need to push directories. + let (dir_path, dir_ownership) = mod_dir_path( + &ecx.sess, + ident, + &attrs, + &ecx.current_expansion.module, + ecx.current_expansion.dir_ownership, + *inline, + ); + node.attrs = attrs; + (None, dir_path, dir_ownership) + } + ModKind::Unloaded => { + // We have an outline `mod foo;` so we need to parse the file. + let old_attrs_len = attrs.len(); + let ParsedExternalMod { items, spans, file_path, dir_path, dir_ownership } = + parse_external_mod( + &ecx.sess, + ident, + span, + &ecx.current_expansion.module, + ecx.current_expansion.dir_ownership, + &mut attrs, + ); + + if let Some(lint_store) = ecx.lint_store { + lint_store.pre_expansion_lint( + ecx.sess, + ecx.resolver.registered_tools(), + ecx.current_expansion.lint_node_id, + &attrs, + &items, + ident.name.as_str(), + ); + } + + *mod_kind = ModKind::Loaded(items, Inline::No, spans); + node.attrs = attrs; + if node.attrs.len() > old_attrs_len { + // If we loaded an out-of-line module and added some inner attributes, + // then we need to re-configure it and re-collect attributes for + // resolution and expansion. + return Err(node); + } + (Some(file_path), dir_path, dir_ownership) + } + }; + + // Set the module info before we flat map. 
+ let mut module = ecx.current_expansion.module.with_dir_path(dir_path); + module.mod_path.push(ident); + if let Some(file_path) = file_path { + module.file_path_stack.push(file_path); + } + + let orig_module = mem::replace(&mut ecx.current_expansion.module, Rc::new(module)); + let orig_dir_ownership = + mem::replace(&mut ecx.current_expansion.dir_ownership, dir_ownership); + + let res = Ok(noop_flat_map(node, collector)); + + collector.cx.current_expansion.dir_ownership = orig_dir_ownership; + collector.cx.current_expansion.module = orig_module; + res + } +} + +struct TraitItemTag; +impl InvocationCollectorNode for AstNodeWrapper<P<ast::AssocItem>, TraitItemTag> { + type OutputTy = SmallVec<[P<ast::AssocItem>; 1]>; + const KIND: AstFragmentKind = AstFragmentKind::TraitItems; + fn to_annotatable(self) -> Annotatable { + Annotatable::TraitItem(self.wrapped) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_trait_items() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_assoc_item(self.wrapped, visitor) + } + fn is_mac_call(&self) -> bool { + matches!(self.wrapped.kind, AssocItemKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let item = self.wrapped.into_inner(); + match item.kind { + AssocItemKind::MacCall(mac) => (mac, item.attrs, AddSemicolon::No), + _ => unreachable!(), + } + } +} + +struct ImplItemTag; +impl InvocationCollectorNode for AstNodeWrapper<P<ast::AssocItem>, ImplItemTag> { + type OutputTy = SmallVec<[P<ast::AssocItem>; 1]>; + const KIND: AstFragmentKind = AstFragmentKind::ImplItems; + fn to_annotatable(self) -> Annotatable { + Annotatable::ImplItem(self.wrapped) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_impl_items() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_assoc_item(self.wrapped, visitor) + } + fn is_mac_call(&self) -> 
bool { + matches!(self.wrapped.kind, AssocItemKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let item = self.wrapped.into_inner(); + match item.kind { + AssocItemKind::MacCall(mac) => (mac, item.attrs, AddSemicolon::No), + _ => unreachable!(), + } + } +} + +impl InvocationCollectorNode for P<ast::ForeignItem> { + const KIND: AstFragmentKind = AstFragmentKind::ForeignItems; + fn to_annotatable(self) -> Annotatable { + Annotatable::ForeignItem(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_foreign_items() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_foreign_item(self, visitor) + } + fn is_mac_call(&self) -> bool { + matches!(self.kind, ForeignItemKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let node = self.into_inner(); + match node.kind { + ForeignItemKind::MacCall(mac) => (mac, node.attrs, AddSemicolon::No), + _ => unreachable!(), + } + } +} + +impl InvocationCollectorNode for ast::Variant { + const KIND: AstFragmentKind = AstFragmentKind::Variants; + fn to_annotatable(self) -> Annotatable { + Annotatable::Variant(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_variants() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_variant(self, visitor) + } +} + +impl InvocationCollectorNode for ast::FieldDef { + const KIND: AstFragmentKind = AstFragmentKind::FieldDefs; + fn to_annotatable(self) -> Annotatable { + Annotatable::FieldDef(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_field_defs() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_field_def(self, visitor) + } +} + +impl InvocationCollectorNode for ast::PatField { + const KIND: AstFragmentKind = AstFragmentKind::PatFields; + fn 
to_annotatable(self) -> Annotatable { + Annotatable::PatField(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_pat_fields() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_pat_field(self, visitor) + } +} + +impl InvocationCollectorNode for ast::ExprField { + const KIND: AstFragmentKind = AstFragmentKind::ExprFields; + fn to_annotatable(self) -> Annotatable { + Annotatable::ExprField(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_expr_fields() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_expr_field(self, visitor) + } +} + +impl InvocationCollectorNode for ast::Param { + const KIND: AstFragmentKind = AstFragmentKind::Params; + fn to_annotatable(self) -> Annotatable { + Annotatable::Param(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_params() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_param(self, visitor) + } +} + +impl InvocationCollectorNode for ast::GenericParam { + const KIND: AstFragmentKind = AstFragmentKind::GenericParams; + fn to_annotatable(self) -> Annotatable { + Annotatable::GenericParam(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_generic_params() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_generic_param(self, visitor) + } +} + +impl InvocationCollectorNode for ast::Arm { + const KIND: AstFragmentKind = AstFragmentKind::Arms; + fn to_annotatable(self) -> Annotatable { + Annotatable::Arm(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_arms() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_arm(self, visitor) + } +} + +impl InvocationCollectorNode for ast::Stmt { + type AttrsTy = 
ast::AttrVec; + const KIND: AstFragmentKind = AstFragmentKind::Stmts; + fn to_annotatable(self) -> Annotatable { + Annotatable::Stmt(P(self)) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_stmts() + } + fn noop_flat_map<V: MutVisitor>(self, visitor: &mut V) -> Self::OutputTy { + noop_flat_map_stmt(self, visitor) + } + fn is_mac_call(&self) -> bool { + match &self.kind { + StmtKind::MacCall(..) => true, + StmtKind::Item(item) => matches!(item.kind, ItemKind::MacCall(..)), + StmtKind::Semi(expr) => matches!(expr.kind, ExprKind::MacCall(..)), + StmtKind::Expr(..) => unreachable!(), + StmtKind::Local(..) | StmtKind::Empty => false, + } + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + // We pull macro invocations (both attributes and fn-like macro calls) out of their + // `StmtKind`s and treat them as statement macro invocations, not as items or expressions. + let (add_semicolon, mac, attrs) = match self.kind { + StmtKind::MacCall(mac) => { + let ast::MacCallStmt { mac, style, attrs, .. } = mac.into_inner(); + (style == MacStmtStyle::Semicolon, mac, attrs) + } + StmtKind::Item(item) => match item.into_inner() { + ast::Item { kind: ItemKind::MacCall(mac), attrs, .. } => { + (mac.args.need_semicolon(), mac, attrs.into()) + } + _ => unreachable!(), + }, + StmtKind::Semi(expr) => match expr.into_inner() { + ast::Expr { kind: ExprKind::MacCall(mac), attrs, .. } => { + (mac.args.need_semicolon(), mac, attrs) + } + _ => unreachable!(), + }, + _ => unreachable!(), + }; + (mac, attrs, if add_semicolon { AddSemicolon::Yes } else { AddSemicolon::No }) + } + fn post_flat_map_node_collect_bang(stmts: &mut Self::OutputTy, add_semicolon: AddSemicolon) { + // If this is a macro invocation with a semicolon, then apply that + // semicolon to the final statement produced by expansion. 
+ if matches!(add_semicolon, AddSemicolon::Yes) { + if let Some(stmt) = stmts.pop() { + stmts.push(stmt.add_trailing_semicolon()); + } + } + } +} + +impl InvocationCollectorNode for ast::Crate { + type OutputTy = ast::Crate; + const KIND: AstFragmentKind = AstFragmentKind::Crate; + fn to_annotatable(self) -> Annotatable { + Annotatable::Crate(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_crate() + } + fn noop_visit<V: MutVisitor>(&mut self, visitor: &mut V) { + noop_visit_crate(self, visitor) + } +} + +impl InvocationCollectorNode for P<ast::Ty> { + type OutputTy = P<ast::Ty>; + const KIND: AstFragmentKind = AstFragmentKind::Ty; + fn to_annotatable(self) -> Annotatable { + unreachable!() + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_ty() + } + fn noop_visit<V: MutVisitor>(&mut self, visitor: &mut V) { + noop_visit_ty(self, visitor) + } + fn is_mac_call(&self) -> bool { + matches!(self.kind, ast::TyKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let node = self.into_inner(); + match node.kind { + TyKind::MacCall(mac) => (mac, Vec::new(), AddSemicolon::No), + _ => unreachable!(), + } + } +} + +impl InvocationCollectorNode for P<ast::Pat> { + type OutputTy = P<ast::Pat>; + const KIND: AstFragmentKind = AstFragmentKind::Pat; + fn to_annotatable(self) -> Annotatable { + unreachable!() + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_pat() + } + fn noop_visit<V: MutVisitor>(&mut self, visitor: &mut V) { + noop_visit_pat(self, visitor) + } + fn is_mac_call(&self) -> bool { + matches!(self.kind, PatKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let node = self.into_inner(); + match node.kind { + PatKind::MacCall(mac) => (mac, Vec::new(), AddSemicolon::No), + _ => unreachable!(), + } + } +} + +impl InvocationCollectorNode for P<ast::Expr> { + type 
OutputTy = P<ast::Expr>; + type AttrsTy = ast::AttrVec; + const KIND: AstFragmentKind = AstFragmentKind::Expr; + fn to_annotatable(self) -> Annotatable { + Annotatable::Expr(self) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_expr() + } + fn descr() -> &'static str { + "an expression" + } + fn noop_visit<V: MutVisitor>(&mut self, visitor: &mut V) { + noop_visit_expr(self, visitor) + } + fn is_mac_call(&self) -> bool { + matches!(self.kind, ExprKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let node = self.into_inner(); + match node.kind { + ExprKind::MacCall(mac) => (mac, node.attrs, AddSemicolon::No), + _ => unreachable!(), + } + } +} + +struct OptExprTag; +impl InvocationCollectorNode for AstNodeWrapper<P<ast::Expr>, OptExprTag> { + type OutputTy = Option<P<ast::Expr>>; + type AttrsTy = ast::AttrVec; + const KIND: AstFragmentKind = AstFragmentKind::OptExpr; + fn to_annotatable(self) -> Annotatable { + Annotatable::Expr(self.wrapped) + } + fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { + fragment.make_opt_expr() + } + fn noop_flat_map<V: MutVisitor>(mut self, visitor: &mut V) -> Self::OutputTy { + noop_visit_expr(&mut self.wrapped, visitor); + Some(self.wrapped) + } + fn is_mac_call(&self) -> bool { + matches!(self.wrapped.kind, ast::ExprKind::MacCall(..)) + } + fn take_mac_call(self) -> (ast::MacCall, Self::AttrsTy, AddSemicolon) { + let node = self.wrapped.into_inner(); + match node.kind { + ExprKind::MacCall(mac) => (mac, node.attrs, AddSemicolon::No), + _ => unreachable!(), + } + } + fn pre_flat_map_node_collect_attr(cfg: &StripUnconfigured<'_>, attr: &ast::Attribute) { + cfg.maybe_emit_expr_attr_err(&attr); + } +} + +struct InvocationCollector<'a, 'b> { + cx: &'a mut ExtCtxt<'b>, + invocations: Vec<(Invocation, Option<Lrc<SyntaxExtension>>)>, + monotonic: bool, +} + +impl<'a, 'b> InvocationCollector<'a, 'b> { + fn cfg(&self) -> StripUnconfigured<'_> 
{ + StripUnconfigured { + sess: &self.cx.sess, + features: self.cx.ecfg.features, + config_tokens: false, + lint_node_id: self.cx.current_expansion.lint_node_id, + } + } + + fn collect(&mut self, fragment_kind: AstFragmentKind, kind: InvocationKind) -> AstFragment { + let expn_id = LocalExpnId::fresh_empty(); + let vis = kind.placeholder_visibility(); + self.invocations.push(( + Invocation { + kind, + fragment_kind, + expansion_data: ExpansionData { + id: expn_id, + depth: self.cx.current_expansion.depth + 1, + ..self.cx.current_expansion.clone() + }, + }, + None, + )); + placeholder(fragment_kind, NodeId::placeholder_from_expn_id(expn_id), vis) + } + + fn collect_bang(&mut self, mac: ast::MacCall, kind: AstFragmentKind) -> AstFragment { + // cache the macro call span so that it can be + // easily adjusted for incremental compilation + let span = mac.span(); + self.collect(kind, InvocationKind::Bang { mac, span }) + } + + fn collect_attr( + &mut self, + (attr, pos, derives): (ast::Attribute, usize, Vec<ast::Path>), + item: Annotatable, + kind: AstFragmentKind, + ) -> AstFragment { + self.collect(kind, InvocationKind::Attr { attr, pos, item, derives }) + } + + /// If `item` is an attribute invocation, remove the attribute and return it together with + /// its position and derives following it. We have to collect the derives in order to resolve + /// legacy derive helpers (helpers written before derives that introduce them). 
+ fn take_first_attr( + &self, + item: &mut impl HasAttrs, + ) -> Option<(ast::Attribute, usize, Vec<ast::Path>)> { + let mut attr = None; + + let mut cfg_pos = None; + let mut attr_pos = None; + for (pos, attr) in item.attrs().iter().enumerate() { + if !attr.is_doc_comment() && !self.cx.expanded_inert_attrs.is_marked(attr) { + let name = attr.ident().map(|ident| ident.name); + if name == Some(sym::cfg) || name == Some(sym::cfg_attr) { + cfg_pos = Some(pos); // a cfg attr found, no need to search anymore + break; + } else if attr_pos.is_none() + && !name.map_or(false, rustc_feature::is_builtin_attr_name) + { + attr_pos = Some(pos); // a non-cfg attr found, still may find a cfg attr + } + } + } + + item.visit_attrs(|attrs| { + attr = Some(match (cfg_pos, attr_pos) { + (Some(pos), _) => (attrs.remove(pos), pos, Vec::new()), + (_, Some(pos)) => { + let attr = attrs.remove(pos); + let following_derives = attrs[pos..] + .iter() + .filter(|a| a.has_name(sym::derive)) + .flat_map(|a| a.meta_item_list().unwrap_or_default()) + .filter_map(|nested_meta| match nested_meta { + NestedMetaItem::MetaItem(ast::MetaItem { + kind: MetaItemKind::Word, + path, + .. + }) => Some(path), + _ => None, + }) + .collect(); + + (attr, pos, following_derives) + } + _ => return, + }); + }); + + attr + } + + // Detect use of feature-gated or invalid attributes on macro invocations + // since they will not be detected after macro expansion. 
+ fn check_attributes(&self, attrs: &[ast::Attribute], call: &ast::MacCall) { + let features = self.cx.ecfg.features.unwrap(); + let mut attrs = attrs.iter().peekable(); + let mut span: Option<Span> = None; + while let Some(attr) = attrs.next() { + rustc_ast_passes::feature_gate::check_attribute(attr, self.cx.sess, features); + validate_attr::check_meta(&self.cx.sess.parse_sess, attr); + + let current_span = if let Some(sp) = span { sp.to(attr.span) } else { attr.span }; + span = Some(current_span); + + if attrs.peek().map_or(false, |next_attr| next_attr.doc_str().is_some()) { + continue; + } + + if attr.is_doc_comment() { + self.cx.sess.parse_sess.buffer_lint_with_diagnostic( + &UNUSED_DOC_COMMENTS, + current_span, + self.cx.current_expansion.lint_node_id, + "unused doc comment", + BuiltinLintDiagnostics::UnusedDocComment(attr.span), + ); + } else if rustc_attr::is_builtin_attr(attr) { + let attr_name = attr.ident().unwrap().name; + // `#[cfg]` and `#[cfg_attr]` are special - they are + // eagerly evaluated. + if attr_name != sym::cfg && attr_name != sym::cfg_attr { + self.cx.sess.parse_sess.buffer_lint_with_diagnostic( + &UNUSED_ATTRIBUTES, + attr.span, + self.cx.current_expansion.lint_node_id, + &format!("unused attribute `{}`", attr_name), + BuiltinLintDiagnostics::UnusedBuiltinAttribute { + attr_name, + macro_name: pprust::path_to_string(&call.path), + invoc_span: call.path.span, + }, + ); + } + } + } + } + + fn expand_cfg_true( + &mut self, + node: &mut impl HasAttrs, + attr: ast::Attribute, + pos: usize, + ) -> bool { + let res = self.cfg().cfg_true(&attr); + if res { + // FIXME: `cfg(TRUE)` attributes do not currently remove themselves during expansion, + // and some tools like rustdoc and clippy rely on that. Find a way to remove them + // while keeping the tools working. 
+ self.cx.expanded_inert_attrs.mark(&attr); + node.visit_attrs(|attrs| attrs.insert(pos, attr)); + } + res + } + + fn expand_cfg_attr(&self, node: &mut impl HasAttrs, attr: ast::Attribute, pos: usize) { + node.visit_attrs(|attrs| { + attrs.splice(pos..pos, self.cfg().expand_cfg_attr(attr, false)); + }); + } + + fn flat_map_node<Node: InvocationCollectorNode<OutputTy: Default>>( + &mut self, + mut node: Node, + ) -> Node::OutputTy { + loop { + return match self.take_first_attr(&mut node) { + Some((attr, pos, derives)) => match attr.name_or_empty() { + sym::cfg => { + if self.expand_cfg_true(&mut node, attr, pos) { + continue; + } + Default::default() + } + sym::cfg_attr => { + self.expand_cfg_attr(&mut node, attr, pos); + continue; + } + _ => { + Node::pre_flat_map_node_collect_attr(&self.cfg(), &attr); + self.collect_attr((attr, pos, derives), node.to_annotatable(), Node::KIND) + .make_ast::<Node>() + } + }, + None if node.is_mac_call() => { + let (mac, attrs, add_semicolon) = node.take_mac_call(); + self.check_attributes(&attrs, &mac); + let mut res = self.collect_bang(mac, Node::KIND).make_ast::<Node>(); + Node::post_flat_map_node_collect_bang(&mut res, add_semicolon); + res + } + None => { + match Node::wrap_flat_map_node_noop_flat_map(node, self, |mut node, this| { + assign_id!(this, node.node_id_mut(), || node.noop_flat_map(this)) + }) { + Ok(output) => output, + Err(returned_node) => { + node = returned_node; + continue; + } + } + } + }; + } + } + + fn visit_node<Node: InvocationCollectorNode<OutputTy = Node> + DummyAstNode>( + &mut self, + node: &mut Node, + ) { + loop { + return match self.take_first_attr(node) { + Some((attr, pos, derives)) => match attr.name_or_empty() { + sym::cfg => { + let span = attr.span; + if self.expand_cfg_true(node, attr, pos) { + continue; + } + let msg = + format!("removing {} is not supported in this position", Node::descr()); + self.cx.span_err(span, &msg); + continue; + } + sym::cfg_attr => { + self.expand_cfg_attr(node, 
attr, pos); + continue; + } + _ => visit_clobber(node, |node| { + self.collect_attr((attr, pos, derives), node.to_annotatable(), Node::KIND) + .make_ast::<Node>() + }), + }, + None if node.is_mac_call() => { + visit_clobber(node, |node| { + // Do not clobber unless it's actually a macro (uncommon case). + let (mac, attrs, _) = node.take_mac_call(); + self.check_attributes(&attrs, &mac); + self.collect_bang(mac, Node::KIND).make_ast::<Node>() + }) + } + None => { + assign_id!(self, node.node_id_mut(), || node.noop_visit(self)) + } + }; + } + } +} + +impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> { + fn flat_map_item(&mut self, node: P<ast::Item>) -> SmallVec<[P<ast::Item>; 1]> { + self.flat_map_node(node) + } + + fn flat_map_trait_item(&mut self, node: P<ast::AssocItem>) -> SmallVec<[P<ast::AssocItem>; 1]> { + self.flat_map_node(AstNodeWrapper::new(node, TraitItemTag)) + } + + fn flat_map_impl_item(&mut self, node: P<ast::AssocItem>) -> SmallVec<[P<ast::AssocItem>; 1]> { + self.flat_map_node(AstNodeWrapper::new(node, ImplItemTag)) + } + + fn flat_map_foreign_item( + &mut self, + node: P<ast::ForeignItem>, + ) -> SmallVec<[P<ast::ForeignItem>; 1]> { + self.flat_map_node(node) + } + + fn flat_map_variant(&mut self, node: ast::Variant) -> SmallVec<[ast::Variant; 1]> { + self.flat_map_node(node) + } + + fn flat_map_field_def(&mut self, node: ast::FieldDef) -> SmallVec<[ast::FieldDef; 1]> { + self.flat_map_node(node) + } + + fn flat_map_pat_field(&mut self, node: ast::PatField) -> SmallVec<[ast::PatField; 1]> { + self.flat_map_node(node) + } + + fn flat_map_expr_field(&mut self, node: ast::ExprField) -> SmallVec<[ast::ExprField; 1]> { + self.flat_map_node(node) + } + + fn flat_map_param(&mut self, node: ast::Param) -> SmallVec<[ast::Param; 1]> { + self.flat_map_node(node) + } + + fn flat_map_generic_param( + &mut self, + node: ast::GenericParam, + ) -> SmallVec<[ast::GenericParam; 1]> { + self.flat_map_node(node) + } + + fn flat_map_arm(&mut self, node: 
ast::Arm) -> SmallVec<[ast::Arm; 1]> { + self.flat_map_node(node) + } + + fn flat_map_stmt(&mut self, node: ast::Stmt) -> SmallVec<[ast::Stmt; 1]> { + // FIXME: invocations in semicolon-less expressions positions are expanded as expressions, + // changing that requires some compatibility measures. + if node.is_expr() { + // The only way that we can end up with a `MacCall` expression statement, + // (as opposed to a `StmtKind::MacCall`) is if we have a macro as the + // trailing expression in a block (e.g. `fn foo() { my_macro!() }`). + // Record this information, so that we can report a more specific + // `SEMICOLON_IN_EXPRESSIONS_FROM_MACROS` lint if needed. + // See #78991 for an investigation of treating macros in this position + // as statements, rather than expressions, during parsing. + return match &node.kind { + StmtKind::Expr(expr) + if matches!(**expr, ast::Expr { kind: ExprKind::MacCall(..), .. }) => + { + self.cx.current_expansion.is_trailing_mac = true; + // Don't use `assign_id` for this statement - it may get removed + // entirely due to a `#[cfg]` on the contained expression + let res = noop_flat_map_stmt(node, self); + self.cx.current_expansion.is_trailing_mac = false; + res + } + _ => noop_flat_map_stmt(node, self), + }; + } + + self.flat_map_node(node) + } + + fn visit_crate(&mut self, node: &mut ast::Crate) { + self.visit_node(node) + } + + fn visit_ty(&mut self, node: &mut P<ast::Ty>) { + self.visit_node(node) + } + + fn visit_pat(&mut self, node: &mut P<ast::Pat>) { + self.visit_node(node) + } + + fn visit_expr(&mut self, node: &mut P<ast::Expr>) { + // FIXME: Feature gating is performed inconsistently between `Expr` and `OptExpr`. 
+ if let Some(attr) = node.attrs.first() { + self.cfg().maybe_emit_expr_attr_err(attr); + } + self.visit_node(node) + } + + fn filter_map_expr(&mut self, node: P<ast::Expr>) -> Option<P<ast::Expr>> { + self.flat_map_node(AstNodeWrapper::new(node, OptExprTag)) + } + + fn visit_block(&mut self, node: &mut P<ast::Block>) { + let orig_dir_ownership = mem::replace( + &mut self.cx.current_expansion.dir_ownership, + DirOwnership::UnownedViaBlock, + ); + noop_visit_block(node, self); + self.cx.current_expansion.dir_ownership = orig_dir_ownership; + } + + fn visit_id(&mut self, id: &mut NodeId) { + // We may have already assigned a `NodeId` + // by calling `assign_id` + if self.monotonic && *id == ast::DUMMY_NODE_ID { + *id = self.cx.resolver.next_node_id(); + } + } +} + +pub struct ExpansionConfig<'feat> { + pub crate_name: String, + pub features: Option<&'feat Features>, + pub recursion_limit: Limit, + pub trace_mac: bool, + pub should_test: bool, // If false, strip `#[test]` nodes + pub span_debug: bool, // If true, use verbose debugging for `proc_macro::Span` + pub proc_macro_backtrace: bool, // If true, show backtraces for proc-macro panics +} + +impl<'feat> ExpansionConfig<'feat> { + pub fn default(crate_name: String) -> ExpansionConfig<'static> { + ExpansionConfig { + crate_name, + features: None, + recursion_limit: Limit::new(1024), + trace_mac: false, + should_test: false, + span_debug: false, + proc_macro_backtrace: false, + } + } + + fn proc_macro_hygiene(&self) -> bool { + self.features.map_or(false, |features| features.proc_macro_hygiene) + } +} diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs new file mode 100644 index 000000000..9d0232822 --- /dev/null +++ b/compiler/rustc_expand/src/lib.rs @@ -0,0 +1,53 @@ +#![feature(array_windows)] +#![feature(associated_type_bounds)] +#![feature(associated_type_defaults)] +#![feature(if_let_guard)] +#![feature(let_chains)] +#![feature(let_else)] +#![feature(macro_metavar_expr)] 
+#![feature(proc_macro_diagnostic)] +#![feature(proc_macro_internals)] +#![feature(proc_macro_span)] +#![feature(rustc_attrs)] +#![feature(try_blocks)] +#![recursion_limit = "256"] + +#[macro_use] +extern crate rustc_macros; + +extern crate proc_macro as pm; + +mod placeholders; +mod proc_macro_server; + +pub use mbe::macro_rules::compile_declarative_macro; +pub(crate) use rustc_span::hygiene; +pub mod base; +pub mod build; +#[macro_use] +pub mod config; +pub mod expand; +pub mod module; +pub mod proc_macro; + +pub(crate) mod mbe; + +// HACK(Centril, #64197): These shouldn't really be here. +// Rather, they should be with their respective modules which are defined in other crates. +// However, since for now constructing a `ParseSess` sorta requires `config` from this crate, +// these tests will need to live here in the interim. + +#[cfg(test)] +mod tests; +#[cfg(test)] +mod parse { + mod tests; +} +#[cfg(test)] +mod tokenstream { + mod tests; +} +#[cfg(test)] +mod mut_visit { + mod tests; +} diff --git a/compiler/rustc_expand/src/mbe.rs b/compiler/rustc_expand/src/mbe.rs new file mode 100644 index 000000000..f42576b16 --- /dev/null +++ b/compiler/rustc_expand/src/mbe.rs @@ -0,0 +1,110 @@ +//! This module implements declarative macros: old `macro_rules` and the newer +//! `macro`. Declarative macros are also known as "macro by example", and that's +//! why we call this module `mbe`. For external documentation, prefer the +//! official terminology: "declarative macros". + +pub(crate) mod macro_check; +pub(crate) mod macro_parser; +pub(crate) mod macro_rules; +pub(crate) mod metavar_expr; +pub(crate) mod quoted; +pub(crate) mod transcribe; + +use metavar_expr::MetaVarExpr; +use rustc_ast::token::{Delimiter, NonterminalKind, Token, TokenKind}; +use rustc_ast::tokenstream::DelimSpan; +use rustc_span::symbol::Ident; +use rustc_span::Span; + +/// Contains the sub-token-trees of a "delimited" token tree such as `(a b c)`. 
+/// The delimiters are not represented explicitly in the `tts` vector. +#[derive(PartialEq, Encodable, Decodable, Debug)] +struct Delimited { + delim: Delimiter, + /// FIXME: #67062 has details about why this is sub-optimal. + tts: Vec<TokenTree>, +} + +#[derive(PartialEq, Encodable, Decodable, Debug)] +struct SequenceRepetition { + /// The sequence of token trees + tts: Vec<TokenTree>, + /// The optional separator + separator: Option<Token>, + /// Whether the sequence can be repeated zero (*), or one or more times (+) + kleene: KleeneToken, + /// The number of `Match`s that appear in the sequence (and subsequences) + num_captures: usize, +} + +#[derive(Clone, PartialEq, Encodable, Decodable, Debug, Copy)] +struct KleeneToken { + span: Span, + op: KleeneOp, +} + +impl KleeneToken { + fn new(op: KleeneOp, span: Span) -> KleeneToken { + KleeneToken { span, op } + } +} + +/// A Kleene-style [repetition operator](https://en.wikipedia.org/wiki/Kleene_star) +/// for token sequences. +#[derive(Clone, PartialEq, Encodable, Decodable, Debug, Copy)] +enum KleeneOp { + /// Kleene star (`*`) for zero or more repetitions + ZeroOrMore, + /// Kleene plus (`+`) for one or more repetitions + OneOrMore, + /// Kleene optional (`?`) for zero or one repetitions + ZeroOrOne, +} + +/// Similar to `tokenstream::TokenTree`, except that `Sequence`, `MetaVar`, `MetaVarDecl`, and +/// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros. +#[derive(Debug, PartialEq, Encodable, Decodable)] +enum TokenTree { + Token(Token), + /// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS). + Delimited(DelimSpan, Delimited), + /// A kleene-style repetition sequence, e.g. `$($e:expr)*` (RHS) or `$($e),*` (LHS). + Sequence(DelimSpan, SequenceRepetition), + /// e.g., `$var`. + MetaVar(Span, Ident), + /// e.g., `$var:expr`. Only appears on the LHS. + MetaVarDecl(Span, Ident /* name to bind */, Option<NonterminalKind>), + /// A meta-variable expression inside `${...}`. 
+ MetaVarExpr(DelimSpan, MetaVarExpr), +} + +impl TokenTree { + /// Returns `true` if the given token tree is delimited. + fn is_delimited(&self) -> bool { + matches!(*self, TokenTree::Delimited(..)) + } + + /// Returns `true` if the given token tree is a token of the given kind. + fn is_token(&self, expected_kind: &TokenKind) -> bool { + match self { + TokenTree::Token(Token { kind: actual_kind, .. }) => actual_kind == expected_kind, + _ => false, + } + } + + /// Retrieves the `TokenTree`'s span. + fn span(&self) -> Span { + match *self { + TokenTree::Token(Token { span, .. }) + | TokenTree::MetaVar(span, _) + | TokenTree::MetaVarDecl(span, _, _) => span, + TokenTree::Delimited(span, _) + | TokenTree::MetaVarExpr(span, _) + | TokenTree::Sequence(span, _) => span.entire(), + } + } + + fn token(kind: TokenKind, span: Span) -> TokenTree { + TokenTree::Token(Token::new(kind, span)) + } +} diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs new file mode 100644 index 000000000..8994a2f78 --- /dev/null +++ b/compiler/rustc_expand/src/mbe/macro_check.rs @@ -0,0 +1,652 @@ +//! Checks that meta-variables in macro definition are correctly declared and used. +//! +//! # What is checked +//! +//! ## Meta-variables must not be bound twice +//! +//! ```compile_fail +//! macro_rules! foo { ($x:tt $x:tt) => { $x }; } +//! ``` +//! +//! This check is sound (no false-negative) and complete (no false-positive). +//! +//! ## Meta-variables must not be free +//! +//! ``` +//! macro_rules! foo { () => { $x }; } +//! ``` +//! +//! This check is also done at macro instantiation but only if the branch is taken. +//! +//! ## Meta-variables must repeat at least as many times as their binder +//! +//! ``` +//! macro_rules! foo { ($($x:tt)*) => { $x }; } +//! ``` +//! +//! This check is also done at macro instantiation but only if the branch is taken. +//! +//! 
## Meta-variables must repeat with the same Kleene operators as their binder +//! +//! ``` +//! macro_rules! foo { ($($x:tt)+) => { $($x)* }; } +//! ``` +//! +//! This check is not done at macro instantiation. +//! +//! # Disclaimer +//! +//! In the presence of nested macros (a macro defined in a macro), those checks may have false +//! positives and false negatives. We try to detect those cases by recognizing potential macro +//! definitions in RHSes, but nested macros may be hidden through the use of particular values of +//! meta-variables. +//! +//! ## Examples of false positive +//! +//! False positives can come from cases where we don't recognize a nested macro, because it depends +//! on particular values of meta-variables. In the following example, we think both instances of +//! `$x` are free, which is a correct statement if `$name` is anything but `macro_rules`. But when +//! `$name` is `macro_rules`, like in the instantiation below, then `$x:tt` is actually a binder of +//! the nested macro and `$x` is bound to it. +//! +//! ``` +//! macro_rules! foo { ($name:ident) => { $name! bar { ($x:tt) => { $x }; } }; } +//! foo!(macro_rules); +//! ``` +//! +//! False positives can also come from cases where we think there is a nested macro while there +//! isn't. In the following example, we think `$x` is free, which is incorrect because `bar` is not +//! a nested macro since it is not evaluated as code by `stringify!`. +//! +//! ``` +//! macro_rules! foo { () => { stringify!(macro_rules! bar { () => { $x }; }) }; } +//! ``` +//! +//! ## Examples of false negative +//! +//! False negatives can come from cases where we don't recognize a meta-variable, because it depends +//! on particular values of meta-variables. In the following examples, we don't see that if `$d` is +//! instantiated with `$` then `$d z` becomes `$z` in the nested macro definition and is thus a free +//! meta-variable. 
Note however, that if `foo` is instantiated, then we would check the definition +//! of `bar` and would see the issue. +//! +//! ``` +//! macro_rules! foo { ($d:tt) => { macro_rules! bar { ($y:tt) => { $d z }; } }; } +//! ``` +//! +//! # How it is checked +//! +//! There are 3 main functions: `check_binders`, `check_occurrences`, and `check_nested_macro`. They +//! all need some kind of environment. +//! +//! ## Environments +//! +//! Environments are used to pass information. +//! +//! ### From LHS to RHS +//! +//! When checking a LHS with `check_binders`, we produce (and use) an environment for binders, +//! namely `Binders`. This is a mapping from binder name to information about that binder: the span +//! of the binder for error messages and the stack of Kleene operators under which it was bound in +//! the LHS. +//! +//! This environment is used by both the LHS and RHS. The LHS uses it to detect duplicate binders. +//! The RHS uses it to detect the other errors. +//! +//! ### From outer macro to inner macro +//! +//! When checking the RHS of an outer macro and we detect a nested macro definition, we push the +//! current state, namely `MacroState`, to an environment of nested macro definitions. Each state +//! stores the LHS binders when entering the macro definition as well as the stack of Kleene +//! operators under which the inner macro is defined in the RHS. +//! +//! This environment is a stack representing the nesting of macro definitions. As such, the stack of +//! Kleene operators under which a meta-variable is repeating is the concatenation of the stacks +//! stored when entering a macro definition starting from the state in which the meta-variable is +//! bound. 
+use crate::mbe::{KleeneToken, TokenTree}; + +use rustc_ast::token::{Delimiter, Token, TokenKind}; +use rustc_ast::{NodeId, DUMMY_NODE_ID}; +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::MultiSpan; +use rustc_session::lint::builtin::{META_VARIABLE_MISUSE, MISSING_FRAGMENT_SPECIFIER}; +use rustc_session::parse::ParseSess; +use rustc_span::symbol::kw; +use rustc_span::{symbol::MacroRulesNormalizedIdent, Span}; + +use smallvec::SmallVec; + +use std::iter; + +/// Stack represented as linked list. +/// +/// Those are used for environments because they grow incrementally and are not mutable. +enum Stack<'a, T> { + /// Empty stack. + Empty, + /// A non-empty stack. + Push { + /// The top element. + top: T, + /// The previous elements. + prev: &'a Stack<'a, T>, + }, +} + +impl<'a, T> Stack<'a, T> { + /// Returns whether a stack is empty. + fn is_empty(&self) -> bool { + matches!(*self, Stack::Empty) + } + + /// Returns a new stack with an element of top. + fn push(&'a self, top: T) -> Stack<'a, T> { + Stack::Push { top, prev: self } + } +} + +impl<'a, T> Iterator for &'a Stack<'a, T> { + type Item = &'a T; + + // Iterates from top to bottom of the stack. + fn next(&mut self) -> Option<&'a T> { + match *self { + Stack::Empty => None, + Stack::Push { ref top, ref prev } => { + *self = prev; + Some(top) + } + } + } +} + +impl From<&Stack<'_, KleeneToken>> for SmallVec<[KleeneToken; 1]> { + fn from(ops: &Stack<'_, KleeneToken>) -> SmallVec<[KleeneToken; 1]> { + let mut ops: SmallVec<[KleeneToken; 1]> = ops.cloned().collect(); + // The stack is innermost on top. We want outermost first. + ops.reverse(); + ops + } +} + +/// Information attached to a meta-variable binder in LHS. +struct BinderInfo { + /// The span of the meta-variable in LHS. + span: Span, + /// The stack of Kleene operators (outermost first). + ops: SmallVec<[KleeneToken; 1]>, +} + +/// An environment of meta-variables to their binder information. 
type Binders = FxHashMap<MacroRulesNormalizedIdent, BinderInfo>;

/// The state at which we entered a macro definition in the RHS of another macro definition.
struct MacroState<'a> {
    /// The binders of the branch where we entered the macro definition.
    binders: &'a Binders,
    /// The stack of Kleene operators (outermost first) where we entered the macro definition.
    ops: SmallVec<[KleeneToken; 1]>,
}

/// Checks that meta-variables are used correctly in a macro definition.
///
/// Each LHS is paired with the RHS at the same index; the LHS is checked first so that the
/// binders it declares are available when its RHS is checked. Every pair starts from a fresh,
/// empty set of binders and empty stacks of outer macros and Kleene operators.
///
/// Arguments:
/// - `sess` is used to emit diagnostics and lints
/// - `node_id` is used to emit lints
/// - `span` is used when no spans are available
/// - `lhses` and `rhses` should have the same length and represent the macro definition
///
/// Returns `true` unless one of the checks set the validity flag to `false`.
pub(super) fn check_meta_variables(
    sess: &ParseSess,
    node_id: NodeId,
    span: Span,
    lhses: &[TokenTree],
    rhses: &[TokenTree],
) -> bool {
    // A length mismatch is reported through `span_bug`, i.e. as a compiler bug rather than a
    // user error (NOTE(review): presumably diverges - confirm against `rustc_errors::Handler`).
    if lhses.len() != rhses.len() {
        sess.span_diagnostic.span_bug(span, "length mismatch between LHSes and RHSes")
    }
    // Stays `true` until a callee clears it through the `&mut valid` it is handed.
    let mut valid = true;
    for (lhs, rhs) in iter::zip(lhses, rhses) {
        // Binders are per arm: nothing bound in one arm is visible in another.
        let mut binders = Binders::default();
        check_binders(sess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut valid);
        check_occurrences(sess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut valid);
    }
    valid
}

/// Checks `lhs` as part of the LHS of a macro definition, extends `binders` with new binders, and
/// sets `valid` to false in case of errors.
///
/// The token tree is walked recursively: delimited groups are traversed with the same Kleene
/// stack, sequences push their Kleene operator before their contents are traversed.
///
/// Arguments:
/// - `sess` is used to emit diagnostics and lints
/// - `node_id` is used to emit lints
/// - `lhs` is checked as part of a LHS
/// - `macros` is the stack of possible outer macros
/// - `binders` contains the binders of the LHS
/// - `ops` is the stack of Kleene operators from the LHS
/// - `valid` is set in case of errors
fn check_binders(
    sess: &ParseSess,
    node_id: NodeId,
    lhs: &TokenTree,
    macros: &Stack<'_, MacroState<'_>>,
    binders: &mut Binders,
    ops: &Stack<'_, KleeneToken>,
    valid: &mut bool,
) {
    match *lhs {
        // Plain tokens neither bind nor use meta-variables.
        TokenTree::Token(..) => {}
        // This can only happen when checking a nested macro because this LHS is then in the RHS of
        // the outer macro. See ui/macros/macro-of-higher-order.rs where $y:$fragment in the
        // LHS of the nested macro (and RHS of the outer macro) is parsed as MetaVar(y) Colon
        // MetaVar(fragment) and not as MetaVarDecl(y, fragment).
        TokenTree::MetaVar(span, name) => {
            // With no outer macro we are in a top-level LHS, where a bare `$name` is a bug.
            if macros.is_empty() {
                sess.span_diagnostic.span_bug(span, "unexpected MetaVar in lhs");
            }
            let name = MacroRulesNormalizedIdent::new(name);
            // There are 3 possibilities:
            if let Some(prev_info) = binders.get(&name) {
                // 1. The meta-variable is already bound in the current LHS: This is an error.
                //    It is only reported through `buffer_lint` here; `valid` is left untouched.
                let mut span = MultiSpan::from_span(span);
                span.push_span_label(prev_info.span, "previous declaration");
                buffer_lint(sess, span, node_id, "duplicate matcher binding");
            } else if get_binder_info(macros, binders, name).is_none() {
                // 2. The meta-variable is free: This is a binder.
                binders.insert(name, BinderInfo { span, ops: ops.into() });
            } else {
                // 3. The meta-variable is bound: This is an occurrence.
                check_occurrences(sess, node_id, lhs, macros, binders, ops, valid);
            }
        }
        // Similarly, this can only happen when checking a toplevel macro.
        TokenTree::MetaVarDecl(span, name, kind) => {
            // `kind` is the optional fragment specifier; the lint is skipped for dummy node ids.
            if kind.is_none() && node_id != DUMMY_NODE_ID {
                // FIXME: Report this as a hard error eventually and remove equivalent errors from
                // `parse_tt_inner` and `nameize`. Until then the error may be reported twice, once
                // as a hard error and then once as a buffered lint.
                sess.buffer_lint(
                    MISSING_FRAGMENT_SPECIFIER,
                    span,
                    node_id,
                    "missing fragment specifier",
                );
            }
            // A `$name:kind` declaration inside a nested LHS is a bug (see the `MetaVar` arm).
            if !macros.is_empty() {
                sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs");
            }
            let name = MacroRulesNormalizedIdent::new(name);
            if let Some(prev_info) = get_binder_info(macros, binders, name) {
                // Duplicate binders at the top-level macro definition are errors. The lint is only
                // for nested macro definitions.
                sess.span_diagnostic
                    .struct_span_err(span, "duplicate matcher binding")
                    .span_label(span, "duplicate binding")
                    .span_label(prev_info.span, "previous binding")
                    .emit();
                // This is the only place in this function that clears `valid`.
                *valid = false;
            } else {
                binders.insert(name, BinderInfo { span, ops: ops.into() });
            }
        }
        // `MetaVarExpr` can not appear in the LHS of a macro arm
        TokenTree::MetaVarExpr(..) => {}
        TokenTree::Delimited(_, ref del) => {
            // Delimiters do not change the repetition depth: same `ops` for the contents.
            for tt in &del.tts {
                check_binders(sess, node_id, tt, macros, binders, ops, valid);
            }
        }
        TokenTree::Sequence(_, ref seq) => {
            // Binders inside the sequence are recorded one Kleene operator deeper.
            let ops = ops.push(seq.kleene);
            for tt in &seq.tts {
                check_binders(sess, node_id, tt, macros, binders, &ops, valid);
            }
        }
    }
}

/// Returns the binder information of a meta-variable.
+/// +/// Arguments: +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the current binders +/// - `name` is the name of the meta-variable we are looking for +fn get_binder_info<'a>( + mut macros: &'a Stack<'a, MacroState<'a>>, + binders: &'a Binders, + name: MacroRulesNormalizedIdent, +) -> Option<&'a BinderInfo> { + binders.get(&name).or_else(|| macros.find_map(|state| state.binders.get(&name))) +} + +/// Checks `rhs` as part of the RHS of a macro definition and sets `valid` to false in case of +/// errors. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `rhs` is checked as part of a RHS +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `valid` is set in case of errors +fn check_occurrences( + sess: &ParseSess, + node_id: NodeId, + rhs: &TokenTree, + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + valid: &mut bool, +) { + match *rhs { + TokenTree::Token(..) 
=> {} + TokenTree::MetaVarDecl(span, _name, _kind) => { + sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in rhs") + } + TokenTree::MetaVar(span, name) => { + let name = MacroRulesNormalizedIdent::new(name); + check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name); + } + TokenTree::MetaVarExpr(dl, ref mve) => { + let Some(name) = mve.ident().map(MacroRulesNormalizedIdent::new) else { + return; + }; + check_ops_is_prefix(sess, node_id, macros, binders, ops, dl.entire(), name); + } + TokenTree::Delimited(_, ref del) => { + check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid); + } + TokenTree::Sequence(_, ref seq) => { + let ops = ops.push(seq.kleene); + check_nested_occurrences(sess, node_id, &seq.tts, macros, binders, &ops, valid); + } + } +} + +/// Represents the processed prefix of a nested macro. +#[derive(Clone, Copy, PartialEq, Eq)] +enum NestedMacroState { + /// Nothing that matches a nested macro definition was processed yet. + Empty, + /// The token `macro_rules` was processed. + MacroRules, + /// The tokens `macro_rules!` were processed. + MacroRulesNot, + /// The tokens `macro_rules!` followed by a name were processed. The name may be either directly + /// an identifier or a meta-variable (that hopefully would be instantiated by an identifier). + MacroRulesNotName, + /// The keyword `macro` was processed. + Macro, + /// The keyword `macro` followed by a name was processed. + MacroName, + /// The keyword `macro` followed by a name and a token delimited by parentheses was processed. + MacroNameParen, +} + +/// Checks `tts` as part of the RHS of a macro definition, tries to recognize nested macro +/// definitions, and sets `valid` to false in case of errors. 
///
/// The token trees are scanned left to right with a small state machine (`NestedMacroState`)
/// that recognizes the headers `macro_rules! name { ... }`, `macro name { ... }` and
/// `macro name ( ... ) { ... }`. Any token tree that does not continue a header resets the
/// state machine and is checked as an ordinary occurrence.
///
/// Arguments:
/// - `sess` is used to emit diagnostics and lints
/// - `node_id` is used to emit lints
/// - `tts` is checked as part of a RHS and may contain macro definitions
/// - `macros` is the stack of possible outer macros
/// - `binders` contains the binders of the associated LHS
/// - `ops` is the stack of Kleene operators from the RHS
/// - `valid` is set in case of errors
fn check_nested_occurrences(
    sess: &ParseSess,
    node_id: NodeId,
    tts: &[TokenTree],
    macros: &Stack<'_, MacroState<'_>>,
    binders: &Binders,
    ops: &Stack<'_, KleeneToken>,
    valid: &mut bool,
) {
    let mut state = NestedMacroState::Empty;
    // Environment used while inside a nested definition: the current binders and Kleene stack
    // become one more "outer macro" state on top of `macros`.
    let nested_macros = macros.push(MacroState { binders, ops: ops.into() });
    // Binders of a `macro name(...)` header, kept until its `{...}` body is reached.
    let mut nested_binders = Binders::default();
    for tt in tts {
        match (state, tt) {
            // An identifier starts a header only if it is `macro_rules` or `macro`; any other
            // identifier leaves the state at `Empty`. The `false` in the pattern is the second
            // field of `TokenKind::Ident` (NOTE(review): presumably "not a raw identifier").
            (
                NestedMacroState::Empty,
                &TokenTree::Token(Token { kind: TokenKind::Ident(name, false), .. }),
            ) => {
                if name == kw::MacroRules {
                    state = NestedMacroState::MacroRules;
                } else if name == kw::Macro {
                    state = NestedMacroState::Macro;
                }
            }
            // `macro_rules` followed by `!`.
            (
                NestedMacroState::MacroRules,
                &TokenTree::Token(Token { kind: TokenKind::Not, .. }),
            ) => {
                state = NestedMacroState::MacroRulesNot;
            }
            // `macro_rules!` followed by a literal name.
            (
                NestedMacroState::MacroRulesNot,
                &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }),
            ) => {
                state = NestedMacroState::MacroRulesNotName;
            }
            // `macro_rules!` followed by a name given as a meta-variable of the outer macro.
            (NestedMacroState::MacroRulesNot, &TokenTree::MetaVar(..)) => {
                state = NestedMacroState::MacroRulesNotName;
                // We check that the meta-variable is correctly used.
                check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
            }
            // A braced body right after the name: a list of `(LHS) => {RHS}` arms, for either
            // `macro_rules! name` or `macro name`.
            (NestedMacroState::MacroRulesNotName, &TokenTree::Delimited(_, ref del))
            | (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del))
                if del.delim == Delimiter::Brace =>
            {
                // Must be computed before `state` is reset on the next line.
                let macro_rules = state == NestedMacroState::MacroRulesNotName;
                state = NestedMacroState::Empty;
                // `rest` is the index in `del.tts` where arm recognition stopped.
                let rest =
                    check_nested_macro(sess, node_id, macro_rules, &del.tts, &nested_macros, valid);
                // If we did not check the whole macro definition, then check the rest as if outside
                // the macro definition.
                check_nested_occurrences(
                    sess,
                    node_id,
                    &del.tts[rest..],
                    macros,
                    binders,
                    ops,
                    valid,
                );
            }
            // `macro` followed by a literal name.
            (
                NestedMacroState::Macro,
                &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }),
            ) => {
                state = NestedMacroState::MacroName;
            }
            // `macro` followed by a name given as a meta-variable of the outer macro.
            (NestedMacroState::Macro, &TokenTree::MetaVar(..)) => {
                state = NestedMacroState::MacroName;
                // We check that the meta-variable is correctly used.
                check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
            }
            // `macro name ( ... )`: the parenthesized tree is the single LHS of the macro. Its
            // binders are collected from scratch, with an empty Kleene stack.
            (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del))
                if del.delim == Delimiter::Parenthesis =>
            {
                state = NestedMacroState::MacroNameParen;
                nested_binders = Binders::default();
                check_binders(
                    sess,
                    node_id,
                    tt,
                    &nested_macros,
                    &mut nested_binders,
                    &Stack::Empty,
                    valid,
                );
            }
            // `macro name ( ... ) { ... }`: the braced tree is the RHS matching the LHS above.
            (NestedMacroState::MacroNameParen, &TokenTree::Delimited(_, ref del))
                if del.delim == Delimiter::Brace =>
            {
                state = NestedMacroState::Empty;
                check_occurrences(
                    sess,
                    node_id,
                    tt,
                    &nested_macros,
                    &nested_binders,
                    &Stack::Empty,
                    valid,
                );
            }
            // Anything else interrupts the header being recognized: reset the state and check
            // the token tree as a regular part of the outer RHS.
            (_, ref tt) => {
                state = NestedMacroState::Empty;
                check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
            }
        }
    }
}

/// Checks the body of nested macro, returns where the check stopped, and sets `valid` to false in
/// case of errors.
+/// +/// The token trees are checked as long as they look like a list of (LHS) => {RHS} token trees. This +/// check is a best-effort to detect a macro definition. It returns the position in `tts` where we +/// stopped checking because we detected we were not in a macro definition anymore. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `macro_rules` specifies whether the macro is `macro_rules` +/// - `tts` is checked as a list of (LHS) => {RHS} +/// - `macros` is the stack of outer macros +/// - `valid` is set in case of errors +fn check_nested_macro( + sess: &ParseSess, + node_id: NodeId, + macro_rules: bool, + tts: &[TokenTree], + macros: &Stack<'_, MacroState<'_>>, + valid: &mut bool, +) -> usize { + let n = tts.len(); + let mut i = 0; + let separator = if macro_rules { TokenKind::Semi } else { TokenKind::Comma }; + loop { + // We expect 3 token trees: `(LHS) => {RHS}`. The separator is checked after. + if i + 2 >= n + || !tts[i].is_delimited() + || !tts[i + 1].is_token(&TokenKind::FatArrow) + || !tts[i + 2].is_delimited() + { + break; + } + let lhs = &tts[i]; + let rhs = &tts[i + 2]; + let mut binders = Binders::default(); + check_binders(sess, node_id, lhs, macros, &mut binders, &Stack::Empty, valid); + check_occurrences(sess, node_id, rhs, macros, &binders, &Stack::Empty, valid); + // Since the last semicolon is optional for `macro_rules` macros and decl_macro are not terminated, + // we increment our checked position by how many token trees we already checked (the 3 + // above) before checking for the separator. + i += 3; + if i == n || !tts[i].is_token(&separator) { + break; + } + // We increment our checked position for the semicolon. + i += 1; + } + i +} + +/// Checks that a meta-variable occurrence is valid. 
+/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `span` is the span of the meta-variable to check +/// - `name` is the name of the meta-variable to check +fn check_ops_is_prefix( + sess: &ParseSess, + node_id: NodeId, + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + span: Span, + name: MacroRulesNormalizedIdent, +) { + let macros = macros.push(MacroState { binders, ops: ops.into() }); + // Accumulates the stacks of operators of each state until (and including when) the + // meta-variable is found. The innermost stack is first. + let mut acc: SmallVec<[&SmallVec<[KleeneToken; 1]>; 1]> = SmallVec::new(); + for state in &macros { + acc.push(&state.ops); + if let Some(binder) = state.binders.get(&name) { + // This variable concatenates the stack of operators from the RHS of the LHS where the + // meta-variable was defined to where it is used (in possibly nested macros). The + // outermost operator is first. + let mut occurrence_ops: SmallVec<[KleeneToken; 2]> = SmallVec::new(); + // We need to iterate from the end to start with outermost stack. + for ops in acc.iter().rev() { + occurrence_ops.extend_from_slice(ops); + } + ops_is_prefix(sess, node_id, span, name, &binder.ops, &occurrence_ops); + return; + } + } + buffer_lint(sess, span.into(), node_id, &format!("unknown macro variable `{}`", name)); +} + +/// Checks that `binder_ops` is a prefix of `occurrence_ops`, buffering a lint if it is not. +/// +/// The stack of Kleene operators of a meta-variable occurrence just needs to have the stack of +/// Kleene operators of its binder as a prefix. 
+/// +/// Consider $i in the following example: +/// ```ignore (illustrative) +/// ( $( $i:ident = $($j:ident),+ );* ) => { $($( $i += $j; )+)* } +/// ``` +/// It occurs under the Kleene stack ["*", "+"] and is bound under ["*"] only. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `span` is the span of the meta-variable being checked +/// - `name` is the name of the meta-variable being checked +/// - `binder_ops` is the stack of Kleene operators for the binder +/// - `occurrence_ops` is the stack of Kleene operators for the occurrence +fn ops_is_prefix( + sess: &ParseSess, + node_id: NodeId, + span: Span, + name: MacroRulesNormalizedIdent, + binder_ops: &[KleeneToken], + occurrence_ops: &[KleeneToken], +) { + for (i, binder) in binder_ops.iter().enumerate() { + if i >= occurrence_ops.len() { + let mut span = MultiSpan::from_span(span); + span.push_span_label(binder.span, "expected repetition"); + let message = &format!("variable '{}' is still repeating at this depth", name); + buffer_lint(sess, span, node_id, message); + return; + } + let occurrence = &occurrence_ops[i]; + if occurrence.op != binder.op { + let mut span = MultiSpan::from_span(span); + span.push_span_label(binder.span, "expected repetition"); + span.push_span_label(occurrence.span, "conflicting repetition"); + let message = "meta-variable repeats with different Kleene operator"; + buffer_lint(sess, span, node_id, message); + return; + } + } +} + +fn buffer_lint(sess: &ParseSess, span: MultiSpan, node_id: NodeId, message: &str) { + // Macros loaded from other crates have dummy node ids. + if node_id != DUMMY_NODE_ID { + sess.buffer_lint(&META_VARIABLE_MISUSE, span, node_id, message); + } +} diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs new file mode 100644 index 000000000..4fa91dfea --- /dev/null +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -0,0 +1,704 @@ +//! 
This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals +//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads +//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in +//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier +//! fit for Macro-by-Example-style rules. +//! +//! (In order to prevent the pathological case, we'd need to lazily construct the resulting +//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old +//! matcher positions, but it would also save overhead) +//! +//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. +//! The macro parser restricts itself to the features of finite state automata. Earley parsers +//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. +//! +//! Quick intro to how the parser works: +//! +//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually +//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`. +//! +//! The parser walks through the input a token at a time, maintaining a list +//! of threads consistent with the current position in the input string: `cur_mps`. +//! +//! As it processes them, it fills up `eof_mps` with threads that would be valid if +//! the macro invocation is now over, `bb_mps` with threads that are waiting on +//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting +//! on a particular token. Most of the logic concerns moving the · through the +//! repetitions indicated by Kleene stars. The rules for moving the · without +//! consuming any input are called epsilon transitions. It only advances or calls +//! out to the real Rust parser when no `cur_mps` threads remain. +//! +//! Example: +//! +//! 
```text, ignore +//! Start parsing a a a a b against [· a $( a )* a b]. +//! +//! Remaining input: a a a a b +//! next: [· a $( a )* a b] +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a a b +//! cur: [a · $( a )* a b] +//! Descend/Skip (first position). +//! next: [a $( · a )* a b] [a $( a )* · a b]. +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first position) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first position) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first position) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over a b. - - - +//! +//! Remaining input: '' +//! eof: [a $( a )* a b ·] +//! ``` + +pub(crate) use NamedMatch::*; +pub(crate) use ParseResult::*; + +use crate::mbe::{KleeneOp, TokenTree}; + +use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token}; +use rustc_lint_defs::pluralize; +use rustc_parse::parser::{NtOrTt, Parser}; +use rustc_span::symbol::MacroRulesNormalizedIdent; +use rustc_span::Span; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::Lrc; +use rustc_span::symbol::Ident; +use std::borrow::Cow; +use std::collections::hash_map::Entry::{Occupied, Vacant}; + +/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from) +/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching. 
+/// Notable differences to `mbe::TokenTree`: +/// - It is non-recursive, i.e. there is no nesting. +/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are +/// represented explicitly, as is the very end of the matcher. +/// +/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves +/// simply incrementing the current matcher position index by one. +pub(super) enum MatcherLoc { + Token { + token: Token, + }, + Delimited, + Sequence { + op: KleeneOp, + num_metavar_decls: usize, + idx_first_after: usize, + next_metavar: usize, + seq_depth: usize, + }, + SequenceKleeneOpNoSep { + op: KleeneOp, + idx_first: usize, + }, + SequenceSep { + separator: Token, + }, + SequenceKleeneOpAfterSep { + idx_first: usize, + }, + MetaVarDecl { + span: Span, + bind: Ident, + kind: Option<NonterminalKind>, + next_metavar: usize, + seq_depth: usize, + }, + Eof, +} + +pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> { + fn inner( + tts: &[TokenTree], + locs: &mut Vec<MatcherLoc>, + next_metavar: &mut usize, + seq_depth: usize, + ) { + for tt in tts { + match tt { + TokenTree::Token(token) => { + locs.push(MatcherLoc::Token { token: token.clone() }); + } + TokenTree::Delimited(span, delimited) => { + let open_token = Token::new(token::OpenDelim(delimited.delim), span.open); + let close_token = Token::new(token::CloseDelim(delimited.delim), span.close); + + locs.push(MatcherLoc::Delimited); + locs.push(MatcherLoc::Token { token: open_token }); + inner(&delimited.tts, locs, next_metavar, seq_depth); + locs.push(MatcherLoc::Token { token: close_token }); + } + TokenTree::Sequence(_, seq) => { + // We can't determine `idx_first_after` and construct the final + // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end + // pieces are processed. So we push a dummy value (`Eof` is cheapest to + // construct) now, and overwrite it with the proper value below. 
+ let dummy = MatcherLoc::Eof; + locs.push(dummy); + + let next_metavar_orig = *next_metavar; + let op = seq.kleene.op; + let idx_first = locs.len(); + let idx_seq = idx_first - 1; + inner(&seq.tts, locs, next_metavar, seq_depth + 1); + + if let Some(separator) = &seq.separator { + locs.push(MatcherLoc::SequenceSep { separator: separator.clone() }); + locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first }); + } else { + locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first }); + } + + // Overwrite the dummy value pushed above with the proper value. + locs[idx_seq] = MatcherLoc::Sequence { + op, + num_metavar_decls: seq.num_captures, + idx_first_after: locs.len(), + next_metavar: next_metavar_orig, + seq_depth, + }; + } + &TokenTree::MetaVarDecl(span, bind, kind) => { + locs.push(MatcherLoc::MetaVarDecl { + span, + bind, + kind, + next_metavar: *next_metavar, + seq_depth, + }); + *next_metavar += 1; + } + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), + } + } + } + + let mut locs = vec![]; + let mut next_metavar = 0; + inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0); + + // A final entry is needed for eof. + locs.push(MatcherLoc::Eof); + + locs +} + +/// A single matcher position, representing the state of matching. +struct MatcherPos { + /// The index into `TtParser::locs`, which represents the "dot". + idx: usize, + + /// The matches made against metavar decls so far. On a successful match, this vector ends up + /// with one element per metavar decl in the matcher. Each element records token trees matched + /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if + /// the corresponding metavar decl is within a sequence. + /// + /// It is critical to performance that this is an `Lrc`, because it gets cloned frequently when + /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end + /// up failing. 
+ matches: Lrc<Vec<NamedMatch>>, +} + +// This type is used a lot. Make sure it doesn't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(MatcherPos, 16); + +impl MatcherPos { + /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites, + /// and both are hot enough to be always worth inlining. + #[inline(always)] + fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) { + let matches = Lrc::make_mut(&mut self.matches); + match seq_depth { + 0 => { + // We are not within a sequence. Just append `m`. + assert_eq!(metavar_idx, matches.len()); + matches.push(m); + } + _ => { + // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth + // and append `m` to its vector. + let mut curr = &mut matches[metavar_idx]; + for _ in 0..seq_depth - 1 { + match curr { + MatchedSeq(seq) => curr = seq.last_mut().unwrap(), + _ => unreachable!(), + } + } + match curr { + MatchedSeq(seq) => seq.push(m), + _ => unreachable!(), + } + } + } + } +} + +enum EofMatcherPositions { + None, + One(MatcherPos), + Multiple, +} + +/// Represents the possible results of an attempted parse. +pub(crate) enum ParseResult<T> { + /// Parsed successfully. + Success(T), + /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected + /// end of macro invocation. Otherwise, it indicates that no rules expected the given token. + Failure(Token, &'static str), + /// Fatal error (malformed macro?). Abort compilation. + Error(rustc_span::Span, String), + ErrorReported, +} + +/// A `ParseResult` where the `Success` variant contains a mapping of +/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping +/// of metavars to the token trees they bind to. 
+pub(crate) type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>; + +/// Count how many metavar declarations are in `matcher`. +pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize { + matcher + .iter() + .map(|tt| match tt { + TokenTree::MetaVarDecl(..) => 1, + TokenTree::Sequence(_, seq) => seq.num_captures, + TokenTree::Delimited(_, delim) => count_metavar_decls(&delim.tts), + TokenTree::Token(..) => 0, + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), + }) + .sum() +} + +/// `NamedMatch` is a pattern-match result for a single metavar. All +/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type +/// (expr, item, etc). +/// +/// The in-memory structure of a particular `NamedMatch` represents the match +/// that occurred when a particular subset of a matcher was applied to a +/// particular token tree. +/// +/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of +/// the `MatchedTokenTree`s and `MatchedNonterminal`s, will depend on the token tree it was applied +/// to: each `MatchedSeq` corresponds to a single repetition in the originating +/// token tree. The depth of the `NamedMatch` structure will therefore depend +/// only on the nesting depth of repetitions in the originating token tree it +/// was derived from. +/// +/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular +/// meta variable. For example, if we are matching the following macro against the following +/// invocation... +/// +/// ```rust +/// macro_rules! 
foo { +/// ($($($x:ident),+);+) => {} +/// } +/// +/// foo!(a, b, c, d; a, b, c, d, e); +/// ``` +/// +/// Then, the tree will have the following shape: +/// +/// ```ignore (private-internal) +/// # use NamedMatch::*; +/// MatchedSeq([ +/// MatchedSeq([ +/// MatchedNonterminal(a), +/// MatchedNonterminal(b), +/// MatchedNonterminal(c), +/// MatchedNonterminal(d), +/// ]), +/// MatchedSeq([ +/// MatchedNonterminal(a), +/// MatchedNonterminal(b), +/// MatchedNonterminal(c), +/// MatchedNonterminal(d), +/// MatchedNonterminal(e), +/// ]) +/// ]) +/// ``` +#[derive(Debug, Clone)] +pub(crate) enum NamedMatch { + MatchedSeq(Vec<NamedMatch>), + + // A metavar match of type `tt`. + MatchedTokenTree(rustc_ast::tokenstream::TokenTree), + + // A metavar match of any type other than `tt`. + MatchedNonterminal(Lrc<Nonterminal>), +} + +/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison) +fn token_name_eq(t1: &Token, t2: &Token) -> bool { + if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { + ident1.name == ident2.name && is_raw1 == is_raw2 + } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { + ident1.name == ident2.name + } else { + t1.kind == t2.kind + } +} + +// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess +// allocations we have a single vector for each kind that is cleared and reused repeatedly. +pub struct TtParser { + macro_name: Ident, + + /// The set of current mps to be processed. This should be empty by the end of a successful + /// execution of `parse_tt_inner`. + cur_mps: Vec<MatcherPos>, + + /// The set of newly generated mps. These are used to replenish `cur_mps` in the function + /// `parse_tt`. + next_mps: Vec<MatcherPos>, + + /// The set of mps that are waiting for the black-box parser. 
+ bb_mps: Vec<MatcherPos>, + + /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules + /// that have no metavars. + empty_matches: Lrc<Vec<NamedMatch>>, +} + +impl TtParser { + pub(super) fn new(macro_name: Ident) -> TtParser { + TtParser { + macro_name, + cur_mps: vec![], + next_mps: vec![], + bb_mps: vec![], + empty_matches: Lrc::new(vec![]), + } + } + + /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will + /// produce more mps in `next_mps` and `bb_mps`. + /// + /// # Returns + /// + /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept + /// track of through the mps generated. + fn parse_tt_inner( + &mut self, + matcher: &[MatcherLoc], + token: &Token, + ) -> Option<NamedParseResult> { + // Matcher positions that would be valid if the macro invocation was over now. Only + // modified if `token == Eof`. + let mut eof_mps = EofMatcherPositions::None; + + while let Some(mut mp) = self.cur_mps.pop() { + match &matcher[mp.idx] { + MatcherLoc::Token { token: t } => { + // If it's a doc comment, we just ignore it and move on to the next tt in the + // matcher. This is a bug, but #95267 showed that existing programs rely on + // this behaviour, and changing it would require some care and a transition + // period. + // + // If the token matches, we can just advance the parser. + // + // Otherwise, this match has failed, there is nothing to do, and hopefully + // another mp in `cur_mps` will match. + if matches!(t, Token { kind: DocComment(..), .. }) { + mp.idx += 1; + self.cur_mps.push(mp); + } else if token_name_eq(&t, token) { + mp.idx += 1; + self.next_mps.push(mp); + } + } + MatcherLoc::Delimited => { + // Entering the delimiter is trivial. 
+ mp.idx += 1; + self.cur_mps.push(mp); + } + &MatcherLoc::Sequence { + op, + num_metavar_decls, + idx_first_after, + next_metavar, + seq_depth, + } => { + // Install an empty vec for each metavar within the sequence. + for metavar_idx in next_metavar..next_metavar + num_metavar_decls { + mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![])); + } + + if op == KleeneOp::ZeroOrMore || op == KleeneOp::ZeroOrOne { + // Try zero matches of this sequence, by skipping over it. + self.cur_mps.push(MatcherPos { + idx: idx_first_after, + matches: mp.matches.clone(), // a cheap clone + }); + } + + // Try one or more matches of this sequence, by entering it. + mp.idx += 1; + self.cur_mps.push(mp); + } + &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => { + // We are past the end of a sequence with no separator. Try ending the + // sequence. If that's not possible, `ending_mp` will fail quietly when it is + // processed next time around the loop. + let ending_mp = MatcherPos { + idx: mp.idx + 1, // +1 skips the Kleene op + matches: mp.matches.clone(), // a cheap clone + }; + self.cur_mps.push(ending_mp); + + if op != KleeneOp::ZeroOrOne { + // Try another repetition. + mp.idx = idx_first; + self.cur_mps.push(mp); + } + } + MatcherLoc::SequenceSep { separator } => { + // We are past the end of a sequence with a separator but we haven't seen the + // separator yet. Try ending the sequence. If that's not possible, `ending_mp` + // will fail quietly when it is processed next time around the loop. + let ending_mp = MatcherPos { + idx: mp.idx + 2, // +2 skips the separator and the Kleene op + matches: mp.matches.clone(), // a cheap clone + }; + self.cur_mps.push(ending_mp); + + if token_name_eq(token, separator) { + // The separator matches the current token. Advance past it. + mp.idx += 1; + self.next_mps.push(mp); + } + } + &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => { + // We are past the sequence separator. 
This can't be a `?` Kleene op, because + // they don't permit separators. Try another repetition. + mp.idx = idx_first; + self.cur_mps.push(mp); + } + &MatcherLoc::MetaVarDecl { span, kind, .. } => { + // Built-in nonterminals never start with these tokens, so we can eliminate + // them from consideration. We use the span of the metavariable declaration + // to determine any edition-specific matching behavior for non-terminals. + if let Some(kind) = kind { + if Parser::nonterminal_may_begin_with(kind, token) { + self.bb_mps.push(mp); + } + } else { + // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used. + // Both this check and the one in `nameize` are necessary, surprisingly. + return Some(Error(span, "missing fragment specifier".to_string())); + } + } + MatcherLoc::Eof => { + // We are past the matcher's end, and not in a sequence. Try to end things. + debug_assert_eq!(mp.idx, matcher.len() - 1); + if *token == token::Eof { + eof_mps = match eof_mps { + EofMatcherPositions::None => EofMatcherPositions::One(mp), + EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => { + EofMatcherPositions::Multiple + } + } + } + } + } + } + + // If we reached the end of input, check that there is EXACTLY ONE possible matcher. + // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error. + if *token == token::Eof { + Some(match eof_mps { + EofMatcherPositions::One(mut eof_mp) => { + // Need to take ownership of the matches from within the `Lrc`. 
+ Lrc::make_mut(&mut eof_mp.matches); + let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter(); + self.nameize(matcher, matches) + } + EofMatcherPositions::Multiple => { + Error(token.span, "ambiguity: multiple successful parses".to_string()) + } + EofMatcherPositions::None => Failure( + Token::new( + token::Eof, + if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, + ), + "missing tokens in macro arguments", + ), + }) + } else { + None + } + } + + /// Match the token stream from `parser` against `matcher`. + pub(super) fn parse_tt( + &mut self, + parser: &mut Cow<'_, Parser<'_>>, + matcher: &[MatcherLoc], + ) -> NamedParseResult { + // A queue of possible matcher positions. We initialize it with the matcher position in + // which the "dot" is before the first token of the first token tree in `matcher`. + // `parse_tt_inner` then processes all of these possible matcher positions and produces + // possible next positions into `next_mps`. After some post-processing, the contents of + // `next_mps` replenish `cur_mps` and we start over again. + self.cur_mps.clear(); + self.cur_mps.push(MatcherPos { idx: 0, matches: self.empty_matches.clone() }); + + loop { + self.next_mps.clear(); + self.bb_mps.clear(); + + // Process `cur_mps` until either we have finished the input or we need to get some + // parsing from the black-box parser done. + if let Some(res) = self.parse_tt_inner(matcher, &parser.token) { + return res; + } + + // `parse_tt_inner` handled all of `cur_mps`, so it's empty. + assert!(self.cur_mps.is_empty()); + + // Error messages here could be improved with links to original rules. + match (self.next_mps.len(), self.bb_mps.len()) { + (0, 0) => { + // There are no possible next positions AND we aren't waiting for the black-box + // parser: syntax error. 
+ return Failure( + parser.token.clone(), + "no rules expected this token in macro call", + ); + } + + (_, 0) => { + // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then + // process the next token. + self.cur_mps.append(&mut self.next_mps); + parser.to_mut().bump(); + } + + (0, 1) => { + // We need to call the black-box parser to get some nonterminal. + let mut mp = self.bb_mps.pop().unwrap(); + let loc = &matcher[mp.idx]; + if let &MatcherLoc::MetaVarDecl { + span, + kind: Some(kind), + next_metavar, + seq_depth, + .. + } = loc + { + // We use the span of the metavariable declaration to determine any + // edition-specific matching behavior for non-terminals. + let nt = match parser.to_mut().parse_nonterminal(kind) { + Err(mut err) => { + err.span_label( + span, + format!( + "while parsing argument for this `{kind}` macro fragment" + ), + ) + .emit(); + return ErrorReported; + } + Ok(nt) => nt, + }; + let m = match nt { + NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)), + NtOrTt::Tt(tt) => MatchedTokenTree(tt), + }; + mp.push_match(next_metavar, seq_depth, m); + mp.idx += 1; + } else { + unreachable!() + } + self.cur_mps.push(mp); + } + + (_, _) => { + // Too many possibilities! + return self.ambiguity_error(matcher, parser.token.span); + } + } + + assert!(!self.cur_mps.is_empty()); + } + } + + fn ambiguity_error( + &self, + matcher: &[MatcherLoc], + token_span: rustc_span::Span, + ) -> NamedParseResult { + let nts = self + .bb_mps + .iter() + .map(|mp| match &matcher[mp.idx] { + MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. 
} => { + format!("{} ('{}')", kind, bind) + } + _ => unreachable!(), + }) + .collect::<Vec<String>>() + .join(" or "); + + Error( + token_span, + format!( + "local ambiguity when calling macro `{}`: multiple parsing options: {}", + self.macro_name, + match self.next_mps.len() { + 0 => format!("built-in NTs {}.", nts), + n => format!("built-in NTs {} or {n} other option{s}.", nts, s = pluralize!(n)), + } + ), + ) + } + + fn nameize<I: Iterator<Item = NamedMatch>>( + &self, + matcher: &[MatcherLoc], + mut res: I, + ) -> NamedParseResult { + // Make sure that each metavar has _exactly one_ binding. If so, insert the binding into the + // `NamedParseResult`. Otherwise, it's an error. + let mut ret_val = FxHashMap::default(); + for loc in matcher { + if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc { + if kind.is_some() { + match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) { + Vacant(spot) => spot.insert(res.next().unwrap()), + Occupied(..) => { + return Error(span, format!("duplicated bind name: {}", bind)); + } + }; + } else { + // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used. + // Both this check and the one in `parse_tt_inner` are necessary, surprisingly. 
+ return Error(span, "missing fragment specifier".to_string()); + } + } + } + Success(ret_val) + } +} diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs new file mode 100644 index 000000000..f7e1575af --- /dev/null +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -0,0 +1,1420 @@ +use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander}; +use crate::base::{SyntaxExtension, SyntaxExtensionKind}; +use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind}; +use crate::mbe; +use crate::mbe::macro_check; +use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser}; +use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc}; +use crate::mbe::transcribe::transcribe; + +use rustc_ast as ast; +use rustc_ast::token::{self, Delimiter, NonterminalKind, Token, TokenKind, TokenKind::*}; +use rustc_ast::tokenstream::{DelimSpan, TokenStream}; +use rustc_ast::{NodeId, DUMMY_NODE_ID}; +use rustc_ast_pretty::pprust; +use rustc_attr::{self as attr, TransparencyError}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; +use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, ErrorGuaranteed}; +use rustc_feature::Features; +use rustc_lint_defs::builtin::{ + RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS, +}; +use rustc_lint_defs::BuiltinLintDiagnostics; +use rustc_parse::parser::Parser; +use rustc_session::parse::ParseSess; +use rustc_session::Session; +use rustc_span::edition::Edition; +use rustc_span::hygiene::Transparency; +use rustc_span::source_map::SourceMap; +use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent}; +use rustc_span::Span; + +use std::borrow::Cow; +use std::collections::hash_map::Entry; +use std::{mem, slice}; +use tracing::debug; + +pub(crate) struct ParserAnyMacro<'a> { + parser: Parser<'a>, + + /// Span of the expansion site of the macro this parser is for + site_span: 
Span, + /// The ident of the macro we're parsing + macro_ident: Ident, + lint_node_id: NodeId, + is_trailing_mac: bool, + arm_span: Span, + /// Whether or not this macro is defined in the current crate + is_local: bool, +} + +pub(crate) fn annotate_err_with_kind(err: &mut Diagnostic, kind: AstFragmentKind, span: Span) { + match kind { + AstFragmentKind::Ty => { + err.span_label(span, "this macro call doesn't expand to a type"); + } + AstFragmentKind::Pat => { + err.span_label(span, "this macro call doesn't expand to a pattern"); + } + _ => {} + }; +} + +fn emit_frag_parse_err( + mut e: DiagnosticBuilder<'_, rustc_errors::ErrorGuaranteed>, + parser: &Parser<'_>, + orig_parser: &mut Parser<'_>, + site_span: Span, + arm_span: Span, + kind: AstFragmentKind, +) { + // FIXME(davidtwco): avoid depending on the error message text + if parser.token == token::Eof && e.message[0].0.expect_str().ends_with(", found `<eof>`") { + if !e.span.is_dummy() { + // early end of macro arm (#52866) + e.replace_span_with(parser.sess.source_map().next_point(parser.token.span)); + } + let msg = &e.message[0]; + e.message[0] = ( + rustc_errors::DiagnosticMessage::Str(format!( + "macro expansion ends with an incomplete expression: {}", + msg.0.expect_str().replace(", found `<eof>`", ""), + )), + msg.1, + ); + } + if e.span.is_dummy() { + // Get around lack of span in error (#30128) + e.replace_span_with(site_span); + if !parser.sess.source_map().is_imported(arm_span) { + e.span_label(arm_span, "in this macro arm"); + } + } else if parser.sess.source_map().is_imported(parser.token.span) { + e.span_label(site_span, "in this macro invocation"); + } + match kind { + // Try a statement if an expression is wanted but failed and suggest adding `;` to call. 
+ AstFragmentKind::Expr => match parse_ast_fragment(orig_parser, AstFragmentKind::Stmts) { + Err(err) => err.cancel(), + Ok(_) => { + e.note( + "the macro call doesn't expand to an expression, but it can expand to a statement", + ); + e.span_suggestion_verbose( + site_span.shrink_to_hi(), + "add `;` to interpret the expansion as a statement", + ";", + Applicability::MaybeIncorrect, + ); + } + }, + _ => annotate_err_with_kind(&mut e, kind, site_span), + }; + e.emit(); +} + +impl<'a> ParserAnyMacro<'a> { + pub(crate) fn make(mut self: Box<ParserAnyMacro<'a>>, kind: AstFragmentKind) -> AstFragment { + let ParserAnyMacro { + site_span, + macro_ident, + ref mut parser, + lint_node_id, + arm_span, + is_trailing_mac, + is_local, + } = *self; + let snapshot = &mut parser.create_snapshot_for_diagnostic(); + let fragment = match parse_ast_fragment(parser, kind) { + Ok(f) => f, + Err(err) => { + emit_frag_parse_err(err, parser, snapshot, site_span, arm_span, kind); + return kind.dummy(site_span); + } + }; + + // We allow semicolons at the end of expressions -- e.g., the semicolon in + // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`, + // but `m!()` is allowed in expression positions (cf. issue #34706). + if kind == AstFragmentKind::Expr && parser.token == token::Semi { + if is_local { + parser.sess.buffer_lint_with_diagnostic( + SEMICOLON_IN_EXPRESSIONS_FROM_MACROS, + parser.token.span, + lint_node_id, + "trailing semicolon in macro used in expression position", + BuiltinLintDiagnostics::TrailingMacro(is_trailing_mac, macro_ident), + ); + } + parser.bump(); + } + + // Make sure we don't have any tokens left to parse so we don't silently drop anything. 
+ let path = ast::Path::from_ident(macro_ident.with_span_pos(site_span)); + ensure_complete_parse(parser, &path, kind.name(), site_span); + fragment + } +} + +struct MacroRulesMacroExpander { + node_id: NodeId, + name: Ident, + span: Span, + transparency: Transparency, + lhses: Vec<Vec<MatcherLoc>>, + rhses: Vec<mbe::TokenTree>, + valid: bool, +} + +impl TTMacroExpander for MacroRulesMacroExpander { + fn expand<'cx>( + &self, + cx: &'cx mut ExtCtxt<'_>, + sp: Span, + input: TokenStream, + ) -> Box<dyn MacResult + 'cx> { + if !self.valid { + return DummyResult::any(sp); + } + expand_macro( + cx, + sp, + self.span, + self.node_id, + self.name, + self.transparency, + input, + &self.lhses, + &self.rhses, + ) + } +} + +fn macro_rules_dummy_expander<'cx>( + _: &'cx mut ExtCtxt<'_>, + span: Span, + _: TokenStream, +) -> Box<dyn MacResult + 'cx> { + DummyResult::any(span) +} + +fn trace_macros_note(cx_expansions: &mut FxIndexMap<Span, Vec<String>>, sp: Span, message: String) { + let sp = sp.macro_backtrace().last().map_or(sp, |trace| trace.call_site); + cx_expansions.entry(sp).or_default().push(message); +} + +/// Expands the rules based macro defined by `lhses` and `rhses` for a given +/// input `arg`. +fn expand_macro<'cx>( + cx: &'cx mut ExtCtxt<'_>, + sp: Span, + def_span: Span, + node_id: NodeId, + name: Ident, + transparency: Transparency, + arg: TokenStream, + lhses: &[Vec<MatcherLoc>], + rhses: &[mbe::TokenTree], +) -> Box<dyn MacResult + 'cx> { + let sess = &cx.sess.parse_sess; + // Macros defined in the current crate have a real node id, + // whereas macros from an external crate have a dummy id. + let is_local = node_id != DUMMY_NODE_ID; + + if cx.trace_macros() { + let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(&arg)); + trace_macros_note(&mut cx.expansions, sp, msg); + } + + // Which arm's failure should we report? 
(the one furthest along) + let mut best_failure: Option<(Token, &str)> = None; + + // We create a base parser that can be used for the "black box" parts. + // Every iteration needs a fresh copy of that parser. However, the parser + // is not mutated on many of the iterations, particularly when dealing with + // macros like this: + // + // macro_rules! foo { + // ("a") => (A); + // ("b") => (B); + // ("c") => (C); + // // ... etc. (maybe hundreds more) + // } + // + // as seen in the `html5ever` benchmark. We use a `Cow` so that the base + // parser is only cloned when necessary (upon mutation). Furthermore, we + // reinitialize the `Cow` with the base parser at the start of every + // iteration, so that any mutated parsers are not reused. This is all quite + // hacky, but speeds up the `html5ever` benchmark significantly. (Issue + // 68836 suggests a more comprehensive but more complex change to deal with + // this situation.) + let parser = parser_from_cx(sess, arg.clone()); + + // Try each arm's matchers. + let mut tt_parser = TtParser::new(name); + for (i, lhs) in lhses.iter().enumerate() { + // Take a snapshot of the state of pre-expansion gating at this point. + // This is used so that if a matcher is not `Success(..)`ful, + // then the spans which became gated when parsing the unsuccessful matcher + // are not recorded. On the first `Success(..)`ful matcher, the spans are merged. + let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut()); + + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) { + Success(named_matches) => { + // The matcher was `Success(..)`ful. + // Merge the gated spans from parsing the matcher with the pre-existing ones. 
+ sess.gated_spans.merge(gated_spans_snapshot); + + let (rhs, rhs_span): (&mbe::Delimited, DelimSpan) = match &rhses[i] { + mbe::TokenTree::Delimited(span, delimited) => (&delimited, *span), + _ => cx.span_bug(sp, "malformed macro rhs"), + }; + let arm_span = rhses[i].span(); + + let rhs_spans = rhs.tts.iter().map(|t| t.span()).collect::<Vec<_>>(); + // rhs has holes ( `$id` and `$(...)` that need filled) + let mut tts = match transcribe(cx, &named_matches, &rhs, rhs_span, transparency) { + Ok(tts) => tts, + Err(mut err) => { + err.emit(); + return DummyResult::any(arm_span); + } + }; + + // Replace all the tokens for the corresponding positions in the macro, to maintain + // proper positions in error reporting, while maintaining the macro_backtrace. + if rhs_spans.len() == tts.len() { + tts = tts.map_enumerated(|i, tt| { + let mut tt = tt.clone(); + let mut sp = rhs_spans[i]; + sp = sp.with_ctxt(tt.span().ctxt()); + tt.set_span(sp); + tt + }); + } + + if cx.trace_macros() { + let msg = format!("to `{}`", pprust::tts_to_string(&tts)); + trace_macros_note(&mut cx.expansions, sp, msg); + } + + let mut p = Parser::new(sess, tts, false, None); + p.last_type_ascription = cx.current_expansion.prior_type_ascription; + + if is_local { + cx.resolver.record_macro_rule_usage(node_id, i); + } + + // Let the context choose how to interpret the result. + // Weird, but useful for X-macros. + return Box::new(ParserAnyMacro { + parser: p, + + // Pass along the original expansion site and the name of the macro + // so we can print a useful error message if the parse of the expanded + // macro leaves unparsed tokens. 
+ site_span: sp, + macro_ident: name, + lint_node_id: cx.current_expansion.lint_node_id, + is_trailing_mac: cx.current_expansion.is_trailing_mac, + arm_span, + is_local, + }); + } + Failure(token, msg) => match best_failure { + Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {} + _ => best_failure = Some((token, msg)), + }, + Error(err_sp, ref msg) => { + let span = err_sp.substitute_dummy(sp); + cx.struct_span_err(span, &msg).emit(); + return DummyResult::any(span); + } + ErrorReported => return DummyResult::any(sp), + } + + // The matcher was not `Success(..)`ful. + // Restore to the state before snapshotting and maybe try again. + mem::swap(&mut gated_spans_snapshot, &mut sess.gated_spans.spans.borrow_mut()); + } + drop(parser); + + let (token, label) = best_failure.expect("ran no matchers"); + let span = token.span.substitute_dummy(sp); + let mut err = cx.struct_span_err(span, &parse_failure_msg(&token)); + err.span_label(span, label); + if !def_span.is_dummy() && !cx.source_map().is_imported(def_span) { + err.span_label(cx.source_map().guess_head_span(def_span), "when calling this macro"); + } + annotate_doc_comment(&mut err, sess.source_map(), span); + // Check whether there's a missing comma in this macro call, like `println!("{}" a);` + if let Some((arg, comma_span)) = arg.add_comma() { + for lhs in lhses { + let parser = parser_from_cx(sess, arg.clone()); + if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) { + if comma_span.is_dummy() { + err.note("you might be missing a comma"); + } else { + err.span_suggestion_short( + comma_span, + "missing comma here", + ", ", + Applicability::MachineApplicable, + ); + } + } + } + } + err.emit(); + cx.trace_macros_diag(); + DummyResult::any(sp) +} + +// Note that macro-by-example's input is also matched against a token tree: +// $( $lhs:tt => $rhs:tt );+ +// +// Holy self-referential! + +/// Converts a macro item into a syntax extension. 
+pub fn compile_declarative_macro( + sess: &Session, + features: &Features, + def: &ast::Item, + edition: Edition, +) -> (SyntaxExtension, Vec<(usize, Span)>) { + debug!("compile_declarative_macro: {:?}", def); + let mk_syn_ext = |expander| { + SyntaxExtension::new( + sess, + SyntaxExtensionKind::LegacyBang(expander), + def.span, + Vec::new(), + edition, + def.ident.name, + &def.attrs, + ) + }; + let dummy_syn_ext = || (mk_syn_ext(Box::new(macro_rules_dummy_expander)), Vec::new()); + + let diag = &sess.parse_sess.span_diagnostic; + let lhs_nm = Ident::new(sym::lhs, def.span); + let rhs_nm = Ident::new(sym::rhs, def.span); + let tt_spec = Some(NonterminalKind::TT); + + // Parse the macro_rules! invocation + let (macro_rules, body) = match &def.kind { + ast::ItemKind::MacroDef(def) => (def.macro_rules, def.body.inner_tokens()), + _ => unreachable!(), + }; + + // The pattern that macro_rules matches. + // The grammar for macro_rules! is: + // $( $lhs:tt => $rhs:tt );+ + // ...quasiquoting this would be nice. + // These spans won't matter, anyways + let argument_gram = vec![ + mbe::TokenTree::Sequence( + DelimSpan::dummy(), + mbe::SequenceRepetition { + tts: vec![ + mbe::TokenTree::MetaVarDecl(def.span, lhs_nm, tt_spec), + mbe::TokenTree::token(token::FatArrow, def.span), + mbe::TokenTree::MetaVarDecl(def.span, rhs_nm, tt_spec), + ], + separator: Some(Token::new( + if macro_rules { token::Semi } else { token::Comma }, + def.span, + )), + kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, def.span), + num_captures: 2, + }, + ), + // to phase into semicolon-termination instead of semicolon-separation + mbe::TokenTree::Sequence( + DelimSpan::dummy(), + mbe::SequenceRepetition { + tts: vec![mbe::TokenTree::token( + if macro_rules { token::Semi } else { token::Comma }, + def.span, + )], + separator: None, + kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, def.span), + num_captures: 0, + }, + ), + ]; + // Convert it into `MatcherLoc` form. 
+ let argument_gram = mbe::macro_parser::compute_locs(&argument_gram); + + let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS); + let mut tt_parser = + TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro })); + let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) { + Success(m) => m, + Failure(token, msg) => { + let s = parse_failure_msg(&token); + let sp = token.span.substitute_dummy(def.span); + let mut err = sess.parse_sess.span_diagnostic.struct_span_err(sp, &s); + err.span_label(sp, msg); + annotate_doc_comment(&mut err, sess.source_map(), sp); + err.emit(); + return dummy_syn_ext(); + } + Error(sp, msg) => { + sess.parse_sess + .span_diagnostic + .struct_span_err(sp.substitute_dummy(def.span), &msg) + .emit(); + return dummy_syn_ext(); + } + ErrorReported => { + return dummy_syn_ext(); + } + }; + + let mut valid = true; + + // Extract the arguments: + let lhses = match argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] { + MatchedSeq(ref s) => s + .iter() + .map(|m| { + if let MatchedTokenTree(ref tt) = *m { + let tt = mbe::quoted::parse( + TokenStream::new(vec![tt.clone()]), + true, + &sess.parse_sess, + def.id, + features, + edition, + ) + .pop() + .unwrap(); + valid &= check_lhs_nt_follows(&sess.parse_sess, &def, &tt); + return tt; + } + sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") + }) + .collect::<Vec<mbe::TokenTree>>(), + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"), + }; + + let rhses = match argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] { + MatchedSeq(ref s) => s + .iter() + .map(|m| { + if let MatchedTokenTree(ref tt) = *m { + return mbe::quoted::parse( + TokenStream::new(vec![tt.clone()]), + false, + &sess.parse_sess, + def.id, + features, + edition, + ) + .pop() + .unwrap(); + } + sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") + }) + 
.collect::<Vec<mbe::TokenTree>>(), + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs"), + }; + + for rhs in &rhses { + valid &= check_rhs(&sess.parse_sess, rhs); + } + + // don't abort iteration early, so that errors for multiple lhses can be reported + for lhs in &lhses { + valid &= check_lhs_no_empty_seq(&sess.parse_sess, slice::from_ref(lhs)); + } + + valid &= macro_check::check_meta_variables(&sess.parse_sess, def.id, def.span, &lhses, &rhses); + + let (transparency, transparency_error) = attr::find_transparency(&def.attrs, macro_rules); + match transparency_error { + Some(TransparencyError::UnknownTransparency(value, span)) => { + diag.span_err(span, &format!("unknown macro transparency: `{}`", value)); + } + Some(TransparencyError::MultipleTransparencyAttrs(old_span, new_span)) => { + diag.span_err(vec![old_span, new_span], "multiple macro transparency attributes"); + } + None => {} + } + + // Compute the spans of the macro rules for unused rule linting. + // To avoid warning noise, only consider the rules of this + // macro for the lint, if all rules are valid. + // Also, we are only interested in non-foreign macros. + let rule_spans = if valid && def.id != DUMMY_NODE_ID { + lhses + .iter() + .zip(rhses.iter()) + .enumerate() + // If the rhs contains an invocation like compile_error!, + // don't consider the rule for the unused rule lint. + .filter(|(_idx, (_lhs, rhs))| !has_compile_error_macro(rhs)) + // We only take the span of the lhs here, + // so that the spans of created warnings are smaller. + .map(|(idx, (lhs, _rhs))| (idx, lhs.span())) + .collect::<Vec<_>>() + } else { + Vec::new() + }; + + // Convert the lhses into `MatcherLoc` form, which is better for doing the + // actual matching. Unless the matcher is invalid. + let lhses = if valid { + lhses + .iter() + .map(|lhs| { + // Ignore the delimiters around the matcher. 
+ match lhs { + mbe::TokenTree::Delimited(_, delimited) => { + mbe::macro_parser::compute_locs(&delimited.tts) + } + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"), + } + }) + .collect() + } else { + vec![] + }; + + let expander = Box::new(MacroRulesMacroExpander { + name: def.ident, + span: def.span, + node_id: def.id, + transparency, + lhses, + rhses, + valid, + }); + (mk_syn_ext(expander), rule_spans) +} + +#[derive(SessionSubdiagnostic)] +enum ExplainDocComment { + #[label(expand::explain_doc_comment_inner)] + Inner { + #[primary_span] + span: Span, + }, + #[label(expand::explain_doc_comment_outer)] + Outer { + #[primary_span] + span: Span, + }, +} + +fn annotate_doc_comment( + err: &mut DiagnosticBuilder<'_, ErrorGuaranteed>, + sm: &SourceMap, + span: Span, +) { + if let Ok(src) = sm.span_to_snippet(span) { + if src.starts_with("///") || src.starts_with("/**") { + err.subdiagnostic(ExplainDocComment::Outer { span }); + } else if src.starts_with("//!") || src.starts_with("/*!") { + err.subdiagnostic(ExplainDocComment::Inner { span }); + } + } +} + +fn check_lhs_nt_follows(sess: &ParseSess, def: &ast::Item, lhs: &mbe::TokenTree) -> bool { + // lhs is going to be like TokenTree::Delimited(...), where the + // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens. + if let mbe::TokenTree::Delimited(_, delimited) = lhs { + check_matcher(sess, def, &delimited.tts) + } else { + let msg = "invalid macro matcher; matchers must be contained in balanced delimiters"; + sess.span_diagnostic.span_err(lhs.span(), msg); + false + } + // we don't abort on errors on rejection, the driver will do that for us + // after parsing/expansion. we can report every error in every macro this way. +} + +/// Checks that the lhs contains no repetition which could match an empty token +/// tree, because then the matcher would hang indefinitely. 
fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool {
    use mbe::TokenTree;
    for tt in tts {
        match *tt {
            TokenTree::Token(..)
            | TokenTree::MetaVar(..)
            | TokenTree::MetaVarDecl(..)
            | TokenTree::MetaVarExpr(..) => (),
            TokenTree::Delimited(_, ref del) => {
                if !check_lhs_no_empty_seq(sess, &del.tts) {
                    return false;
                }
            }
            TokenTree::Sequence(span, ref seq) => {
                // Reject a separator-less repetition whose every element is either a
                // `vis` fragment or a `*` / `?` sub-repetition.
                if seq.separator.is_none()
                    && seq.tts.iter().all(|seq_tt| match *seq_tt {
                        TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Vis)) => true,
                        TokenTree::Sequence(_, ref sub_seq) => {
                            sub_seq.kleene.op == mbe::KleeneOp::ZeroOrMore
                                || sub_seq.kleene.op == mbe::KleeneOp::ZeroOrOne
                        }
                        _ => false,
                    })
                {
                    let sp = span.entire();
                    sess.span_diagnostic.span_err(sp, "repetition matches empty token tree");
                    return false;
                }
                if !check_lhs_no_empty_seq(sess, &seq.tts) {
                    return false;
                }
            }
        }
    }

    true
}

/// Returns `true` if `rhs` is a delimited token tree; otherwise emits the
/// "macro rhs must be delimited" error at its span and returns `false`.
fn check_rhs(sess: &ParseSess, rhs: &mbe::TokenTree) -> bool {
    match *rhs {
        mbe::TokenTree::Delimited(..) => return true,
        _ => {
            sess.span_diagnostic.span_err(rhs.span(), "macro rhs must be delimited");
        }
    }
    false
}

/// Runs `check_matcher_core` over `matcher` with an empty FOLLOW set.
///
/// The result of the core check is discarded; success is detected by comparing
/// the diagnostic handler's error count before and after the call.
fn check_matcher(sess: &ParseSess, def: &ast::Item, matcher: &[mbe::TokenTree]) -> bool {
    let first_sets = FirstSets::new(matcher);
    let empty_suffix = TokenSet::empty();
    let err = sess.span_diagnostic.err_count();
    check_matcher_core(sess, def, &first_sets, matcher, &empty_suffix);
    err == sess.span_diagnostic.err_count()
}

/// Returns `true` if the delimited `rhs`, or any token tree nested inside it,
/// contains the three consecutive trees `compile_error`, `!`, and a delimited
/// group whose delimiter is not `Delimiter::Invisible`.
///
/// Non-delimited input yields `false`.
fn has_compile_error_macro(rhs: &mbe::TokenTree) -> bool {
    match rhs {
        mbe::TokenTree::Delimited(_sp, d) => {
            let has_compile_error = d.tts.array_windows::<3>().any(|[ident, bang, args]| {
                if let mbe::TokenTree::Token(ident) = ident &&
                        let TokenKind::Ident(ident, _) = ident.kind &&
                        ident == sym::compile_error &&
                        let mbe::TokenTree::Token(bang) = bang &&
                        let TokenKind::Not = bang.kind &&
                        let mbe::TokenTree::Delimited(_, del) = args &&
                        del.delim != Delimiter::Invisible
                {
                    true
                } else {
                    false
                }
            });
            // No direct hit at this level: look inside the nested token trees.
            if has_compile_error { true } else { d.tts.iter().any(has_compile_error_macro) }
        }
        _ => false,
    }
}

// The `FirstSets` for a matcher is a mapping from subsequences in the
// matcher to the FIRST set for that subsequence.
//
// This mapping is partially precomputed via a backwards scan over the
// token trees of the matcher, which provides a mapping from each
// repetition sequence to its *first* set.
//
// (Hypothetically, sequences should be uniquely identifiable via their
// spans, though perhaps that is false, e.g., for macro-generated macros
// that do not try to inject artificial span information. My plan is
// to try to catch such cases ahead of time and not include them in
// the precomputed mapping.)
struct FirstSets<'tt> {
    // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its
    // span in the original matcher to the First set for the inner sequence `tt ...`.
    //
    // If two sequences have the same span in a matcher, then map that
    // span to None (invalidating the mapping here and forcing the code to
    // use a slow path).
    first: FxHashMap<Span, Option<TokenSet<'tt>>>,
}

impl<'tt> FirstSets<'tt> {
    // Builds the span -> FIRST-set table for every repetition sequence found
    // in `tts`, including sequences nested in delimited groups.
    fn new(tts: &'tt [mbe::TokenTree]) -> FirstSets<'tt> {
        use mbe::TokenTree;

        let mut sets = FirstSets { first: FxHashMap::default() };
        build_recur(&mut sets, tts);
        return sets;

        // walks backward over `tts`, returning the FIRST for `tts`
        // and updating `sets` at the same time for all sequence
        // substructure we find within `tts`.
        fn build_recur<'tt>(sets: &mut FirstSets<'tt>, tts: &'tt [TokenTree]) -> TokenSet<'tt> {
            let mut first = TokenSet::empty();
            for tt in tts.iter().rev() {
                match *tt {
                    TokenTree::Token(..)
                    | TokenTree::MetaVar(..)
                    | TokenTree::MetaVarDecl(..)
                    | TokenTree::MetaVarExpr(..) => {
                        first.replace_with(TtHandle::TtRef(tt));
                    }
                    TokenTree::Delimited(span, ref delimited) => {
                        // Recurse only to record the nested sequences; the FIRST of a
                        // delimited group is always its opening delimiter.
                        build_recur(sets, &delimited.tts);
                        first.replace_with(TtHandle::from_token_kind(
                            token::OpenDelim(delimited.delim),
                            span.open,
                        ));
                    }
                    TokenTree::Sequence(sp, ref seq_rep) => {
                        let subfirst = build_recur(sets, &seq_rep.tts);

                        match sets.first.entry(sp.entire()) {
                            Entry::Vacant(vac) => {
                                vac.insert(Some(subfirst.clone()));
                            }
                            Entry::Occupied(mut occ) => {
                                // if there is already an entry, then a span must have collided.
                                // This should not happen with typical macro_rules macros,
                                // but syntax extensions need not maintain distinct spans,
                                // so distinct syntax trees can be assigned the same span.
                                // In such a case, the map cannot be trusted; so mark this
                                // entry as unusable.
                                occ.insert(None);
                            }
                        }

                        // If the sequence contents can be empty, then the first
                        // token could be the separator token itself.

                        if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
                            first.add_one_maybe(TtHandle::from_token(sep.clone()));
                        }

                        // Reverse scan: Sequence comes before `first`.
                        if subfirst.maybe_empty
                            || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore
                            || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne
                        {
                            // If sequence is potentially empty, then
                            // union them (preserving first emptiness).
                            first.add_all(&TokenSet { maybe_empty: true, ..subfirst });
                        } else {
                            // Otherwise, sequence guaranteed
                            // non-empty; replace first.
                            first = subfirst;
                        }
                    }
                }
            }

            first
        }
    }

    // walks forward over `tts` until all potential FIRST tokens are
    // identified.
    //
    // Panics if it meets a sequence whose span was never recorded by `new`.
    fn first(&self, tts: &'tt [mbe::TokenTree]) -> TokenSet<'tt> {
        use mbe::TokenTree;

        let mut first = TokenSet::empty();
        for tt in tts.iter() {
            assert!(first.maybe_empty);
            match *tt {
                TokenTree::Token(..)
                | TokenTree::MetaVar(..)
                | TokenTree::MetaVarDecl(..)
                | TokenTree::MetaVarExpr(..) => {
                    first.add_one(TtHandle::TtRef(tt));
                    return first;
                }
                TokenTree::Delimited(span, ref delimited) => {
                    first.add_one(TtHandle::from_token_kind(
                        token::OpenDelim(delimited.delim),
                        span.open,
                    ));
                    return first;
                }
                TokenTree::Sequence(sp, ref seq_rep) => {
                    // `subfirst_owned` is only initialized on the slow path (span
                    // collision), where the FIRST set has to be recomputed.
                    let subfirst_owned;
                    let subfirst = match self.first.get(&sp.entire()) {
                        Some(&Some(ref subfirst)) => subfirst,
                        Some(&None) => {
                            subfirst_owned = self.first(&seq_rep.tts);
                            &subfirst_owned
                        }
                        None => {
                            panic!("We missed a sequence during FirstSets construction");
                        }
                    };

                    // If the sequence contents can be empty, then the first
                    // token could be the separator token itself.
                    if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
                        first.add_one_maybe(TtHandle::from_token(sep.clone()));
                    }

                    assert!(first.maybe_empty);
                    first.add_all(subfirst);
                    if subfirst.maybe_empty
                        || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore
                        || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne
                    {
                        // Continue scanning for more first
                        // tokens, but also make sure we
                        // restore empty-tracking state.
                        first.maybe_empty = true;
                        continue;
                    } else {
                        return first;
                    }
                }
            }
        }

        // we only exit the loop if `tts` was empty or if every
        // element of `tts` matches the empty sequence.
        assert!(first.maybe_empty);
        first
    }
}

// Most `mbe::TokenTree`s are pre-existing in the matcher, but some are defined
// implicitly, such as opening/closing delimiters and sequence repetition ops.
// This type encapsulates both kinds. It implements `Clone` while avoiding the
// need for `mbe::TokenTree` to implement `Clone`.
#[derive(Debug)]
enum TtHandle<'tt> {
    /// This is used in most cases.
    TtRef(&'tt mbe::TokenTree),

    /// This is only used for implicit token trees. The `mbe::TokenTree` *must*
    /// be `mbe::TokenTree::Token`. No other variants are allowed. We store an
    /// `mbe::TokenTree` rather than a `Token` so that `get()` can return a
    /// `&mbe::TokenTree`.
    Token(mbe::TokenTree),
}

impl<'tt> TtHandle<'tt> {
    // Wraps an implicit token in the owned `Token` variant.
    fn from_token(tok: Token) -> Self {
        TtHandle::Token(mbe::TokenTree::Token(tok))
    }

    // Builds an implicit token from `kind` and `span` and wraps it.
    fn from_token_kind(kind: TokenKind, span: Span) -> Self {
        TtHandle::from_token(Token::new(kind, span))
    }

    // Get a reference to a token tree.
    fn get(&'tt self) -> &'tt mbe::TokenTree {
        match self {
            TtHandle::TtRef(tt) => tt,
            TtHandle::Token(token_tt) => &token_tt,
        }
    }
}

// Two handles are equal when the token trees they refer to are equal,
// regardless of which variant holds them.
impl<'tt> PartialEq for TtHandle<'tt> {
    fn eq(&self, other: &TtHandle<'tt>) -> bool {
        self.get() == other.get()
    }
}

impl<'tt> Clone for TtHandle<'tt> {
    fn clone(&self) -> Self {
        match self {
            TtHandle::TtRef(tt) => TtHandle::TtRef(tt),

            // This variant *must* contain a `mbe::TokenTree::Token`, and not
            // any other variant of `mbe::TokenTree`.
            TtHandle::Token(mbe::TokenTree::Token(tok)) => {
                TtHandle::Token(mbe::TokenTree::Token(tok.clone()))
            }

            _ => unreachable!(),
        }
    }
}

// A set of `mbe::TokenTree`s, which may include `TokenTree::Match`s
// (for macro-by-example syntactic variables). It also carries the
// `maybe_empty` flag; that is true if and only if the matcher can
// match an empty token sequence.
//
// The First set is computed on submatchers like `$($a:expr b),* $(c)* d`,
// which has corresponding FIRST = {$a:expr, c, d}.
// Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}.
//
// (Notably, we must allow for *-op to occur zero times.)
#[derive(Clone, Debug)]
struct TokenSet<'tt> {
    // Kept duplicate-free by the `add_*` methods, which check `contains` first.
    tokens: Vec<TtHandle<'tt>>,
    maybe_empty: bool,
}

impl<'tt> TokenSet<'tt> {
    // Returns a set for the empty sequence.
    fn empty() -> Self {
        TokenSet { tokens: Vec::new(), maybe_empty: true }
    }

    // Returns the set `{ tok }` for the single-token (and thus
    // non-empty) sequence [tok].
    fn singleton(tt: TtHandle<'tt>) -> Self {
        TokenSet { tokens: vec![tt], maybe_empty: false }
    }

    // Changes self to be the set `{ tok }`.
    // Since `tok` is always present, marks self as non-empty.
    fn replace_with(&mut self, tt: TtHandle<'tt>) {
        self.tokens.clear();
        self.tokens.push(tt);
        self.maybe_empty = false;
    }

    // Changes self to be the empty set `{}`; meant for use when
    // the particular token does not matter, but we want to
    // record that it occurs.
    fn replace_with_irrelevant(&mut self) {
        self.tokens.clear();
        self.maybe_empty = false;
    }

    // Adds `tok` to the set for `self`, marking sequence as non-empty.
    fn add_one(&mut self, tt: TtHandle<'tt>) {
        if !self.tokens.contains(&tt) {
            self.tokens.push(tt);
        }
        self.maybe_empty = false;
    }

    // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.)
    fn add_one_maybe(&mut self, tt: TtHandle<'tt>) {
        if !self.tokens.contains(&tt) {
            self.tokens.push(tt);
        }
    }

    // Adds all elements of `other` to this.
    //
    // (Since this is a set, we filter out duplicates.)
    //
    // If `other` is potentially empty, then preserves the previous
    // setting of the empty flag of `self`. If `other` is guaranteed
    // non-empty, then `self` is marked non-empty.
    fn add_all(&mut self, other: &Self) {
        for tt in &other.tokens {
            if !self.tokens.contains(tt) {
                self.tokens.push(tt.clone());
            }
        }
        if !other.maybe_empty {
            self.maybe_empty = false;
        }
    }
}

// Checks that `matcher` is internally consistent and that it
// can legally be followed by a token `N`, for all `N` in `follow`.
// (If `follow` is empty, then it imposes no constraint on
// the `matcher`.)
//
// Returns the set of NT tokens that could possibly come last in
// `matcher`. (If `matcher` matches the empty sequence, then
// `maybe_empty` will be set to true.)
//
// Requires that `first_sets` is pre-computed for `matcher`;
// see `FirstSets::new`.
fn check_matcher_core<'tt>(
    sess: &ParseSess,
    def: &ast::Item,
    first_sets: &FirstSets<'tt>,
    matcher: &'tt [mbe::TokenTree],
    follow: &TokenSet<'tt>,
) -> TokenSet<'tt> {
    use mbe::TokenTree;

    let mut last = TokenSet::empty();

    // 2. For each token and suffix [T, SUFFIX] in M:
    // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty,
    // then ensure T can also be followed by any element of FOLLOW.
    'each_token: for i in 0..matcher.len() {
        let token = &matcher[i];
        let suffix = &matcher[i + 1..];

        // FIRST(SUFFIX), extended with FOLLOW when SUFFIX can match nothing.
        let build_suffix_first = || {
            let mut s = first_sets.first(suffix);
            if s.maybe_empty {
                s.add_all(follow);
            }
            s
        };

        // (we build `suffix_first` on demand below; you can tell
        // which cases are supposed to fall through by looking for the
        // initialization of this variable.)
        let suffix_first;

        // First, update `last` so that it corresponds to the set
        // of NT tokens that might end the sequence `... token`.
        match *token {
            TokenTree::Token(..)
            | TokenTree::MetaVar(..)
            | TokenTree::MetaVarDecl(..)
            | TokenTree::MetaVarExpr(..) => {
                if token_can_be_followed_by_any(token) {
                    // don't need to track tokens that work with any,
                    last.replace_with_irrelevant();
                    // ... and don't need to check tokens that can be
                    // followed by anything against SUFFIX.
                    continue 'each_token;
                } else {
                    last.replace_with(TtHandle::TtRef(token));
                    suffix_first = build_suffix_first();
                }
            }
            TokenTree::Delimited(span, ref d) => {
                // Inside a delimited group the only thing that can follow the
                // contents is the matching closing delimiter.
                let my_suffix = TokenSet::singleton(TtHandle::from_token_kind(
                    token::CloseDelim(d.delim),
                    span.close,
                ));
                check_matcher_core(sess, def, first_sets, &d.tts, &my_suffix);
                // don't track non NT tokens
                last.replace_with_irrelevant();

                // also, we don't need to check delimited sequences
                // against SUFFIX
                continue 'each_token;
            }
            TokenTree::Sequence(_, ref seq_rep) => {
                suffix_first = build_suffix_first();
                // The trick here: when we check the interior, we want
                // to include the separator (if any) as a potential
                // (but not guaranteed) element of FOLLOW. So in that
                // case, we make a temp copy of suffix and stuff
                // delimiter in there.
                //
                // FIXME: Should I first scan suffix_first to see if
                // delimiter is already in it before I go through the
                // work of cloning it? But then again, this way I may
                // get a "tighter" span?
                let mut new;
                let my_suffix = if let Some(sep) = &seq_rep.separator {
                    new = suffix_first.clone();
                    new.add_one_maybe(TtHandle::from_token(sep.clone()));
                    &new
                } else {
                    &suffix_first
                };

                // At this point, `suffix_first` is built, and
                // `my_suffix` is some TokenSet that we can use
                // for checking the interior of `seq_rep`.
                let next = check_matcher_core(sess, def, first_sets, &seq_rep.tts, my_suffix);
                if next.maybe_empty {
                    last.add_all(&next);
                } else {
                    last = next;
                }

                // the recursive call to check_matcher_core already ran the 'each_last
                // check below, so we can just keep going forward here.
                continue 'each_token;
            }
        }

        // (`suffix_first` guaranteed initialized once reaching here.)

        // Now `last` holds the complete set of NT tokens that could
        // end the sequence before SUFFIX. Check that every one works with `suffix`.
        for tt in &last.tokens {
            if let &TokenTree::MetaVarDecl(span, name, Some(kind)) = tt.get() {
                for next_token in &suffix_first.tokens {
                    let next_token = next_token.get();

                    // Check if the old pat is used and the next token is `|`
                    // to warn about incompatibility with Rust 2021.
                    // We only emit this lint if we're parsing the original
                    // definition of this macro_rules, not while (re)parsing
                    // the macro when compiling another crate that is using the
                    // macro. (See #86567.)
                    // Macros defined in the current crate have a real node id,
                    // whereas macros from an external crate have a dummy id.
                    if def.id != DUMMY_NODE_ID
                        && matches!(kind, NonterminalKind::PatParam { inferred: true })
                        && matches!(next_token, TokenTree::Token(token) if token.kind == BinOp(token::BinOpToken::Or))
                    {
                        // Suggest using `pat_param` instead, for example: `$x:pat` -> `$x:pat_param`.
                        let suggestion = quoted_tt_to_string(&TokenTree::MetaVarDecl(
                            span,
                            name,
                            Some(NonterminalKind::PatParam { inferred: false }),
                        ));
                        sess.buffer_lint_with_diagnostic(
                            &RUST_2021_INCOMPATIBLE_OR_PATTERNS,
                            span,
                            ast::CRATE_NODE_ID,
                            "the meaning of the `pat` fragment specifier is changing in Rust 2021, which may affect this macro",
                            BuiltinLintDiagnostics::OrPatternsBackCompat(span, suggestion),
                        );
                    }
                    match is_in_follow(next_token, kind) {
                        IsInFollow::Yes => {}
                        IsInFollow::No(possible) => {
                            // Use the definite wording only when there is exactly one
                            // candidate on each side of the boundary.
                            let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1
                            {
                                "is"
                            } else {
                                "may be"
                            };

                            let sp = next_token.span();
                            let mut err = sess.span_diagnostic.struct_span_err(
                                sp,
                                &format!(
                                    "`${name}:{frag}` {may_be} followed by `{next}`, which \
                                     is not allowed for `{frag}` fragments",
                                    name = name,
                                    frag = kind,
                                    next = quoted_tt_to_string(next_token),
                                    may_be = may_be
                                ),
                            );
                            err.span_label(sp, format!("not allowed after `{}` fragments", kind));

                            if kind == NonterminalKind::PatWithOr
                                && sess.edition.rust_2021()
                                && next_token.is_token(&BinOp(token::BinOpToken::Or))
                            {
                                let suggestion = quoted_tt_to_string(&TokenTree::MetaVarDecl(
                                    span,
                                    name,
                                    Some(NonterminalKind::PatParam { inferred: false }),
                                ));
                                err.span_suggestion(
                                    span,
                                    "try a `pat_param` fragment specifier instead",
                                    suggestion,
                                    Applicability::MaybeIncorrect,
                                );
                            }

                            // List the tokens that would have been accepted here, if any.
                            let msg = "allowed there are: ";
                            match possible {
                                &[] => {}
                                &[t] => {
                                    err.note(&format!(
                                        "only {} is allowed after `{}` fragments",
                                        t, kind,
                                    ));
                                }
                                ts => {
                                    err.note(&format!(
                                        "{}{} or {}",
                                        msg,
                                        ts[..ts.len() - 1]
                                            .iter()
                                            .copied()
                                            .collect::<Vec<_>>()
                                            .join(", "),
                                        ts[ts.len() - 1],
                                    ));
                                }
                            }
                            err.emit();
                        }
                    }
                }
            }
        }
    }
    last
}

fn token_can_be_followed_by_any(tok: &mbe::TokenTree) -> bool {
    if let mbe::TokenTree::MetaVarDecl(_, _, Some(kind)) = *tok {
        frag_can_be_followed_by_any(kind)
    } else {
        // (Non NT's
can always be followed by anything in matchers.) + true + } +} + +/// Returns `true` if a fragment of type `frag` can be followed by any sort of +/// token. We use this (among other things) as a useful approximation +/// for when `frag` can be followed by a repetition like `$(...)*` or +/// `$(...)+`. In general, these can be a bit tricky to reason about, +/// so we adopt a conservative position that says that any fragment +/// specifier which consumes at most one token tree can be followed by +/// a fragment specifier (indeed, these fragments can be followed by +/// ANYTHING without fear of future compatibility hazards). +fn frag_can_be_followed_by_any(kind: NonterminalKind) -> bool { + matches!( + kind, + NonterminalKind::Item // always terminated by `}` or `;` + | NonterminalKind::Block // exactly one token tree + | NonterminalKind::Ident // exactly one token tree + | NonterminalKind::Literal // exactly one token tree + | NonterminalKind::Meta // exactly one token tree + | NonterminalKind::Lifetime // exactly one token tree + | NonterminalKind::TT // exactly one token tree + ) +} + +enum IsInFollow { + Yes, + No(&'static [&'static str]), +} + +/// Returns `true` if `frag` can legally be followed by the token `tok`. For +/// fragments that can consume an unbounded number of tokens, `tok` +/// must be within a well-defined follow set. This is intended to +/// guarantee future compatibility: for example, without this rule, if +/// we expanded `expr` to include a new binary operator, we might +/// break macros that were relying on that binary operator as a +/// separator. +// when changing this do not forget to update doc/book/macros.md! +fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow { + use mbe::TokenTree; + + if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok { + // closing a token tree can never be matched by any fragment; + // iow, we always require that `(` and `)` match, etc. 
+ IsInFollow::Yes + } else { + match kind { + NonterminalKind::Item => { + // since items *must* be followed by either a `;` or a `}`, we can + // accept anything after them + IsInFollow::Yes + } + NonterminalKind::Block => { + // anything can follow block, the braces provide an easy boundary to + // maintain + IsInFollow::Yes + } + NonterminalKind::Stmt | NonterminalKind::Expr => { + const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"]; + match tok { + TokenTree::Token(token) => match token.kind { + FatArrow | Comma | Semi => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + }, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::PatParam { .. } => { + const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"]; + match tok { + TokenTree::Token(token) => match token.kind { + FatArrow | Comma | Eq | BinOp(token::Or) => IsInFollow::Yes, + Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + }, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::PatWithOr { .. 
} => { + const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`if`", "`in`"]; + match tok { + TokenTree::Token(token) => match token.kind { + FatArrow | Comma | Eq => IsInFollow::Yes, + Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + }, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::Path | NonterminalKind::Ty => { + const TOKENS: &[&str] = &[ + "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`", + "`where`", + ]; + match tok { + TokenTree::Token(token) => match token.kind { + OpenDelim(Delimiter::Brace) + | OpenDelim(Delimiter::Bracket) + | Comma + | FatArrow + | Colon + | Eq + | Gt + | BinOp(token::Shr) + | Semi + | BinOp(token::Or) => IsInFollow::Yes, + Ident(name, false) if name == kw::As || name == kw::Where => { + IsInFollow::Yes + } + _ => IsInFollow::No(TOKENS), + }, + TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Block)) => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + } + } + NonterminalKind::Ident | NonterminalKind::Lifetime => { + // being a single token, idents and lifetimes are harmless + IsInFollow::Yes + } + NonterminalKind::Literal => { + // literals may be of a single token, or two tokens (negative numbers) + IsInFollow::Yes + } + NonterminalKind::Meta | NonterminalKind::TT => { + // being either a single token or a delimited sequence, tt is + // harmless + IsInFollow::Yes + } + NonterminalKind::Vis => { + // Explicitly disallow `priv`, on the off chance it comes back. 
+ const TOKENS: &[&str] = &["`,`", "an ident", "a type"]; + match tok { + TokenTree::Token(token) => match token.kind { + Comma => IsInFollow::Yes, + Ident(name, is_raw) if is_raw || name != kw::Priv => IsInFollow::Yes, + _ => { + if token.can_begin_type() { + IsInFollow::Yes + } else { + IsInFollow::No(TOKENS) + } + } + }, + TokenTree::MetaVarDecl( + _, + _, + Some(NonterminalKind::Ident | NonterminalKind::Ty | NonterminalKind::Path), + ) => IsInFollow::Yes, + _ => IsInFollow::No(TOKENS), + } + } + } + } +} + +fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String { + match *tt { + mbe::TokenTree::Token(ref token) => pprust::token_to_string(&token).into(), + mbe::TokenTree::MetaVar(_, name) => format!("${}", name), + mbe::TokenTree::MetaVarDecl(_, name, Some(kind)) => format!("${}:{}", name, kind), + mbe::TokenTree::MetaVarDecl(_, name, None) => format!("${}:", name), + _ => panic!( + "{}", + "unexpected mbe::TokenTree::{Sequence or Delimited} \ + in follow set checker" + ), + } +} + +fn parser_from_cx(sess: &ParseSess, tts: TokenStream) -> Parser<'_> { + Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS) +} + +/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For +/// other tokens, this is "unexpected token...". 
+fn parse_failure_msg(tok: &Token) -> String { + match tok.kind { + token::Eof => "unexpected end of macro invocation".to_string(), + _ => format!("no rules expected the token `{}`", pprust::token_to_string(tok),), + } +} diff --git a/compiler/rustc_expand/src/mbe/metavar_expr.rs b/compiler/rustc_expand/src/mbe/metavar_expr.rs new file mode 100644 index 000000000..fc808401a --- /dev/null +++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs @@ -0,0 +1,161 @@ +use rustc_ast::token::{self, Delimiter}; +use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree}; +use rustc_ast::{LitIntType, LitKind}; +use rustc_ast_pretty::pprust; +use rustc_errors::{Applicability, PResult}; +use rustc_session::parse::ParseSess; +use rustc_span::symbol::Ident; +use rustc_span::Span; + +/// A meta-variable expression, for expansions based on properties of meta-variables. +#[derive(Debug, Clone, PartialEq, Encodable, Decodable)] +pub(crate) enum MetaVarExpr { + /// The number of repetitions of an identifier, optionally limited to a number + /// of outer-most repetition depths. If the depth limit is `None` then the depth is unlimited. + Count(Ident, Option<usize>), + + /// Ignore a meta-variable for repetition without expansion. + Ignore(Ident), + + /// The index of the repetition at a particular depth, where 0 is the inner-most + /// repetition. The `usize` is the depth. + Index(usize), + + /// The length of the repetition at a particular depth, where 0 is the inner-most + /// repetition. The `usize` is the depth. + Length(usize), +} + +impl MetaVarExpr { + /// Attempt to parse a meta-variable expression from a token stream. 
+ pub(crate) fn parse<'sess>( + input: &TokenStream, + outer_span: Span, + sess: &'sess ParseSess, + ) -> PResult<'sess, MetaVarExpr> { + let mut tts = input.trees(); + let ident = parse_ident(&mut tts, sess, outer_span)?; + let Some(TokenTree::Delimited(_, Delimiter::Parenthesis, args)) = tts.next() else { + let msg = "meta-variable expression parameter must be wrapped in parentheses"; + return Err(sess.span_diagnostic.struct_span_err(ident.span, msg)); + }; + check_trailing_token(&mut tts, sess)?; + let mut iter = args.trees(); + let rslt = match &*ident.as_str() { + "count" => parse_count(&mut iter, sess, ident.span)?, + "ignore" => MetaVarExpr::Ignore(parse_ident(&mut iter, sess, ident.span)?), + "index" => MetaVarExpr::Index(parse_depth(&mut iter, sess, ident.span)?), + "length" => MetaVarExpr::Length(parse_depth(&mut iter, sess, ident.span)?), + _ => { + let err_msg = "unrecognized meta-variable expression"; + let mut err = sess.span_diagnostic.struct_span_err(ident.span, err_msg); + err.span_suggestion( + ident.span, + "supported expressions are count, ignore, index and length", + "", + Applicability::MachineApplicable, + ); + return Err(err); + } + }; + check_trailing_token(&mut iter, sess)?; + Ok(rslt) + } + + pub(crate) fn ident(&self) -> Option<Ident> { + match *self { + MetaVarExpr::Count(ident, _) | MetaVarExpr::Ignore(ident) => Some(ident), + MetaVarExpr::Index(..) | MetaVarExpr::Length(..) => None, + } + } +} + +// Checks if there are any remaining tokens. For example, `${ignore(ident ... 
a b c ...)}` +fn check_trailing_token<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, +) -> PResult<'sess, ()> { + if let Some(tt) = iter.next() { + let mut diag = sess + .span_diagnostic + .struct_span_err(tt.span(), &format!("unexpected token: {}", pprust::tt_to_string(tt))); + diag.span_note(tt.span(), "meta-variable expression must not have trailing tokens"); + Err(diag) + } else { + Ok(()) + } +} + +/// Parse a meta-variable `count` expression: `count(ident[, depth])` +fn parse_count<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, + span: Span, +) -> PResult<'sess, MetaVarExpr> { + let ident = parse_ident(iter, sess, span)?; + let depth = if try_eat_comma(iter) { Some(parse_depth(iter, sess, span)?) } else { None }; + Ok(MetaVarExpr::Count(ident, depth)) +} + +/// Parses the depth used by index(depth) and length(depth). +fn parse_depth<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, + span: Span, +) -> PResult<'sess, usize> { + let Some(tt) = iter.next() else { return Ok(0) }; + let TokenTree::Token(token::Token { + kind: token::TokenKind::Literal(lit), .. 
+ }, _) = tt else { + return Err(sess.span_diagnostic.struct_span_err( + span, + "meta-variable expression depth must be a literal" + )); + }; + if let Ok(lit_kind) = LitKind::from_lit_token(*lit) + && let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind + && let Ok(n_usize) = usize::try_from(n_u128) + { + Ok(n_usize) + } + else { + let msg = "only unsuffixes integer literals are supported in meta-variable expressions"; + Err(sess.span_diagnostic.struct_span_err(span, msg)) + } +} + +/// Parses an generic ident +fn parse_ident<'sess>( + iter: &mut CursorRef<'_>, + sess: &'sess ParseSess, + span: Span, +) -> PResult<'sess, Ident> { + if let Some(tt) = iter.next() && let TokenTree::Token(token, _) = tt { + if let Some((elem, false)) = token.ident() { + return Ok(elem); + } + let token_str = pprust::token_to_string(token); + let mut err = sess.span_diagnostic.struct_span_err( + span, + &format!("expected identifier, found `{}`", &token_str) + ); + err.span_suggestion( + token.span, + &format!("try removing `{}`", &token_str), + "", + Applicability::MaybeIncorrect, + ); + return Err(err); + } + Err(sess.span_diagnostic.struct_span_err(span, "expected identifier")) +} + +/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the +/// iterator is not modified and the result is `false`. +fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool { + if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. 
}, _)) = iter.look_ahead(0) { + let _ = iter.next(); + return true; + } + false +} diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs new file mode 100644 index 000000000..ee17d54f6 --- /dev/null +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -0,0 +1,366 @@ +use crate::mbe::macro_parser::count_metavar_decls; +use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree}; + +use rustc_ast::token::{self, Delimiter, Token}; +use rustc_ast::{tokenstream, NodeId}; +use rustc_ast_pretty::pprust; +use rustc_feature::Features; +use rustc_session::parse::{feature_err, ParseSess}; +use rustc_span::symbol::{kw, sym, Ident}; + +use rustc_span::edition::Edition; +use rustc_span::{Span, SyntaxContext}; + +const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \ + `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \ + `literal`, `path`, `meta`, `tt`, `item` and `vis`"; + +/// Takes a `tokenstream::TokenStream` and returns a `Vec<self::TokenTree>`. Specifically, this +/// takes a generic `TokenStream`, such as is used in the rest of the compiler, and returns a +/// collection of `TokenTree` for use in parsing a macro. +/// +/// # Parameters +/// +/// - `input`: a token stream to read from, the contents of which we are parsing. +/// - `parsing_patterns`: `parse` can be used to parse either the "patterns" or the "body" of a +/// macro. Both take roughly the same form _except_ that: +/// - In a pattern, metavars are declared with their "matcher" type. For example `$var:expr` or +/// `$id:ident`. In this example, `expr` and `ident` are "matchers". They are not present in the +/// body of a macro rule -- just in the pattern. +/// - Metavariable expressions are only valid in the "body", not the "pattern". +/// - `sess`: the parsing session. Any errors will be emitted to this session. +/// - `node_id`: the NodeId of the macro we are parsing. 
/// - `features`: language features so we can do feature gating.
/// - `edition`: the edition used to interpret a fragment specifier whose span has the root
///   syntax context; specifiers with any other context use the edition of their own span.
///
/// # Returns
///
/// A collection of `self::TokenTree`. There may also be some errors emitted to `sess`.
pub(super) fn parse(
    input: tokenstream::TokenStream,
    parsing_patterns: bool,
    sess: &ParseSess,
    node_id: NodeId,
    features: &Features,
    edition: Edition,
) -> Vec<TokenTree> {
    // Will contain the final collection of `self::TokenTree`
    let mut result = Vec::new();

    // For each token tree in `input`, parse the token into a `self::TokenTree`, consuming
    // additional trees if need be.
    let mut trees = input.into_trees();
    while let Some(tree) = trees.next() {
        // Given the parsed tree, if there is a metavar and we are expecting matchers, actually
        // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`).
        let tree = parse_tree(tree, &mut trees, parsing_patterns, sess, node_id, features, edition);
        match tree {
            TokenTree::MetaVar(start_sp, ident) if parsing_patterns => {
                // `span` ends up as the location recorded on the `None` declaration pushed
                // below when no usable `:fragment` follows the metavar.
                let span = match trees.next() {
                    Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span }, _)) => {
                        match trees.next() {
                            Some(tokenstream::TokenTree::Token(token, _)) => match token.ident() {
                                Some((frag, _)) => {
                                    // Cover the whole `$name:frag` declaration.
                                    let span = token.span.with_lo(start_sp.lo());

                                    let kind =
                                        token::NonterminalKind::from_symbol(frag.name, || {
                                            // FIXME(#85708) - once we properly decode a foreign
                                            // crate's `SyntaxContext::root`, then we can replace
                                            // this with just `span.edition()`. A
                                            // `SyntaxContext::root()` from the current crate will
                                            // have the edition of the current crate, and a
                                            // `SyntaxContext::root()` from a foreign crate will
                                            // have the edition of that crate (which we manually
                                            // retrieve via the `edition` parameter).
                                            if span.ctxt() == SyntaxContext::root() {
                                                edition
                                            } else {
                                                span.edition()
                                            }
                                        })
                                        .unwrap_or_else(
                                            || {
                                                // Unknown specifier: report it, then carry on
                                                // with `ident` as a placeholder kind.
                                                let msg = format!(
                                                    "invalid fragment specifier `{}`",
                                                    frag.name
                                                );
                                                sess.span_diagnostic
                                                    .struct_span_err(span, &msg)
                                                    .help(VALID_FRAGMENT_NAMES_MSG)
                                                    .emit();
                                                token::NonterminalKind::Ident
                                            },
                                        );
                                    result.push(TokenTree::MetaVarDecl(span, ident, Some(kind)));
                                    continue;
                                }
                                // `:` followed by a token that is not an identifier.
                                _ => token.span,
                            },
                            // `:` followed by a non-token tree, or by nothing (then the span of
                            // the `:` itself is used).
                            tree => tree.as_ref().map_or(span, tokenstream::TokenTree::span),
                        }
                    }
                    // No `:` after the metavar: use the span of whatever tree was read instead,
                    // or the metavar's own span at end of input.
                    tree => tree.as_ref().map_or(start_sp, tokenstream::TokenTree::span),
                };

                // Declaration without a fragment specifier.
                result.push(TokenTree::MetaVarDecl(span, ident, None));
            }

            // Not a metavar or no matchers allowed, so just return the tree
            _ => result.push(tree),
        }
    }
    result
}

/// Asks for the `macro_metavar_expr` feature if it is not already declared
fn maybe_emit_macro_metavar_expr_feature(features: &Features, sess: &ParseSess, span: Span) {
    if !features.macro_metavar_expr {
        let msg = "meta-variable expressions are unstable";
        feature_err(&sess, sym::macro_metavar_expr, span, msg).emit();
    }
}

/// Takes a `tokenstream::TokenTree` and returns a `self::TokenTree`. Specifically, this takes a
/// generic `TokenTree`, such as is used in the rest of the compiler, and returns a `TokenTree`
/// for use in parsing a macro.
///
/// Converting the given tree may involve reading more tokens.
///
/// # Parameters
///
/// - `tree`: the tree we wish to convert.
/// - `outer_trees`: an iterator over trees. We may need to read more tokens from it in order to finish
///   converting `tree`
/// - `parsing_patterns`: same as [parse].
/// - `sess`: the parsing session. Any errors will be emitted to this session.
/// - `node_id`, `edition`: same as [parse]; forwarded to the recursive calls to [parse].
/// - `features`: language features so we can do feature gating.
fn parse_tree(
    tree: tokenstream::TokenTree,
    outer_trees: &mut impl Iterator<Item = tokenstream::TokenTree>,
    parsing_patterns: bool,
    sess: &ParseSess,
    node_id: NodeId,
    features: &Features,
    edition: Edition,
) -> TokenTree {
    // Depending on what `tree` is, we could be parsing different parts of a macro
    match tree {
        // `tree` is a `$` token. Look at the next token in `outer_trees`
        tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _) => {
            // FIXME: Handle `Invisible`-delimited groups in a more systematic way
            // during parsing.
            //
            // If the tree after `$` is an `Invisible`-delimited group, look inside it: `next`
            // becomes the group's first tree and any further reads (`trees`) come from the
            // group rather than from `outer_trees`.
            let mut next = outer_trees.next();
            let mut trees: Box<dyn Iterator<Item = tokenstream::TokenTree>>;
            if let Some(tokenstream::TokenTree::Delimited(_, Delimiter::Invisible, tts)) = next {
                trees = Box::new(tts.into_trees());
                next = trees.next();
            } else {
                trees = Box::new(outer_trees);
            }

            match next {
                // `tree` is followed by a delimited set of token trees.
                Some(tokenstream::TokenTree::Delimited(delim_span, delim, tts)) => {
                    if parsing_patterns {
                        // In a pattern only `$( ... )` is allowed. Other delimiters are
                        // reported, but parsing still continues as a sequence below.
                        if delim != Delimiter::Parenthesis {
                            span_dollar_dollar_or_metavar_in_the_lhs_err(
                                sess,
                                &Token { kind: token::OpenDelim(delim), span: delim_span.entire() },
                            );
                        }
                    } else {
                        match delim {
                            Delimiter::Brace => {
                                // The delimiter is `{`. This indicates the beginning
                                // of a meta-variable expression (e.g. `${count(ident)}`).
                                // Try to parse the meta-variable expression.
                                match MetaVarExpr::parse(&tts, delim_span.entire(), sess) {
                                    Err(mut err) => {
                                        err.emit();
                                        // Return the `$` we already read and stop here, so that
                                        // no unrelated follow-up diagnostics are produced for
                                        // the malformed expression.
                                        return TokenTree::token(token::Dollar, span);
                                    }
                                    Ok(elem) => {
                                        maybe_emit_macro_metavar_expr_feature(
                                            features,
                                            sess,
                                            delim_span.entire(),
                                        );
                                        return TokenTree::MetaVarExpr(delim_span, elem);
                                    }
                                }
                            }
                            Delimiter::Parenthesis => {}
                            // Any other delimiter is reported; parsing then continues as a
                            // sequence below.
                            _ => {
                                let tok = pprust::token_kind_to_string(&token::OpenDelim(delim));
                                let msg = format!("expected `(` or `{{`, found `{}`", tok);
                                sess.span_diagnostic.span_err(delim_span.entire(), &msg);
                            }
                        }
                    }
                    // If we didn't find a metavar expression above, then we must have a
                    // repetition sequence in the macro (e.g. `$(pat)*`). Parse the
                    // contents of the sequence itself
                    let sequence = parse(tts, parsing_patterns, sess, node_id, features, edition);
                    // Get the Kleene operator and optional separator
                    let (separator, kleene) =
                        parse_sep_and_kleene_op(&mut trees, delim_span.entire(), sess);
                    // Count the number of captured "names" (i.e., named metavars)
                    let num_captures =
                        if parsing_patterns { count_metavar_decls(&sequence) } else { 0 };
                    TokenTree::Sequence(
                        delim_span,
                        SequenceRepetition { tts: sequence, separator, kleene, num_captures },
                    )
                }

                // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate`
                // special metavariable that names the crate of the invocation.
                Some(tokenstream::TokenTree::Token(token, _)) if token.is_ident() => {
                    let (ident, is_raw) = token.ident().unwrap();
                    // Extend the span back to the `$`.
                    let span = ident.span.with_lo(span.lo());
                    if ident.name == kw::Crate && !is_raw {
                        TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span)
                    } else {
                        TokenTree::MetaVar(span, ident)
                    }
                }

                // `tree` is followed by another `$`. This is an escaped `$`.
                Some(tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _)) => {
                    if parsing_patterns {
                        span_dollar_dollar_or_metavar_in_the_lhs_err(
                            sess,
                            &Token { kind: token::Dollar, span },
                        );
                    } else {
                        maybe_emit_macro_metavar_expr_feature(features, sess, span);
                    }
                    TokenTree::token(token::Dollar, span)
                }

                // `tree` is followed by some other token. This is an error; a metavar with an
                // empty identifier is returned as a placeholder.
                Some(tokenstream::TokenTree::Token(token, _)) => {
                    let msg = format!(
                        "expected identifier, found `{}`",
                        pprust::token_to_string(&token),
                    );
                    sess.span_diagnostic.span_err(token.span, &msg);
                    TokenTree::MetaVar(token.span, Ident::empty())
                }

                // There are no more tokens. Just return the `$` we already have.
                None => TokenTree::token(token::Dollar, span),
            }
        }

        // `tree` is an arbitrary token. Keep it.
        tokenstream::TokenTree::Token(token, _) => TokenTree::Token(token),

        // `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to
        // descend into the delimited set and further parse it.
        tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited(
            span,
            Delimited {
                delim,
                tts: parse(tts, parsing_patterns, sess, node_id, features, edition),
            },
        ),
    }
}

/// Takes a token and returns `Some(KleeneOp)` if the token is `+` `*` or `?`. Otherwise, return
/// `None`.
fn kleene_op(token: &Token) -> Option<KleeneOp> {
    match token.kind {
        token::BinOp(token::Star) => Some(KleeneOp::ZeroOrMore),
        token::BinOp(token::Plus) => Some(KleeneOp::OneOrMore),
        token::Question => Some(KleeneOp::ZeroOrOne),
        _ => None,
    }
}

/// Parse the next token tree of the input looking for a KleeneOp.
Returns +/// +/// - Ok(Ok((op, span))) if the next token tree is a KleeneOp +/// - Ok(Err(tok, span)) if the next token tree is a token but not a KleeneOp +/// - Err(span) if the next token tree is not a token +fn parse_kleene_op( + input: &mut impl Iterator<Item = tokenstream::TokenTree>, + span: Span, +) -> Result<Result<(KleeneOp, Span), Token>, Span> { + match input.next() { + Some(tokenstream::TokenTree::Token(token, _)) => match kleene_op(&token) { + Some(op) => Ok(Ok((op, token.span))), + None => Ok(Err(token)), + }, + tree => Err(tree.as_ref().map_or(span, tokenstream::TokenTree::span)), + } +} + +/// Attempt to parse a single Kleene star, possibly with a separator. +/// +/// For example, in a pattern such as `$(a),*`, `a` is the pattern to be repeated, `,` is the +/// separator, and `*` is the Kleene operator. This function is specifically concerned with parsing +/// the last two tokens of such a pattern: namely, the optional separator and the Kleene operator +/// itself. Note that here we are parsing the _macro_ itself, rather than trying to match some +/// stream of tokens in an invocation of a macro. +/// +/// This function will take some input iterator `input` corresponding to `span` and a parsing +/// session `sess`. If the next one (or possibly two) tokens in `input` correspond to a Kleene +/// operator and separator, then a tuple with `(separator, KleeneOp)` is returned. Otherwise, an +/// error with the appropriate span is emitted to `sess` and a dummy value is returned. 
+fn parse_sep_and_kleene_op( + input: &mut impl Iterator<Item = tokenstream::TokenTree>, + span: Span, + sess: &ParseSess, +) -> (Option<Token>, KleeneToken) { + // We basically look at two token trees here, denoted as #1 and #2 below + let span = match parse_kleene_op(input, span) { + // #1 is a `?`, `+`, or `*` KleeneOp + Ok(Ok((op, span))) => return (None, KleeneToken::new(op, span)), + + // #1 is a separator followed by #2, a KleeneOp + Ok(Err(token)) => match parse_kleene_op(input, token.span) { + // #2 is the `?` Kleene op, which does not take a separator (error) + Ok(Ok((KleeneOp::ZeroOrOne, span))) => { + // Error! + sess.span_diagnostic.span_err( + token.span, + "the `?` macro repetition operator does not take a separator", + ); + + // Return a dummy + return (None, KleeneToken::new(KleeneOp::ZeroOrMore, span)); + } + + // #2 is a KleeneOp :D + Ok(Ok((op, span))) => return (Some(token), KleeneToken::new(op, span)), + + // #2 is a random token or not a token at all :( + Ok(Err(Token { span, .. })) | Err(span) => span, + }, + + // #1 is not a token + Err(span) => span, + }; + + // If we ever get to this point, we have experienced an "unexpected token" error + sess.span_diagnostic.span_err(span, "expected one of: `*`, `+`, or `?`"); + + // Return a dummy + (None, KleeneToken::new(KleeneOp::ZeroOrMore, span)) +} + +// `$$` or a meta-variable is the lhs of a macro but shouldn't. +// +// For example, `macro_rules! 
foo { ( ${length()} ) => {} }` +fn span_dollar_dollar_or_metavar_in_the_lhs_err<'sess>(sess: &'sess ParseSess, token: &Token) { + sess.span_diagnostic + .span_err(token.span, &format!("unexpected token: {}", pprust::token_to_string(token))); + sess.span_diagnostic.span_note_without_error( + token.span, + "`$$` and meta-variable expressions are not allowed inside macro parameter definitions", + ); +} diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs new file mode 100644 index 000000000..e47ea83ac --- /dev/null +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -0,0 +1,580 @@ +use crate::base::ExtCtxt; +use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch}; +use crate::mbe::{self, MetaVarExpr}; +use rustc_ast::mut_visit::{self, MutVisitor}; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree}; +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::{pluralize, PResult}; +use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed}; +use rustc_span::hygiene::{LocalExpnId, Transparency}; +use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent}; +use rustc_span::Span; + +use smallvec::{smallvec, SmallVec}; +use std::mem; + +// A Marker adds the given mark to the syntax context. +struct Marker(LocalExpnId, Transparency); + +impl MutVisitor for Marker { + const VISIT_TOKENS: bool = true; + + fn visit_span(&mut self, span: &mut Span) { + *span = span.apply_mark(self.0.to_expn_id(), self.1) + } +} + +/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`). +enum Frame<'a> { + Delimited { tts: &'a [mbe::TokenTree], idx: usize, delim: Delimiter, span: DelimSpan }, + Sequence { tts: &'a [mbe::TokenTree], idx: usize, sep: Option<Token> }, +} + +impl<'a> Frame<'a> { + /// Construct a new frame around the delimited set of tokens. 
// NOTE(review): this chunk opens inside an `impl` block whose header is not visible here
// (presumably `impl<'a> Frame<'a>`); `new` and the closing brace below are reproduced unchanged.
    /// Construct a new frame around the delimited set of tokens, with the cursor (`idx`) at the
    /// first token tree.
    fn new(src: &'a mbe::Delimited, span: DelimSpan) -> Frame<'a> {
        Frame::Delimited { tts: &src.tts, idx: 0, delim: src.delim, span }
    }
}

/// Iterating a frame yields the token tree under the cursor and then advances the cursor. Once
/// the cursor is past the end of `tts`, `None` is returned (the cursor still advances).
impl<'a> Iterator for Frame<'a> {
    type Item = &'a mbe::TokenTree;

    fn next(&mut self) -> Option<&'a mbe::TokenTree> {
        match self {
            Frame::Delimited { tts, ref mut idx, .. }
            | Frame::Sequence { tts, ref mut idx, .. } => {
                let res = tts.get(*idx);
                *idx += 1;
                res
            }
        }
    }
}

/// This can do Macro-By-Example transcription.
/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the
///   invocation. We are assuming we already know there is a match.
/// - `src` is the RHS of the MBE, that is, the "example" we are filling in.
///
/// For example,
///
/// ```rust
/// macro_rules! foo {
///     ($id:ident) => { println!("{}", stringify!($id)); }
/// }
///
/// foo!(bar);
/// ```
///
/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`.
///
/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
///
/// Along the way, we do some additional error checking.
pub(super) fn transcribe<'a>(
    cx: &ExtCtxt<'a>,
    interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    src: &mbe::Delimited,
    src_span: DelimSpan,
    transparency: Transparency,
) -> PResult<'a, TokenStream> {
    // Nothing for us to transcribe...
    if src.tts.is_empty() {
        return Ok(TokenStream::default());
    }

    // We descend into the RHS (`src`), expanding things as we go. This stack contains the things
    // we have yet to expand/are still expanding. We start the stack off with the whole RHS.
    let mut stack: SmallVec<[Frame<'_>; 1]> = smallvec![Frame::new(src, src_span)];

    // As we descend in the RHS, we will need to be able to match nested sequences of matchers.
    // `repeats` keeps track of where we are in matching at each level, with the last element being
    // the most deeply nested sequence. This is used as a stack.
    let mut repeats = Vec::new();

    // `result` contains resulting token stream from the TokenTree we just finished processing. At
    // the end, this will contain the full result of transcription, but at arbitrary points during
    // `transcribe`, `result` will contain subsets of the final result.
    //
    // Specifically, as we descend into each TokenTree, we will push the existing results onto the
    // `result_stack` and clear `result`. We will then produce the results of transcribing the
    // TokenTree into `result`. Then, as we unwind back out of the `TokenTree`, we will pop the
    // `result_stack` and append `result` to it to produce the new `result` up to that point.
    //
    // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level
    // again, and we are done transcribing.
    let mut result: Vec<TokenTree> = Vec::new();
    let mut result_stack = Vec::new();
    let mut marker = Marker(cx.current_expansion.id, transparency);

    loop {
        // Look at the last frame on the stack.
        // If it still has a TokenTree we have not looked at yet, use that tree.
        let Some(tree) = stack.last_mut().unwrap().next() else {
            // This else-case never produces a value for `tree` (it `continue`s or `return`s).

            // Otherwise, if we have just reached the end of a sequence and we can keep repeating,
            // go back to the beginning of the sequence.
            if let Frame::Sequence { idx, sep, .. } = stack.last_mut().unwrap() {
                let (repeat_idx, repeat_len) = repeats.last_mut().unwrap();
                *repeat_idx += 1;
                if repeat_idx < repeat_len {
                    *idx = 0;
                    if let Some(sep) = sep {
                        result.push(TokenTree::Token(sep.clone(), Spacing::Alone));
                    }
                    continue;
                }
            }

            // We are done with the top of the stack. Pop it. Depending on what it was, we do
            // different things. Note that the outermost item must be the delimited, wrapped RHS
            // that was passed in originally to `transcribe`.
            match stack.pop().unwrap() {
                // Done with a sequence. Pop from repeats.
                Frame::Sequence { .. } => {
                    repeats.pop();
                }

                // We are done processing a Delimited. If this is the top-level delimited, we are
                // done. Otherwise, we unwind the result_stack to append what we have produced to
                // any previous results.
                Frame::Delimited { delim, span, .. } => {
                    if result_stack.is_empty() {
                        // No results left to compute! We are back at the top-level.
                        return Ok(TokenStream::new(result));
                    }

                    // Step back into the parent Delimited.
                    let tree = TokenTree::Delimited(span, delim, TokenStream::new(result));
                    result = result_stack.pop().unwrap();
                    result.push(tree);
                }
            }
            continue;
        };

        // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
        // `tree` contains the next `TokenTree` to be processed.
        match tree {
            // We are descending into a sequence. We first make sure that the matchers in the RHS
            // and the matches in `interp` have the same shape. Otherwise, either the caller or the
            // macro writer has made a mistake.
            seq @ mbe::TokenTree::Sequence(_, delimited) => {
                match lockstep_iter_size(seq, interp, &repeats) {
                    LockstepIterSize::Unconstrained => {
                        return Err(cx.struct_span_err(
                            seq.span(), /* blame macro writer */
                            "attempted to repeat an expression containing no syntax variables \
                             matched as repeating at this depth",
                        ));
                    }

                    LockstepIterSize::Contradiction(msg) => {
                        // FIXME: this really ought to be caught at macro definition time... It
                        // happens when two meta-variables are used in the same repetition in a
                        // sequence, but they come from different sequence matchers and repeat
                        // different amounts.
                        return Err(cx.struct_span_err(seq.span(), &msg));
                    }

                    LockstepIterSize::Constraint(len, _) => {
                        // We do this to avoid an extra clone above. We know that this is a
                        // sequence already.
                        let mbe::TokenTree::Sequence(sp, seq) = seq else {
                            unreachable!()
                        };

                        // Is the repetition empty?
                        if len == 0 {
                            if seq.kleene.op == mbe::KleeneOp::OneOrMore {
                                // FIXME: this really ought to be caught at macro definition
                                // time... It happens when the Kleene operator in the matcher and
                                // the body for the same meta-variable do not match.
                                return Err(cx.struct_span_err(
                                    sp.entire(),
                                    "this must repeat at least once",
                                ));
                            }
                        } else {
                            // 0 is the initial counter (we have done 0 repetitions so far). `len`
                            // is the total number of repetitions we should generate.
                            repeats.push((0, len));

                            // The first time we encounter the sequence we push it to the stack. It
                            // then gets reused (see the beginning of the loop) until we are done
                            // repeating.
                            stack.push(Frame::Sequence {
                                idx: 0,
                                sep: seq.separator.clone(),
                                tts: &delimited.tts,
                            });
                        }
                    }
                }
            }

            // Replace the meta-var with the matched token tree from the invocation.
            mbe::TokenTree::MetaVar(mut sp, mut original_ident) => {
                // Find the matched nonterminal from the macro invocation, and use it to replace
                // the meta-var.
                let ident = MacroRulesNormalizedIdent::new(original_ident);
                if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
                    match cur_matched {
                        MatchedTokenTree(ref tt) => {
                            // `tt`s are emitted into the output stream directly as "raw tokens",
                            // without wrapping them into groups.
                            let token = tt.clone();
                            result.push(token);
                        }
                        MatchedNonterminal(ref nt) => {
                            // Other variables are emitted into the output stream as groups with
                            // `Delimiter::Invisible` to maintain parsing priorities.
                            // `Interpolated` is currently used for such groups in rustc parser.
                            marker.visit_span(&mut sp);
                            let token = TokenTree::token_alone(token::Interpolated(nt.clone()), sp);
                            result.push(token);
                        }
                        MatchedSeq(..) => {
                            // We were unable to descend far enough. This is an error.
                            return Err(cx.struct_span_err(
                                sp, /* blame the macro writer */
                                &format!("variable '{}' is still repeating at this depth", ident),
                            ));
                        }
                    }
                } else {
                    // If we aren't able to match the meta-var, we push it back into the result but
                    // with modified syntax context. (I believe this supports nested macros).
                    marker.visit_span(&mut sp);
                    marker.visit_ident(&mut original_ident);
                    result.push(TokenTree::token_alone(token::Dollar, sp));
                    result.push(TokenTree::Token(
                        Token::from_ast_ident(original_ident),
                        Spacing::Alone,
                    ));
                }
            }

            // Replace meta-variable expressions with the result of their expansion.
            mbe::TokenTree::MetaVarExpr(sp, expr) => {
                transcribe_metavar_expr(cx, expr, interp, &mut marker, &repeats, &mut result, sp)?;
            }

            // If we are entering a new delimiter, we push its contents to the `stack` to be
            // processed, and we push all of the currently produced results to the `result_stack`.
            // We will produce all of the results of the inside of the `Delimited` and then we will
            // jump back out of the Delimited, pop the result_stack and add the new results back to
            // the previous results (from outside the Delimited).
            mbe::TokenTree::Delimited(mut span, delimited) => {
                mut_visit::visit_delim_span(&mut span, &mut marker);
                stack.push(Frame::Delimited {
                    tts: &delimited.tts,
                    delim: delimited.delim,
                    idx: 0,
                    span,
                });
                result_stack.push(mem::take(&mut result));
            }

            // Nothing much to do here. Just push the token to the result, being careful to
            // preserve syntax context.
            mbe::TokenTree::Token(token) => {
                let mut token = token.clone();
                mut_visit::visit_token(&mut token, &mut marker);
                let tt = TokenTree::Token(token, Spacing::Alone);
                result.push(tt);
            }

            // There should be no meta-var declarations in the invocation of a macro.
            mbe::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl`"),
        }
    }
}

/// Lookup the meta-var named `ident` and return the matched token tree from the invocation using
/// the set of matches `interpolations`.
///
/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
/// into the right place in nested matchers: each `(idx, _)` entry selects element `idx` of the
/// current `MatchedSeq`. Descent stops early at the first non-sequence match, which is returned
/// as-is; if `repeats` is shallower than the match, the remaining `MatchedSeq` is returned and
/// the caller decides whether that is an error.
///
/// Returns `None` only when `ident` has no entry in `interpolations`. Panics if an index in
/// `repeats` is out of bounds for the sequence it selects from.
fn lookup_cur_matched<'a>(
    ident: MacroRulesNormalizedIdent,
    interpolations: &'a FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    repeats: &[(usize, usize)],
) -> Option<&'a NamedMatch> {
    interpolations.get(&ident).map(|matched| {
        let mut matched = matched;
        for &(idx, _) in repeats {
            match matched {
                MatchedTokenTree(_) | MatchedNonterminal(_) => break,
                MatchedSeq(ref ads) => matched = ads.get(idx).unwrap(),
            }
        }

        matched
    })
}

/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make
/// sure that the size of each sequence and all of its nested sequences are the same as the sizes
/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody
/// has made a mistake (either the macro writer or caller).
#[derive(Clone)]
enum LockstepIterSize {
    /// No constraints on length of matcher. This is true for any TokenTree variants except a
    /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`).
    Unconstrained,

    /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the
    /// meta-var are returned.
    Constraint(usize, MacroRulesNormalizedIdent),

    /// Two `Constraint`s on the same sequence had different lengths. This is an error.
    Contradiction(String),
}

impl LockstepIterSize {
    /// Find incompatibilities in matcher/invocation sizes.
    /// - `Unconstrained` is compatible with everything.
    /// - `Contradiction` is incompatible with everything.
    /// - `Constraint(len)` is only compatible with other constraints of the same length.
    fn with(self, other: LockstepIterSize) -> LockstepIterSize {
        match self {
            LockstepIterSize::Unconstrained => other,
            LockstepIterSize::Contradiction(_) => self,
            LockstepIterSize::Constraint(l_len, ref l_id) => match other {
                LockstepIterSize::Unconstrained => self,
                LockstepIterSize::Contradiction(_) => other,
                LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self,
                LockstepIterSize::Constraint(r_len, r_id) => {
                    let msg = format!(
                        "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}",
                        l_id,
                        l_len,
                        pluralize!(l_len),
                        r_id,
                        r_len,
                        pluralize!(r_len),
                    );
                    LockstepIterSize::Contradiction(msg)
                }
            },
        }
    }
}

/// Given a `tree`, make sure that all sequences have the same length as the matches for the
/// appropriate meta-vars in `interpolations`.
///
/// Note that `lookup_cur_matched` stops descending once it reaches a non-sequence match and
/// yields `None` for unknown meta-vars; both cases contribute `Unconstrained`, so only meta-vars
/// that are still a `MatchedSeq` at the current depth constrain the length. This is why this
/// still works even in the presence of multiple nested matcher sequences.
///
/// Example: `$($($x $y)+*);+` -- we need to make sure that `x` and `y` repeat the same amount as
/// each other at the given depth when the macro was invoked. If they don't it might mean they were
/// declared at unequal depths or there was a compiler bug. For example, if we have 3 repetitions of
/// the outer sequence and 4 repetitions of the inner sequence for `x`, we should have the same for
/// `y`; otherwise, we can't transcribe them both at the given depth.
+fn lockstep_iter_size( + tree: &mbe::TokenTree, + interpolations: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>, + repeats: &[(usize, usize)], +) -> LockstepIterSize { + use mbe::TokenTree; + match *tree { + TokenTree::Delimited(_, ref delimited) => { + delimited.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| { + size.with(lockstep_iter_size(tt, interpolations, repeats)) + }) + } + TokenTree::Sequence(_, ref seq) => { + seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| { + size.with(lockstep_iter_size(tt, interpolations, repeats)) + }) + } + TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => { + let name = MacroRulesNormalizedIdent::new(name); + match lookup_cur_matched(name, interpolations, repeats) { + Some(matched) => match matched { + MatchedTokenTree(_) | MatchedNonterminal(_) => LockstepIterSize::Unconstrained, + MatchedSeq(ref ads) => LockstepIterSize::Constraint(ads.len(), name), + }, + _ => LockstepIterSize::Unconstrained, + } + } + TokenTree::MetaVarExpr(_, ref expr) => { + let default_rslt = LockstepIterSize::Unconstrained; + let Some(ident) = expr.ident() else { return default_rslt; }; + let name = MacroRulesNormalizedIdent::new(ident); + match lookup_cur_matched(name, interpolations, repeats) { + Some(MatchedSeq(ref ads)) => { + default_rslt.with(LockstepIterSize::Constraint(ads.len(), name)) + } + _ => default_rslt, + } + } + TokenTree::Token(..) => LockstepIterSize::Unconstrained, + } +} + +/// Used solely by the `count` meta-variable expression, counts the outer-most repetitions at a +/// given optional nested depth. 
+/// +/// For example, a macro parameter of `$( { $( $foo:ident ),* } )*` called with `{ a, b } { c }`: +/// +/// * `[ $( ${count(foo)} ),* ]` will return [2, 1] with a, b = 2 and c = 1 +/// * `[ $( ${count(foo, 0)} ),* ]` will be the same as `[ $( ${count(foo)} ),* ]` +/// * `[ $( ${count(foo, 1)} ),* ]` will return an error because `${count(foo, 1)}` is +/// declared inside a single repetition and the index `1` implies two nested repetitions. +fn count_repetitions<'a>( + cx: &ExtCtxt<'a>, + depth_opt: Option<usize>, + mut matched: &NamedMatch, + repeats: &[(usize, usize)], + sp: &DelimSpan, +) -> PResult<'a, usize> { + // Recursively count the number of matches in `matched` at given depth + // (or at the top-level of `matched` if no depth is given). + fn count<'a>( + cx: &ExtCtxt<'a>, + declared_lhs_depth: usize, + depth_opt: Option<usize>, + matched: &NamedMatch, + sp: &DelimSpan, + ) -> PResult<'a, usize> { + match matched { + MatchedTokenTree(_) | MatchedNonterminal(_) => { + if declared_lhs_depth == 0 { + return Err(cx.struct_span_err( + sp.entire(), + "`count` can not be placed inside the inner-most repetition", + )); + } + match depth_opt { + None => Ok(1), + Some(_) => Err(out_of_bounds_err(cx, declared_lhs_depth, sp.entire(), "count")), + } + } + MatchedSeq(ref named_matches) => { + let new_declared_lhs_depth = declared_lhs_depth + 1; + match depth_opt { + None => named_matches + .iter() + .map(|elem| count(cx, new_declared_lhs_depth, None, elem, sp)) + .sum(), + Some(0) => Ok(named_matches.len()), + Some(depth) => named_matches + .iter() + .map(|elem| count(cx, new_declared_lhs_depth, Some(depth - 1), elem, sp)) + .sum(), + } + } + } + } + // `repeats` records all of the nested levels at which we are currently + // matching meta-variables. The meta-var-expr `count($x)` only counts + // matches that occur in this "subtree" of the `NamedMatch` where we + // are currently transcribing, so we need to descend to that subtree + // before we start counting. 
`matched` contains the various levels of the + // tree as we descend, and its final value is the subtree we are currently at. + for &(idx, _) in repeats { + if let MatchedSeq(ref ads) = matched { + matched = &ads[idx]; + } + } + count(cx, 0, depth_opt, matched, sp) +} + +/// Returns a `NamedMatch` item declared on the LHS given an arbitrary [Ident] +fn matched_from_ident<'ctx, 'interp, 'rslt>( + cx: &ExtCtxt<'ctx>, + ident: Ident, + interp: &'interp FxHashMap<MacroRulesNormalizedIdent, NamedMatch>, +) -> PResult<'ctx, &'rslt NamedMatch> +where + 'interp: 'rslt, +{ + let span = ident.span; + let key = MacroRulesNormalizedIdent::new(ident); + interp.get(&key).ok_or_else(|| { + cx.struct_span_err( + span, + &format!("variable `{}` is not recognized in meta-variable expression", key), + ) + }) +} + +/// Used by meta-variable expressions when an user input is out of the actual declared bounds. For +/// example, index(999999) in an repetition of only three elements. +fn out_of_bounds_err<'a>( + cx: &ExtCtxt<'a>, + max: usize, + span: Span, + ty: &str, +) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + let msg = if max == 0 { + format!( + "meta-variable expression `{ty}` with depth parameter \ + must be called inside of a macro repetition" + ) + } else { + format!( + "depth parameter on meta-variable expression `{ty}` \ + must be less than {max}" + ) + }; + cx.struct_span_err(span, &msg) +} + +fn transcribe_metavar_expr<'a>( + cx: &ExtCtxt<'a>, + expr: &MetaVarExpr, + interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>, + marker: &mut Marker, + repeats: &[(usize, usize)], + result: &mut Vec<TokenTree>, + sp: &DelimSpan, +) -> PResult<'a, ()> { + let mut visited_span = || { + let mut span = sp.entire(); + marker.visit_span(&mut span); + span + }; + match *expr { + MetaVarExpr::Count(original_ident, depth_opt) => { + let matched = matched_from_ident(cx, original_ident, interp)?; + let count = count_repetitions(cx, depth_opt, matched, &repeats, sp)?; + let tt = 
TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(count), None), + visited_span(), + ); + result.push(tt); + } + MetaVarExpr::Ignore(original_ident) => { + // Used to ensure that `original_ident` is present in the LHS + let _ = matched_from_ident(cx, original_ident, interp)?; + } + MetaVarExpr::Index(depth) => match repeats.iter().nth_back(depth) { + Some((index, _)) => { + result.push(TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(*index), None), + visited_span(), + )); + } + None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "index")), + }, + MetaVarExpr::Length(depth) => match repeats.iter().nth_back(depth) { + Some((_, length)) => { + result.push(TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(*length), None), + visited_span(), + )); + } + None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "length")), + }, + } + Ok(()) +} diff --git a/compiler/rustc_expand/src/module.rs b/compiler/rustc_expand/src/module.rs new file mode 100644 index 000000000..0315d1163 --- /dev/null +++ b/compiler/rustc_expand/src/module.rs @@ -0,0 +1,298 @@ +use crate::base::ModuleData; +use rustc_ast::ptr::P; +use rustc_ast::{token, Attribute, Inline, Item, ModSpans}; +use rustc_errors::{struct_span_err, DiagnosticBuilder, ErrorGuaranteed}; +use rustc_parse::new_parser_from_file; +use rustc_parse::validate_attr; +use rustc_session::parse::ParseSess; +use rustc_session::Session; +use rustc_span::symbol::{sym, Ident}; +use rustc_span::Span; + +use std::path::{self, Path, PathBuf}; + +#[derive(Copy, Clone)] +pub enum DirOwnership { + Owned { + // None if `mod.rs`, `Some("foo")` if we're in `foo.rs`. + relative: Option<Ident>, + }, + UnownedViaBlock, +} + +// Public for rustfmt usage. 
+pub struct ModulePathSuccess { + pub file_path: PathBuf, + pub dir_ownership: DirOwnership, +} + +pub(crate) struct ParsedExternalMod { + pub items: Vec<P<Item>>, + pub spans: ModSpans, + pub file_path: PathBuf, + pub dir_path: PathBuf, + pub dir_ownership: DirOwnership, +} + +pub enum ModError<'a> { + CircularInclusion(Vec<PathBuf>), + ModInBlock(Option<Ident>), + FileNotFound(Ident, PathBuf, PathBuf), + MultipleCandidates(Ident, PathBuf, PathBuf), + ParserError(DiagnosticBuilder<'a, ErrorGuaranteed>), +} + +pub(crate) fn parse_external_mod( + sess: &Session, + ident: Ident, + span: Span, // The span to blame on errors. + module: &ModuleData, + mut dir_ownership: DirOwnership, + attrs: &mut Vec<Attribute>, +) -> ParsedExternalMod { + // We bail on the first error, but that error does not cause a fatal error... (1) + let result: Result<_, ModError<'_>> = try { + // Extract the file path and the new ownership. + let mp = mod_file_path(sess, ident, &attrs, &module.dir_path, dir_ownership)?; + dir_ownership = mp.dir_ownership; + + // Ensure file paths are acyclic. + if let Some(pos) = module.file_path_stack.iter().position(|p| p == &mp.file_path) { + Err(ModError::CircularInclusion(module.file_path_stack[pos..].to_vec()))?; + } + + // Actually parse the external file as a module. + let mut parser = new_parser_from_file(&sess.parse_sess, &mp.file_path, Some(span)); + let (mut inner_attrs, items, inner_span) = + parser.parse_mod(&token::Eof).map_err(|err| ModError::ParserError(err))?; + attrs.append(&mut inner_attrs); + (items, inner_span, mp.file_path) + }; + // (1) ...instead, we return a dummy module. + let (items, spans, file_path) = + result.map_err(|err| err.report(sess, span)).unwrap_or_default(); + + // Extract the directory path for submodules of the module. 
+ let dir_path = file_path.parent().unwrap_or(&file_path).to_owned(); + + ParsedExternalMod { items, spans, file_path, dir_path, dir_ownership } +} + +pub(crate) fn mod_dir_path( + sess: &Session, + ident: Ident, + attrs: &[Attribute], + module: &ModuleData, + mut dir_ownership: DirOwnership, + inline: Inline, +) -> (PathBuf, DirOwnership) { + match inline { + Inline::Yes if let Some(file_path) = mod_file_path_from_attr(sess, attrs, &module.dir_path) => { + // For inline modules file path from `#[path]` is actually the directory path + // for historical reasons, so we don't pop the last segment here. + (file_path, DirOwnership::Owned { relative: None }) + } + Inline::Yes => { + // We have to push on the current module name in the case of relative + // paths in order to ensure that any additional module paths from inline + // `mod x { ... }` come after the relative extension. + // + // For example, a `mod z { ... }` inside `x/y.rs` should set the current + // directory path to `/x/y/z`, not `/x/z` with a relative offset of `y`. + let mut dir_path = module.dir_path.clone(); + if let DirOwnership::Owned { relative } = &mut dir_ownership { + if let Some(ident) = relative.take() { + // Remove the relative offset. + dir_path.push(ident.as_str()); + } + } + dir_path.push(ident.as_str()); + + (dir_path, dir_ownership) + } + Inline::No => { + // FIXME: This is a subset of `parse_external_mod` without actual parsing, + // check whether the logic for unloaded, loaded and inline modules can be unified. + let file_path = mod_file_path(sess, ident, &attrs, &module.dir_path, dir_ownership) + .map(|mp| { + dir_ownership = mp.dir_ownership; + mp.file_path + }) + .unwrap_or_default(); + + // Extract the directory path for submodules of the module. 
+ let dir_path = file_path.parent().unwrap_or(&file_path).to_owned(); + + (dir_path, dir_ownership) + } + } +} + +fn mod_file_path<'a>( + sess: &'a Session, + ident: Ident, + attrs: &[Attribute], + dir_path: &Path, + dir_ownership: DirOwnership, +) -> Result<ModulePathSuccess, ModError<'a>> { + if let Some(file_path) = mod_file_path_from_attr(sess, attrs, dir_path) { + // All `#[path]` files are treated as though they are a `mod.rs` file. + // This means that `mod foo;` declarations inside `#[path]`-included + // files are siblings, + // + // Note that this will produce weirdness when a file named `foo.rs` is + // `#[path]` included and contains a `mod foo;` declaration. + // If you encounter this, it's your own darn fault :P + let dir_ownership = DirOwnership::Owned { relative: None }; + return Ok(ModulePathSuccess { file_path, dir_ownership }); + } + + let relative = match dir_ownership { + DirOwnership::Owned { relative } => relative, + DirOwnership::UnownedViaBlock => None, + }; + let result = default_submod_path(&sess.parse_sess, ident, relative, dir_path); + match dir_ownership { + DirOwnership::Owned { .. } => result, + DirOwnership::UnownedViaBlock => Err(ModError::ModInBlock(match result { + Ok(_) | Err(ModError::MultipleCandidates(..)) => Some(ident), + _ => None, + })), + } +} + +/// Derive a submodule path from the first found `#[path = "path_string"]`. +/// The provided `dir_path` is joined with the `path_string`. +fn mod_file_path_from_attr( + sess: &Session, + attrs: &[Attribute], + dir_path: &Path, +) -> Option<PathBuf> { + // Extract path string from first `#[path = "path_string"]` attribute. + let first_path = attrs.iter().find(|at| at.has_name(sym::path))?; + let Some(path_sym) = first_path.value_str() else { + // This check is here mainly to catch attempting to use a macro, + // such as #[path = concat!(...)]. 
This isn't currently supported + // because otherwise the InvocationCollector would need to defer + // loading a module until the #[path] attribute was expanded, and + // it doesn't support that (and would likely add a bit of + // complexity). Usually bad forms are checked in AstValidator (via + // `check_builtin_attribute`), but by the time that runs the macro + // is expanded, and it doesn't give an error. + validate_attr::emit_fatal_malformed_builtin_attribute( + &sess.parse_sess, + first_path, + sym::path, + ); + }; + + let path_str = path_sym.as_str(); + + // On windows, the base path might have the form + // `\\?\foo\bar` in which case it does not tolerate + // mixed `/` and `\` separators, so canonicalize + // `/` to `\`. + #[cfg(windows)] + let path_str = path_str.replace("/", "\\"); + + Some(dir_path.join(path_str)) +} + +/// Returns a path to a module. +// Public for rustfmt usage. +pub fn default_submod_path<'a>( + sess: &'a ParseSess, + ident: Ident, + relative: Option<Ident>, + dir_path: &Path, +) -> Result<ModulePathSuccess, ModError<'a>> { + // If we're in a foo.rs file instead of a mod.rs file, + // we need to look for submodules in + // `./foo/<ident>.rs` and `./foo/<ident>/mod.rs` rather than + // `./<ident>.rs` and `./<ident>/mod.rs`. 
+ let relative_prefix_string; + let relative_prefix = if let Some(ident) = relative { + relative_prefix_string = format!("{}{}", ident.name, path::MAIN_SEPARATOR); + &relative_prefix_string + } else { + "" + }; + + let default_path_str = format!("{}{}.rs", relative_prefix, ident.name); + let secondary_path_str = + format!("{}{}{}mod.rs", relative_prefix, ident.name, path::MAIN_SEPARATOR); + let default_path = dir_path.join(&default_path_str); + let secondary_path = dir_path.join(&secondary_path_str); + let default_exists = sess.source_map().file_exists(&default_path); + let secondary_exists = sess.source_map().file_exists(&secondary_path); + + match (default_exists, secondary_exists) { + (true, false) => Ok(ModulePathSuccess { + file_path: default_path, + dir_ownership: DirOwnership::Owned { relative: Some(ident) }, + }), + (false, true) => Ok(ModulePathSuccess { + file_path: secondary_path, + dir_ownership: DirOwnership::Owned { relative: None }, + }), + (false, false) => Err(ModError::FileNotFound(ident, default_path, secondary_path)), + (true, true) => Err(ModError::MultipleCandidates(ident, default_path, secondary_path)), + } +} + +impl ModError<'_> { + fn report(self, sess: &Session, span: Span) -> ErrorGuaranteed { + let diag = &sess.parse_sess.span_diagnostic; + match self { + ModError::CircularInclusion(file_paths) => { + let mut msg = String::from("circular modules: "); + for file_path in &file_paths { + msg.push_str(&file_path.display().to_string()); + msg.push_str(" -> "); + } + msg.push_str(&file_paths[0].display().to_string()); + diag.struct_span_err(span, &msg) + } + ModError::ModInBlock(ident) => { + let msg = "cannot declare a non-inline module inside a block unless it has a path attribute"; + let mut err = diag.struct_span_err(span, msg); + if let Some(ident) = ident { + let note = + format!("maybe `use` the module `{}` instead of redeclaring it", ident); + err.span_note(span, ¬e); + } + err + } + ModError::FileNotFound(ident, default_path, 
secondary_path) => { + let mut err = struct_span_err!( + diag, + span, + E0583, + "file not found for module `{}`", + ident, + ); + err.help(&format!( + "to create the module `{}`, create file \"{}\" or \"{}\"", + ident, + default_path.display(), + secondary_path.display(), + )); + err + } + ModError::MultipleCandidates(ident, default_path, secondary_path) => { + let mut err = struct_span_err!( + diag, + span, + E0761, + "file for module `{}` found at both \"{}\" and \"{}\"", + ident, + default_path.display(), + secondary_path.display(), + ); + err.help("delete or rename one of them to remove the ambiguity"); + err + } + ModError::ParserError(err) => err, + }.emit() + } +} diff --git a/compiler/rustc_expand/src/mut_visit/tests.rs b/compiler/rustc_expand/src/mut_visit/tests.rs new file mode 100644 index 000000000..8974d45b4 --- /dev/null +++ b/compiler/rustc_expand/src/mut_visit/tests.rs @@ -0,0 +1,72 @@ +use crate::tests::{matches_codepattern, string_to_crate}; + +use rustc_ast as ast; +use rustc_ast::mut_visit::MutVisitor; +use rustc_ast_pretty::pprust; +use rustc_span::create_default_session_globals_then; +use rustc_span::symbol::Ident; + +// This version doesn't care about getting comments or doc-strings in. +fn print_crate_items(krate: &ast::Crate) -> String { + krate.items.iter().map(|i| pprust::item_to_string(i)).collect::<Vec<_>>().join(" ") +} + +// Change every identifier to "zz". +struct ToZzIdentMutVisitor; + +impl MutVisitor for ToZzIdentMutVisitor { + const VISIT_TOKENS: bool = true; + + fn visit_ident(&mut self, ident: &mut Ident) { + *ident = Ident::from_str("zz"); + } +} + +// Maybe add to `expand.rs`. +macro_rules! assert_pred { + ($pred:expr, $predname:expr, $a:expr , $b:expr) => {{ + let pred_val = $pred; + let a_val = $a; + let b_val = $b; + if !(pred_val(&a_val, &b_val)) { + panic!("expected args satisfying {}, got {} and {}", $predname, a_val, b_val); + } + }}; +} + +// Make sure idents get transformed everywhere. 
+#[test] +fn ident_transformation() { + create_default_session_globals_then(|| { + let mut zz_visitor = ToZzIdentMutVisitor; + let mut krate = + string_to_crate("#[a] mod b {fn c (d : e, f : g) {h!(i,j,k);l;m}}".to_string()); + zz_visitor.visit_crate(&mut krate); + assert_pred!( + matches_codepattern, + "matches_codepattern", + print_crate_items(&krate), + "#[zz]mod zz{fn zz(zz:zz,zz:zz){zz!(zz,zz,zz);zz;zz}}".to_string() + ); + }) +} + +// Make sure idents get transformed even inside macro defs. +#[test] +fn ident_transformation_in_defs() { + create_default_session_globals_then(|| { + let mut zz_visitor = ToZzIdentMutVisitor; + let mut krate = string_to_crate( + "macro_rules! a {(b $c:expr $(d $e:token)f+ => \ + (g $(d $d $e)+))} " + .to_string(), + ); + zz_visitor.visit_crate(&mut krate); + assert_pred!( + matches_codepattern, + "matches_codepattern", + print_crate_items(&krate), + "macro_rules! zz{(zz$zz:zz$(zz $zz:zz)zz+=>(zz$(zz$zz$zz)+))}".to_string() + ); + }) +} diff --git a/compiler/rustc_expand/src/parse/tests.rs b/compiler/rustc_expand/src/parse/tests.rs new file mode 100644 index 000000000..a3c631d33 --- /dev/null +++ b/compiler/rustc_expand/src/parse/tests.rs @@ -0,0 +1,358 @@ +use crate::tests::{matches_codepattern, string_to_stream, with_error_checking_parse}; + +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Delimiter, Token}; +use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree}; +use rustc_ast::visit; +use rustc_ast::{self as ast, PatKind}; +use rustc_ast_pretty::pprust::item_to_string; +use rustc_errors::PResult; +use rustc_parse::new_parser_from_source_str; +use rustc_parse::parser::ForceCollect; +use rustc_session::parse::ParseSess; +use rustc_span::create_default_session_globals_then; +use rustc_span::source_map::FilePathMapping; +use rustc_span::symbol::{kw, sym, Symbol}; +use rustc_span::{BytePos, FileName, Pos, Span}; + +use std::path::PathBuf; + +fn sess() -> ParseSess { + ParseSess::new(FilePathMapping::empty()) +} + +/// 
Parses an item. +/// +/// Returns `Ok(Some(item))` when successful, `Ok(None)` when no item was found, and `Err` +/// when a syntax error occurred. +fn parse_item_from_source_str( + name: FileName, + source: String, + sess: &ParseSess, +) -> PResult<'_, Option<P<ast::Item>>> { + new_parser_from_source_str(sess, name, source).parse_item(ForceCollect::No) +} + +// Produces a `rustc_span::span`. +fn sp(a: u32, b: u32) -> Span { + Span::with_root_ctxt(BytePos(a), BytePos(b)) +} + +/// Parses a string, return an expression. +fn string_to_expr(source_str: String) -> P<ast::Expr> { + with_error_checking_parse(source_str, &sess(), |p| p.parse_expr()) +} + +/// Parses a string, returns an item. +fn string_to_item(source_str: String) -> Option<P<ast::Item>> { + with_error_checking_parse(source_str, &sess(), |p| p.parse_item(ForceCollect::No)) +} + +#[should_panic] +#[test] +fn bad_path_expr_1() { + create_default_session_globals_then(|| { + string_to_expr("::abc::def::return".to_string()); + }) +} + +// Checks the token-tree-ization of macros. +#[test] +fn string_to_tts_macro() { + create_default_session_globals_then(|| { + let tts: Vec<_> = + string_to_stream("macro_rules! zip (($a)=>($a))".to_string()).into_trees().collect(); + let tts: &[TokenTree] = &tts[..]; + + match tts { + [ + TokenTree::Token(Token { kind: token::Ident(name_macro_rules, false), .. }, _), + TokenTree::Token(Token { kind: token::Not, .. }, _), + TokenTree::Token(Token { kind: token::Ident(name_zip, false), .. }, _), + TokenTree::Delimited(_, macro_delim, macro_tts), + ] if name_macro_rules == &kw::MacroRules && name_zip.as_str() == "zip" => { + let tts = ¯o_tts.trees().collect::<Vec<_>>(); + match &tts[..] { + [ + TokenTree::Delimited(_, first_delim, first_tts), + TokenTree::Token(Token { kind: token::FatArrow, .. }, _), + TokenTree::Delimited(_, second_delim, second_tts), + ] if macro_delim == &Delimiter::Parenthesis => { + let tts = &first_tts.trees().collect::<Vec<_>>(); + match &tts[..] 
{ + [ + TokenTree::Token(Token { kind: token::Dollar, .. }, _), + TokenTree::Token(Token { kind: token::Ident(name, false), .. }, _), + ] if first_delim == &Delimiter::Parenthesis && name.as_str() == "a" => { + } + _ => panic!("value 3: {:?} {:?}", first_delim, first_tts), + } + let tts = &second_tts.trees().collect::<Vec<_>>(); + match &tts[..] { + [ + TokenTree::Token(Token { kind: token::Dollar, .. }, _), + TokenTree::Token(Token { kind: token::Ident(name, false), .. }, _), + ] if second_delim == &Delimiter::Parenthesis + && name.as_str() == "a" => {} + _ => panic!("value 4: {:?} {:?}", second_delim, second_tts), + } + } + _ => panic!("value 2: {:?} {:?}", macro_delim, macro_tts), + } + } + _ => panic!("value: {:?}", tts), + } + }) +} + +#[test] +fn string_to_tts_1() { + create_default_session_globals_then(|| { + let tts = string_to_stream("fn a (b : i32) { b; }".to_string()); + + let expected = TokenStream::new(vec![ + TokenTree::token_alone(token::Ident(kw::Fn, false), sp(0, 2)), + TokenTree::token_alone(token::Ident(Symbol::intern("a"), false), sp(3, 4)), + TokenTree::Delimited( + DelimSpan::from_pair(sp(5, 6), sp(13, 14)), + Delimiter::Parenthesis, + TokenStream::new(vec![ + TokenTree::token_alone(token::Ident(Symbol::intern("b"), false), sp(6, 7)), + TokenTree::token_alone(token::Colon, sp(8, 9)), + TokenTree::token_alone(token::Ident(sym::i32, false), sp(10, 13)), + ]) + .into(), + ), + TokenTree::Delimited( + DelimSpan::from_pair(sp(15, 16), sp(20, 21)), + Delimiter::Brace, + TokenStream::new(vec![ + TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(17, 18)), + TokenTree::token_alone(token::Semi, sp(18, 19)), + ]) + .into(), + ), + ]); + + assert_eq!(tts, expected); + }) +} + +#[test] +fn parse_use() { + create_default_session_globals_then(|| { + let use_s = "use foo::bar::baz;"; + let vitem = string_to_item(use_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], use_s); + + let use_s = "use 
foo::bar as baz;"; + let vitem = string_to_item(use_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], use_s); + }) +} + +#[test] +fn parse_extern_crate() { + create_default_session_globals_then(|| { + let ex_s = "extern crate foo;"; + let vitem = string_to_item(ex_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], ex_s); + + let ex_s = "extern crate foo as bar;"; + let vitem = string_to_item(ex_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], ex_s); + }) +} + +fn get_spans_of_pat_idents(src: &str) -> Vec<Span> { + let item = string_to_item(src.to_string()).unwrap(); + + struct PatIdentVisitor { + spans: Vec<Span>, + } + impl<'a> visit::Visitor<'a> for PatIdentVisitor { + fn visit_pat(&mut self, p: &'a ast::Pat) { + match p.kind { + PatKind::Ident(_, ref ident, _) => { + self.spans.push(ident.span.clone()); + } + _ => { + visit::walk_pat(self, p); + } + } + } + } + let mut v = PatIdentVisitor { spans: Vec::new() }; + visit::walk_item(&mut v, &item); + return v.spans; +} + +#[test] +fn span_of_self_arg_pat_idents_are_correct() { + create_default_session_globals_then(|| { + let srcs = [ + "impl z { fn a (&self, &myarg: i32) {} }", + "impl z { fn a (&mut self, &myarg: i32) {} }", + "impl z { fn a (&'a self, &myarg: i32) {} }", + "impl z { fn a (self, &myarg: i32) {} }", + "impl z { fn a (self: Foo, &myarg: i32) {} }", + ]; + + for src in srcs { + let spans = get_spans_of_pat_idents(src); + let (lo, hi) = (spans[0].lo(), spans[0].hi()); + assert!( + "self" == &src[lo.to_usize()..hi.to_usize()], + "\"{}\" != \"self\". src=\"{}\"", + &src[lo.to_usize()..hi.to_usize()], + src + ) + } + }) +} + +#[test] +fn parse_exprs() { + create_default_session_globals_then(|| { + // just make sure that they parse.... 
+ string_to_expr("3 + 4".to_string()); + string_to_expr("a::z.froob(b,&(987+3))".to_string()); + }) +} + +#[test] +fn attrs_fix_bug() { + create_default_session_globals_then(|| { + string_to_item( + "pub fn mk_file_writer(path: &Path, flags: &[FileFlag]) + -> Result<Box<Writer>, String> { +#[cfg(windows)] +fn wb() -> c_int { + (O_WRONLY | libc::consts::os::extra::O_BINARY) as c_int +} + +#[cfg(unix)] +fn wb() -> c_int { O_WRONLY as c_int } + +let mut fflags: c_int = wb(); +}" + .to_string(), + ); + }) +} + +#[test] +fn crlf_doc_comments() { + create_default_session_globals_then(|| { + let sess = sess(); + + let name_1 = FileName::Custom("crlf_source_1".to_string()); + let source = "/// doc comment\r\nfn foo() {}".to_string(); + let item = parse_item_from_source_str(name_1, source, &sess).unwrap().unwrap(); + let doc = item.attrs.iter().filter_map(|at| at.doc_str()).next().unwrap(); + assert_eq!(doc.as_str(), " doc comment"); + + let name_2 = FileName::Custom("crlf_source_2".to_string()); + let source = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string(); + let item = parse_item_from_source_str(name_2, source, &sess).unwrap().unwrap(); + let docs = item.attrs.iter().filter_map(|at| at.doc_str()).collect::<Vec<_>>(); + let b: &[_] = &[Symbol::intern(" doc comment"), Symbol::intern(" line 2")]; + assert_eq!(&docs[..], b); + + let name_3 = FileName::Custom("clrf_source_3".to_string()); + let source = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string(); + let item = parse_item_from_source_str(name_3, source, &sess).unwrap().unwrap(); + let doc = item.attrs.iter().filter_map(|at| at.doc_str()).next().unwrap(); + assert_eq!(doc.as_str(), " doc comment\n * with CRLF "); + }); +} + +#[test] +fn ttdelim_span() { + fn parse_expr_from_source_str( + name: FileName, + source: String, + sess: &ParseSess, + ) -> PResult<'_, P<ast::Expr>> { + new_parser_from_source_str(sess, name, source).parse_expr() + } + + create_default_session_globals_then(|| { + let sess = 
sess(); + let expr = parse_expr_from_source_str( + PathBuf::from("foo").into(), + "foo!( fn main() { body } )".to_string(), + &sess, + ) + .unwrap(); + + let tts: Vec<_> = match expr.kind { + ast::ExprKind::MacCall(ref mac) => mac.args.inner_tokens().into_trees().collect(), + _ => panic!("not a macro"), + }; + + let span = tts.iter().rev().next().unwrap().span(); + + match sess.source_map().span_to_snippet(span) { + Ok(s) => assert_eq!(&s[..], "{ body }"), + Err(_) => panic!("could not get snippet"), + } + }); +} + +// This tests that when parsing a string (rather than a file) we don't try +// and read in a file for a module declaration and just parse a stub. +// See `recurse_into_file_modules` in the parser. +#[test] +fn out_of_line_mod() { + create_default_session_globals_then(|| { + let item = parse_item_from_source_str( + PathBuf::from("foo").into(), + "mod foo { struct S; mod this_does_not_exist; }".to_owned(), + &sess(), + ) + .unwrap() + .unwrap(); + + if let ast::ItemKind::Mod(_, ref mod_kind) = item.kind { + assert!(matches!(mod_kind, ast::ModKind::Loaded(items, ..) 
if items.len() == 2)); + } else { + panic!(); + } + }); +} + +#[test] +fn eqmodws() { + assert_eq!(matches_codepattern("", ""), true); + assert_eq!(matches_codepattern("", "a"), false); + assert_eq!(matches_codepattern("a", ""), false); + assert_eq!(matches_codepattern("a", "a"), true); + assert_eq!(matches_codepattern("a b", "a \n\t\r b"), true); + assert_eq!(matches_codepattern("a b ", "a \n\t\r b"), true); + assert_eq!(matches_codepattern("a b", "a \n\t\r b "), false); + assert_eq!(matches_codepattern("a b", "a b"), true); + assert_eq!(matches_codepattern("ab", "a b"), false); + assert_eq!(matches_codepattern("a b", "ab"), true); + assert_eq!(matches_codepattern(" a b", "ab"), true); +} + +#[test] +fn pattern_whitespace() { + assert_eq!(matches_codepattern("", "\x0C"), false); + assert_eq!(matches_codepattern("a b ", "a \u{0085}\n\t\r b"), true); + assert_eq!(matches_codepattern("a b", "a \u{0085}\n\t\r b "), false); +} + +#[test] +fn non_pattern_whitespace() { + // These have the property 'White_Space' but not 'Pattern_White_Space' + assert_eq!(matches_codepattern("a b", "a\u{2002}b"), false); + assert_eq!(matches_codepattern("a b", "a\u{2002}b"), false); + assert_eq!(matches_codepattern("\u{205F}a b", "ab"), false); + assert_eq!(matches_codepattern("a \u{3000}b", "ab"), false); +} diff --git a/compiler/rustc_expand/src/placeholders.rs b/compiler/rustc_expand/src/placeholders.rs new file mode 100644 index 000000000..0d5d6ee07 --- /dev/null +++ b/compiler/rustc_expand/src/placeholders.rs @@ -0,0 +1,373 @@ +use crate::expand::{AstFragment, AstFragmentKind}; + +use rustc_ast as ast; +use rustc_ast::mut_visit::*; +use rustc_ast::ptr::P; +use rustc_span::source_map::DUMMY_SP; +use rustc_span::symbol::Ident; + +use smallvec::{smallvec, SmallVec}; + +use rustc_data_structures::fx::FxHashMap; + +pub fn placeholder( + kind: AstFragmentKind, + id: ast::NodeId, + vis: Option<ast::Visibility>, +) -> AstFragment { + fn mac_placeholder() -> ast::MacCall { + ast::MacCall { + 
path: ast::Path { span: DUMMY_SP, segments: Vec::new(), tokens: None }, + args: P(ast::MacArgs::Empty), + prior_type_ascription: None, + } + } + + let ident = Ident::empty(); + let attrs = Vec::new(); + let vis = vis.unwrap_or(ast::Visibility { + span: DUMMY_SP, + kind: ast::VisibilityKind::Inherited, + tokens: None, + }); + let span = DUMMY_SP; + let expr_placeholder = || { + P(ast::Expr { + id, + span, + attrs: ast::AttrVec::new(), + kind: ast::ExprKind::MacCall(mac_placeholder()), + tokens: None, + }) + }; + let ty = + || P(ast::Ty { id, kind: ast::TyKind::MacCall(mac_placeholder()), span, tokens: None }); + let pat = + || P(ast::Pat { id, kind: ast::PatKind::MacCall(mac_placeholder()), span, tokens: None }); + + match kind { + AstFragmentKind::Crate => AstFragment::Crate(ast::Crate { + attrs: Default::default(), + items: Default::default(), + spans: ast::ModSpans { inner_span: span, ..Default::default() }, + id, + is_placeholder: true, + }), + AstFragmentKind::Expr => AstFragment::Expr(expr_placeholder()), + AstFragmentKind::OptExpr => AstFragment::OptExpr(Some(expr_placeholder())), + AstFragmentKind::Items => AstFragment::Items(smallvec![P(ast::Item { + id, + span, + ident, + vis, + attrs, + kind: ast::ItemKind::MacCall(mac_placeholder()), + tokens: None, + })]), + AstFragmentKind::TraitItems => AstFragment::TraitItems(smallvec![P(ast::AssocItem { + id, + span, + ident, + vis, + attrs, + kind: ast::AssocItemKind::MacCall(mac_placeholder()), + tokens: None, + })]), + AstFragmentKind::ImplItems => AstFragment::ImplItems(smallvec![P(ast::AssocItem { + id, + span, + ident, + vis, + attrs, + kind: ast::AssocItemKind::MacCall(mac_placeholder()), + tokens: None, + })]), + AstFragmentKind::ForeignItems => { + AstFragment::ForeignItems(smallvec![P(ast::ForeignItem { + id, + span, + ident, + vis, + attrs, + kind: ast::ForeignItemKind::MacCall(mac_placeholder()), + tokens: None, + })]) + } + AstFragmentKind::Pat => AstFragment::Pat(P(ast::Pat { + id, + span, + kind: 
ast::PatKind::MacCall(mac_placeholder()), + tokens: None, + })), + AstFragmentKind::Ty => AstFragment::Ty(P(ast::Ty { + id, + span, + kind: ast::TyKind::MacCall(mac_placeholder()), + tokens: None, + })), + AstFragmentKind::Stmts => AstFragment::Stmts(smallvec![{ + let mac = P(ast::MacCallStmt { + mac: mac_placeholder(), + style: ast::MacStmtStyle::Braces, + attrs: ast::AttrVec::new(), + tokens: None, + }); + ast::Stmt { id, span, kind: ast::StmtKind::MacCall(mac) } + }]), + AstFragmentKind::Arms => AstFragment::Arms(smallvec![ast::Arm { + attrs: Default::default(), + body: expr_placeholder(), + guard: None, + id, + pat: pat(), + span, + is_placeholder: true, + }]), + AstFragmentKind::ExprFields => AstFragment::ExprFields(smallvec![ast::ExprField { + attrs: Default::default(), + expr: expr_placeholder(), + id, + ident, + is_shorthand: false, + span, + is_placeholder: true, + }]), + AstFragmentKind::PatFields => AstFragment::PatFields(smallvec![ast::PatField { + attrs: Default::default(), + id, + ident, + is_shorthand: false, + pat: pat(), + span, + is_placeholder: true, + }]), + AstFragmentKind::GenericParams => AstFragment::GenericParams(smallvec![{ + ast::GenericParam { + attrs: Default::default(), + bounds: Default::default(), + id, + ident, + is_placeholder: true, + kind: ast::GenericParamKind::Lifetime, + colon_span: None, + } + }]), + AstFragmentKind::Params => AstFragment::Params(smallvec![ast::Param { + attrs: Default::default(), + id, + pat: pat(), + span, + ty: ty(), + is_placeholder: true, + }]), + AstFragmentKind::FieldDefs => AstFragment::FieldDefs(smallvec![ast::FieldDef { + attrs: Default::default(), + id, + ident: None, + span, + ty: ty(), + vis, + is_placeholder: true, + }]), + AstFragmentKind::Variants => AstFragment::Variants(smallvec![ast::Variant { + attrs: Default::default(), + data: ast::VariantData::Struct(Default::default(), false), + disr_expr: None, + id, + ident, + span, + vis, + is_placeholder: true, + }]), + } +} + +#[derive(Default)] 
+pub struct PlaceholderExpander { + expanded_fragments: FxHashMap<ast::NodeId, AstFragment>, +} + +impl PlaceholderExpander { + pub fn add(&mut self, id: ast::NodeId, mut fragment: AstFragment) { + fragment.mut_visit_with(self); + self.expanded_fragments.insert(id, fragment); + } + + fn remove(&mut self, id: ast::NodeId) -> AstFragment { + self.expanded_fragments.remove(&id).unwrap() + } +} + +impl MutVisitor for PlaceholderExpander { + fn flat_map_arm(&mut self, arm: ast::Arm) -> SmallVec<[ast::Arm; 1]> { + if arm.is_placeholder { + self.remove(arm.id).make_arms() + } else { + noop_flat_map_arm(arm, self) + } + } + + fn flat_map_expr_field(&mut self, field: ast::ExprField) -> SmallVec<[ast::ExprField; 1]> { + if field.is_placeholder { + self.remove(field.id).make_expr_fields() + } else { + noop_flat_map_expr_field(field, self) + } + } + + fn flat_map_pat_field(&mut self, fp: ast::PatField) -> SmallVec<[ast::PatField; 1]> { + if fp.is_placeholder { + self.remove(fp.id).make_pat_fields() + } else { + noop_flat_map_pat_field(fp, self) + } + } + + fn flat_map_generic_param( + &mut self, + param: ast::GenericParam, + ) -> SmallVec<[ast::GenericParam; 1]> { + if param.is_placeholder { + self.remove(param.id).make_generic_params() + } else { + noop_flat_map_generic_param(param, self) + } + } + + fn flat_map_param(&mut self, p: ast::Param) -> SmallVec<[ast::Param; 1]> { + if p.is_placeholder { + self.remove(p.id).make_params() + } else { + noop_flat_map_param(p, self) + } + } + + fn flat_map_field_def(&mut self, sf: ast::FieldDef) -> SmallVec<[ast::FieldDef; 1]> { + if sf.is_placeholder { + self.remove(sf.id).make_field_defs() + } else { + noop_flat_map_field_def(sf, self) + } + } + + fn flat_map_variant(&mut self, variant: ast::Variant) -> SmallVec<[ast::Variant; 1]> { + if variant.is_placeholder { + self.remove(variant.id).make_variants() + } else { + noop_flat_map_variant(variant, self) + } + } + + fn flat_map_item(&mut self, item: P<ast::Item>) -> 
SmallVec<[P<ast::Item>; 1]> { + match item.kind { + ast::ItemKind::MacCall(_) => self.remove(item.id).make_items(), + _ => noop_flat_map_item(item, self), + } + } + + fn flat_map_trait_item(&mut self, item: P<ast::AssocItem>) -> SmallVec<[P<ast::AssocItem>; 1]> { + match item.kind { + ast::AssocItemKind::MacCall(_) => self.remove(item.id).make_trait_items(), + _ => noop_flat_map_assoc_item(item, self), + } + } + + fn flat_map_impl_item(&mut self, item: P<ast::AssocItem>) -> SmallVec<[P<ast::AssocItem>; 1]> { + match item.kind { + ast::AssocItemKind::MacCall(_) => self.remove(item.id).make_impl_items(), + _ => noop_flat_map_assoc_item(item, self), + } + } + + fn flat_map_foreign_item( + &mut self, + item: P<ast::ForeignItem>, + ) -> SmallVec<[P<ast::ForeignItem>; 1]> { + match item.kind { + ast::ForeignItemKind::MacCall(_) => self.remove(item.id).make_foreign_items(), + _ => noop_flat_map_foreign_item(item, self), + } + } + + fn visit_expr(&mut self, expr: &mut P<ast::Expr>) { + match expr.kind { + ast::ExprKind::MacCall(_) => *expr = self.remove(expr.id).make_expr(), + _ => noop_visit_expr(expr, self), + } + } + + fn filter_map_expr(&mut self, expr: P<ast::Expr>) -> Option<P<ast::Expr>> { + match expr.kind { + ast::ExprKind::MacCall(_) => self.remove(expr.id).make_opt_expr(), + _ => noop_filter_map_expr(expr, self), + } + } + + fn flat_map_stmt(&mut self, stmt: ast::Stmt) -> SmallVec<[ast::Stmt; 1]> { + let (style, mut stmts) = match stmt.kind { + ast::StmtKind::MacCall(mac) => (mac.style, self.remove(stmt.id).make_stmts()), + _ => return noop_flat_map_stmt(stmt, self), + }; + + if style == ast::MacStmtStyle::Semicolon { + // Implement the proposal described in + // https://github.com/rust-lang/rust/issues/61733#issuecomment-509626449 + // + // The macro invocation expands to the list of statements. If the + // list of statements is empty, then 'parse' the trailing semicolon + // on the original invocation as an empty statement. 
That is: + // + // `empty();` is parsed as a single `StmtKind::Empty` + // + // If the list of statements is non-empty, see if the final + // statement already has a trailing semicolon. + // + // If it doesn't have a semicolon, then 'parse' the trailing + // semicolon from the invocation as part of the final statement, + // using `stmt.add_trailing_semicolon()` + // + // If it does have a semicolon, then 'parse' the trailing semicolon + // from the invocation as a new StmtKind::Empty + + // FIXME: We will need to preserve the original semicolon token and + // span as part of #15701 + let empty_stmt = + ast::Stmt { id: ast::DUMMY_NODE_ID, kind: ast::StmtKind::Empty, span: DUMMY_SP }; + + if let Some(stmt) = stmts.pop() { + if stmt.has_trailing_semicolon() { + stmts.push(stmt); + stmts.push(empty_stmt); + } else { + stmts.push(stmt.add_trailing_semicolon()); + } + } else { + stmts.push(empty_stmt); + } + } + + stmts + } + + fn visit_pat(&mut self, pat: &mut P<ast::Pat>) { + match pat.kind { + ast::PatKind::MacCall(_) => *pat = self.remove(pat.id).make_pat(), + _ => noop_visit_pat(pat, self), + } + } + + fn visit_ty(&mut self, ty: &mut P<ast::Ty>) { + match ty.kind { + ast::TyKind::MacCall(_) => *ty = self.remove(ty.id).make_ty(), + _ => noop_visit_ty(ty, self), + } + } + + fn visit_crate(&mut self, krate: &mut ast::Crate) { + if krate.is_placeholder { + *krate = self.remove(krate.id).make_crate(); + } else { + noop_visit_crate(krate, self) + } + } +} diff --git a/compiler/rustc_expand/src/proc_macro.rs b/compiler/rustc_expand/src/proc_macro.rs new file mode 100644 index 000000000..1a2ab9d19 --- /dev/null +++ b/compiler/rustc_expand/src/proc_macro.rs @@ -0,0 +1,181 @@ +use crate::base::{self, *}; +use crate::proc_macro_server; + +use rustc_ast as ast; +use rustc_ast::ptr::P; +use rustc_ast::token; +use rustc_ast::tokenstream::TokenStream; +use rustc_data_structures::sync::Lrc; +use rustc_errors::ErrorGuaranteed; +use rustc_parse::parser::ForceCollect; +use 
rustc_session::config::ProcMacroExecutionStrategy; +use rustc_span::profiling::SpannedEventArgRecorder; +use rustc_span::{Span, DUMMY_SP}; + +struct CrossbeamMessagePipe<T> { + tx: crossbeam_channel::Sender<T>, + rx: crossbeam_channel::Receiver<T>, +} + +impl<T> pm::bridge::server::MessagePipe<T> for CrossbeamMessagePipe<T> { + fn new() -> (Self, Self) { + let (tx1, rx1) = crossbeam_channel::bounded(1); + let (tx2, rx2) = crossbeam_channel::bounded(1); + (CrossbeamMessagePipe { tx: tx1, rx: rx2 }, CrossbeamMessagePipe { tx: tx2, rx: rx1 }) + } + + fn send(&mut self, value: T) { + self.tx.send(value).unwrap(); + } + + fn recv(&mut self) -> Option<T> { + self.rx.recv().ok() + } +} + +fn exec_strategy(ecx: &ExtCtxt<'_>) -> impl pm::bridge::server::ExecutionStrategy { + pm::bridge::server::MaybeCrossThread::<CrossbeamMessagePipe<_>>::new( + ecx.sess.opts.unstable_opts.proc_macro_execution_strategy + == ProcMacroExecutionStrategy::CrossThread, + ) +} + +pub struct BangProcMacro { + pub client: pm::bridge::client::Client<pm::TokenStream, pm::TokenStream>, +} + +impl base::BangProcMacro for BangProcMacro { + fn expand<'cx>( + &self, + ecx: &'cx mut ExtCtxt<'_>, + span: Span, + input: TokenStream, + ) -> Result<TokenStream, ErrorGuaranteed> { + let _timer = + ecx.sess.prof.generic_activity_with_arg_recorder("expand_proc_macro", |recorder| { + recorder.record_arg_with_span(ecx.expansion_descr(), span); + }); + + let proc_macro_backtrace = ecx.ecfg.proc_macro_backtrace; + let strategy = exec_strategy(ecx); + let server = proc_macro_server::Rustc::new(ecx); + self.client.run(&strategy, server, input, proc_macro_backtrace).map_err(|e| { + let mut err = ecx.struct_span_err(span, "proc macro panicked"); + if let Some(s) = e.as_str() { + err.help(&format!("message: {}", s)); + } + err.emit() + }) + } +} + +pub struct AttrProcMacro { + pub client: pm::bridge::client::Client<(pm::TokenStream, pm::TokenStream), pm::TokenStream>, +} + +impl base::AttrProcMacro for AttrProcMacro { + 
fn expand<'cx>( + &self, + ecx: &'cx mut ExtCtxt<'_>, + span: Span, + annotation: TokenStream, + annotated: TokenStream, + ) -> Result<TokenStream, ErrorGuaranteed> { + let _timer = + ecx.sess.prof.generic_activity_with_arg_recorder("expand_proc_macro", |recorder| { + recorder.record_arg_with_span(ecx.expansion_descr(), span); + }); + + let proc_macro_backtrace = ecx.ecfg.proc_macro_backtrace; + let strategy = exec_strategy(ecx); + let server = proc_macro_server::Rustc::new(ecx); + self.client.run(&strategy, server, annotation, annotated, proc_macro_backtrace).map_err( + |e| { + let mut err = ecx.struct_span_err(span, "custom attribute panicked"); + if let Some(s) = e.as_str() { + err.help(&format!("message: {}", s)); + } + err.emit() + }, + ) + } +} + +pub struct DeriveProcMacro { + pub client: pm::bridge::client::Client<pm::TokenStream, pm::TokenStream>, +} + +impl MultiItemModifier for DeriveProcMacro { + fn expand( + &self, + ecx: &mut ExtCtxt<'_>, + span: Span, + _meta_item: &ast::MetaItem, + item: Annotatable, + ) -> ExpandResult<Vec<Annotatable>, Annotatable> { + // We need special handling for statement items + // (e.g. 
`fn foo() { #[derive(Debug)] struct Bar; }`) + let is_stmt = matches!(item, Annotatable::Stmt(..)); + let hack = crate::base::ann_pretty_printing_compatibility_hack(&item, &ecx.sess.parse_sess); + let input = if hack { + let nt = match item { + Annotatable::Item(item) => token::NtItem(item), + Annotatable::Stmt(stmt) => token::NtStmt(stmt), + _ => unreachable!(), + }; + TokenStream::token_alone(token::Interpolated(Lrc::new(nt)), DUMMY_SP) + } else { + item.to_tokens() + }; + + let stream = { + let _timer = + ecx.sess.prof.generic_activity_with_arg_recorder("expand_proc_macro", |recorder| { + recorder.record_arg_with_span(ecx.expansion_descr(), span); + }); + let proc_macro_backtrace = ecx.ecfg.proc_macro_backtrace; + let strategy = exec_strategy(ecx); + let server = proc_macro_server::Rustc::new(ecx); + match self.client.run(&strategy, server, input, proc_macro_backtrace) { + Ok(stream) => stream, + Err(e) => { + let mut err = ecx.struct_span_err(span, "proc-macro derive panicked"); + if let Some(s) = e.as_str() { + err.help(&format!("message: {}", s)); + } + err.emit(); + return ExpandResult::Ready(vec![]); + } + } + }; + + let error_count_before = ecx.sess.parse_sess.span_diagnostic.err_count(); + let mut parser = + rustc_parse::stream_to_parser(&ecx.sess.parse_sess, stream, Some("proc-macro derive")); + let mut items = vec![]; + + loop { + match parser.parse_item(ForceCollect::No) { + Ok(None) => break, + Ok(Some(item)) => { + if is_stmt { + items.push(Annotatable::Stmt(P(ecx.stmt_item(span, item)))); + } else { + items.push(Annotatable::Item(item)); + } + } + Err(mut err) => { + err.emit(); + break; + } + } + } + + // fail if there have been errors emitted + if ecx.sess.parse_sess.span_diagnostic.err_count() > error_count_before { + ecx.struct_span_err(span, "proc-macro derive produced unparseable tokens").emit(); + } + + ExpandResult::Ready(items) + } +} diff --git a/compiler/rustc_expand/src/proc_macro_server.rs 
b/compiler/rustc_expand/src/proc_macro_server.rs new file mode 100644 index 000000000..7d9a4aed0 --- /dev/null +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -0,0 +1,766 @@ +use crate::base::ExtCtxt; + +use rustc_ast as ast; +use rustc_ast::token; +use rustc_ast::tokenstream::{self, Spacing::*, TokenStream}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::Lrc; +use rustc_errors::{Diagnostic, MultiSpan, PResult}; +use rustc_parse::lexer::nfc_normalize; +use rustc_parse::parse_stream_from_source_str; +use rustc_session::parse::ParseSess; +use rustc_span::def_id::CrateNum; +use rustc_span::symbol::{self, sym, Symbol}; +use rustc_span::{BytePos, FileName, Pos, SourceFile, Span}; + +use pm::bridge::{ + server, DelimSpan, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree, +}; +use pm::{Delimiter, Level, LineColumn}; +use std::ops::Bound; + +trait FromInternal<T> { + fn from_internal(x: T) -> Self; +} + +trait ToInternal<T> { + fn to_internal(self) -> T; +} + +impl FromInternal<token::Delimiter> for Delimiter { + fn from_internal(delim: token::Delimiter) -> Delimiter { + match delim { + token::Delimiter::Parenthesis => Delimiter::Parenthesis, + token::Delimiter::Brace => Delimiter::Brace, + token::Delimiter::Bracket => Delimiter::Bracket, + token::Delimiter::Invisible => Delimiter::None, + } + } +} + +impl ToInternal<token::Delimiter> for Delimiter { + fn to_internal(self) -> token::Delimiter { + match self { + Delimiter::Parenthesis => token::Delimiter::Parenthesis, + Delimiter::Brace => token::Delimiter::Brace, + Delimiter::Bracket => token::Delimiter::Bracket, + Delimiter::None => token::Delimiter::Invisible, + } + } +} + +impl FromInternal<token::LitKind> for LitKind { + fn from_internal(kind: token::LitKind) -> Self { + match kind { + token::Byte => LitKind::Byte, + token::Char => LitKind::Char, + token::Integer => LitKind::Integer, + token::Float => LitKind::Float, + token::Str => 
LitKind::Str, + token::StrRaw(n) => LitKind::StrRaw(n), + token::ByteStr => LitKind::ByteStr, + token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + token::Err => LitKind::Err, + token::Bool => unreachable!(), + } + } +} + +impl ToInternal<token::LitKind> for LitKind { + fn to_internal(self) -> token::LitKind { + match self { + LitKind::Byte => token::Byte, + LitKind::Char => token::Char, + LitKind::Integer => token::Integer, + LitKind::Float => token::Float, + LitKind::Str => token::Str, + LitKind::StrRaw(n) => token::StrRaw(n), + LitKind::ByteStr => token::ByteStr, + LitKind::ByteStrRaw(n) => token::ByteStrRaw(n), + LitKind::Err => token::Err, + } + } +} + +impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStream, Span, Symbol>> { + fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self { + use rustc_ast::token::*; + + // Estimate the capacity as `stream.len()` rounded up to the next power + // of two to limit the number of required reallocations. 
+ let mut trees = Vec::with_capacity(stream.len().next_power_of_two()); + let mut cursor = stream.into_trees(); + + while let Some(tree) = cursor.next() { + let (Token { kind, span }, joint) = match tree { + tokenstream::TokenTree::Delimited(span, delim, tts) => { + let delimiter = pm::Delimiter::from_internal(delim); + trees.push(TokenTree::Group(Group { + delimiter, + stream: Some(tts), + span: DelimSpan { + open: span.open, + close: span.close, + entire: span.entire(), + }, + })); + continue; + } + tokenstream::TokenTree::Token(token, spacing) => (token, spacing == Joint), + }; + + let mut op = |s: &str| { + assert!(s.is_ascii()); + trees.extend(s.as_bytes().iter().enumerate().map(|(idx, &ch)| { + TokenTree::Punct(Punct { ch, joint: joint || idx != s.len() - 1, span }) + })); + }; + + match kind { + Eq => op("="), + Lt => op("<"), + Le => op("<="), + EqEq => op("=="), + Ne => op("!="), + Ge => op(">="), + Gt => op(">"), + AndAnd => op("&&"), + OrOr => op("||"), + Not => op("!"), + Tilde => op("~"), + BinOp(Plus) => op("+"), + BinOp(Minus) => op("-"), + BinOp(Star) => op("*"), + BinOp(Slash) => op("/"), + BinOp(Percent) => op("%"), + BinOp(Caret) => op("^"), + BinOp(And) => op("&"), + BinOp(Or) => op("|"), + BinOp(Shl) => op("<<"), + BinOp(Shr) => op(">>"), + BinOpEq(Plus) => op("+="), + BinOpEq(Minus) => op("-="), + BinOpEq(Star) => op("*="), + BinOpEq(Slash) => op("/="), + BinOpEq(Percent) => op("%="), + BinOpEq(Caret) => op("^="), + BinOpEq(And) => op("&="), + BinOpEq(Or) => op("|="), + BinOpEq(Shl) => op("<<="), + BinOpEq(Shr) => op(">>="), + At => op("@"), + Dot => op("."), + DotDot => op(".."), + DotDotDot => op("..."), + DotDotEq => op("..="), + Comma => op(","), + Semi => op(";"), + Colon => op(":"), + ModSep => op("::"), + RArrow => op("->"), + LArrow => op("<-"), + FatArrow => op("=>"), + Pound => op("#"), + Dollar => op("$"), + Question => op("?"), + SingleQuote => op("'"), + + Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span 
})), + Lifetime(name) => { + let ident = symbol::Ident::new(name, span).without_first_quote(); + trees.extend([ + TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), + TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), + ]); + } + Literal(token::Lit { kind, symbol, suffix }) => { + trees.push(TokenTree::Literal(self::Literal { + kind: FromInternal::from_internal(kind), + symbol, + suffix, + span, + })); + } + DocComment(_, attr_style, data) => { + let mut escaped = String::new(); + for ch in data.as_str().chars() { + escaped.extend(ch.escape_debug()); + } + let stream = [ + Ident(sym::doc, false), + Eq, + TokenKind::lit(token::Str, Symbol::intern(&escaped), None), + ] + .into_iter() + .map(|kind| tokenstream::TokenTree::token_alone(kind, span)) + .collect(); + trees.push(TokenTree::Punct(Punct { ch: b'#', joint: false, span })); + if attr_style == ast::AttrStyle::Inner { + trees.push(TokenTree::Punct(Punct { ch: b'!', joint: false, span })); + } + trees.push(TokenTree::Group(Group { + delimiter: pm::Delimiter::Bracket, + stream: Some(stream), + span: DelimSpan::from_single(span), + })); + } + + Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => { + trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span })) + } + + Interpolated(nt) => { + let stream = TokenStream::from_nonterminal_ast(&nt); + // A hack used to pass AST fragments to attribute and derive + // macros as a single nonterminal token instead of a token + // stream. Such token needs to be "unwrapped" and not + // represented as a delimited group. + // FIXME: It needs to be removed, but there are some + // compatibility issues (see #73345). + if crate::base::nt_pretty_printing_compatibility_hack(&nt, rustc.sess()) { + trees.extend(Self::from_internal((stream, rustc))); + } else { + trees.push(TokenTree::Group(Group { + delimiter: pm::Delimiter::None, + stream: Some(stream), + span: DelimSpan::from_single(span), + })) + } + } + + OpenDelim(..) | CloseDelim(..) 
=> unreachable!(), + Eof => unreachable!(), + } + } + trees + } +} + +impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rustc<'_, '_>) { + fn to_internal(self) -> TokenStream { + use rustc_ast::token::*; + + let (tree, rustc) = self; + let (ch, joint, span) = match tree { + TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span), + TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => { + return tokenstream::TokenStream::delimited( + tokenstream::DelimSpan { open, close }, + delimiter.to_internal(), + stream.unwrap_or_default(), + ); + } + TokenTree::Ident(self::Ident { sym, is_raw, span }) => { + rustc.sess().symbol_gallery.insert(sym, span); + return tokenstream::TokenStream::token_alone(Ident(sym, is_raw), span); + } + TokenTree::Literal(self::Literal { + kind: self::LitKind::Integer, + symbol, + suffix, + span, + }) if symbol.as_str().starts_with('-') => { + let minus = BinOp(BinOpToken::Minus); + let symbol = Symbol::intern(&symbol.as_str()[1..]); + let integer = TokenKind::lit(token::Integer, symbol, suffix); + let a = tokenstream::TokenTree::token_alone(minus, span); + let b = tokenstream::TokenTree::token_alone(integer, span); + return [a, b].into_iter().collect(); + } + TokenTree::Literal(self::Literal { + kind: self::LitKind::Float, + symbol, + suffix, + span, + }) if symbol.as_str().starts_with('-') => { + let minus = BinOp(BinOpToken::Minus); + let symbol = Symbol::intern(&symbol.as_str()[1..]); + let float = TokenKind::lit(token::Float, symbol, suffix); + let a = tokenstream::TokenTree::token_alone(minus, span); + let b = tokenstream::TokenTree::token_alone(float, span); + return [a, b].into_iter().collect(); + } + TokenTree::Literal(self::Literal { kind, symbol, suffix, span }) => { + return tokenstream::TokenStream::token_alone( + TokenKind::lit(kind.to_internal(), symbol, suffix), + span, + ); + } + }; + + let kind = match ch { + b'=' => Eq, + b'<' => Lt, + b'>' => Gt, + b'!' 
=> Not, + b'~' => Tilde, + b'+' => BinOp(Plus), + b'-' => BinOp(Minus), + b'*' => BinOp(Star), + b'/' => BinOp(Slash), + b'%' => BinOp(Percent), + b'^' => BinOp(Caret), + b'&' => BinOp(And), + b'|' => BinOp(Or), + b'@' => At, + b'.' => Dot, + b',' => Comma, + b';' => Semi, + b':' => Colon, + b'#' => Pound, + b'$' => Dollar, + b'?' => Question, + b'\'' => SingleQuote, + _ => unreachable!(), + }; + + if joint { + tokenstream::TokenStream::token_joint(kind, span) + } else { + tokenstream::TokenStream::token_alone(kind, span) + } + } +} + +impl ToInternal<rustc_errors::Level> for Level { + fn to_internal(self) -> rustc_errors::Level { + match self { + Level::Error => rustc_errors::Level::Error { lint: false }, + Level::Warning => rustc_errors::Level::Warning(None), + Level::Note => rustc_errors::Level::Note, + Level::Help => rustc_errors::Level::Help, + _ => unreachable!("unknown proc_macro::Level variant: {:?}", self), + } + } +} + +pub struct FreeFunctions; + +pub(crate) struct Rustc<'a, 'b> { + ecx: &'a mut ExtCtxt<'b>, + def_site: Span, + call_site: Span, + mixed_site: Span, + krate: CrateNum, + rebased_spans: FxHashMap<usize, Span>, +} + +impl<'a, 'b> Rustc<'a, 'b> { + pub fn new(ecx: &'a mut ExtCtxt<'b>) -> Self { + let expn_data = ecx.current_expansion.id.expn_data(); + Rustc { + def_site: ecx.with_def_site_ctxt(expn_data.def_site), + call_site: ecx.with_call_site_ctxt(expn_data.call_site), + mixed_site: ecx.with_mixed_site_ctxt(expn_data.call_site), + krate: expn_data.macro_def_id.unwrap().krate, + rebased_spans: FxHashMap::default(), + ecx, + } + } + + fn sess(&self) -> &ParseSess { + self.ecx.parse_sess() + } +} + +impl server::Types for Rustc<'_, '_> { + type FreeFunctions = FreeFunctions; + type TokenStream = TokenStream; + type SourceFile = Lrc<SourceFile>; + type MultiSpan = Vec<Span>; + type Diagnostic = Diagnostic; + type Span = Span; + type Symbol = Symbol; +} + +impl server::FreeFunctions for Rustc<'_, '_> { + fn track_env_var(&mut self, var: &str, 
value: Option<&str>) { + self.sess() + .env_depinfo + .borrow_mut() + .insert((Symbol::intern(var), value.map(Symbol::intern))); + } + + fn track_path(&mut self, path: &str) { + self.sess().file_depinfo.borrow_mut().insert(Symbol::intern(path)); + } + + fn literal_from_str(&mut self, s: &str) -> Result<Literal<Self::Span, Self::Symbol>, ()> { + let name = FileName::proc_macro_source_code(s); + let mut parser = rustc_parse::new_parser_from_source_str(self.sess(), name, s.to_owned()); + + let first_span = parser.token.span.data(); + let minus_present = parser.eat(&token::BinOp(token::Minus)); + + let lit_span = parser.token.span.data(); + let token::Literal(mut lit) = parser.token.kind else { + return Err(()); + }; + + // Check no comment or whitespace surrounding the (possibly negative) + // literal, or more tokens after it. + if (lit_span.hi.0 - first_span.lo.0) as usize != s.len() { + return Err(()); + } + + if minus_present { + // If minus is present, check no comment or whitespace in between it + // and the literal token. + if first_span.hi.0 != lit_span.lo.0 { + return Err(()); + } + + // Check literal is a kind we allow to be negated in a proc macro token. + match lit.kind { + token::LitKind::Bool + | token::LitKind::Byte + | token::LitKind::Char + | token::LitKind::Str + | token::LitKind::StrRaw(_) + | token::LitKind::ByteStr + | token::LitKind::ByteStrRaw(_) + | token::LitKind::Err => return Err(()), + token::LitKind::Integer | token::LitKind::Float => {} + } + + // Synthesize a new symbol that includes the minus sign. 
+ let symbol = Symbol::intern(&s[..1 + lit.symbol.as_str().len()]); + lit = token::Lit::new(lit.kind, symbol, lit.suffix); + } + let token::Lit { kind, symbol, suffix } = lit; + Ok(Literal { + kind: FromInternal::from_internal(kind), + symbol, + suffix, + span: self.call_site, + }) + } +} + +impl server::TokenStream for Rustc<'_, '_> { + fn is_empty(&mut self, stream: &Self::TokenStream) -> bool { + stream.is_empty() + } + + fn from_str(&mut self, src: &str) -> Self::TokenStream { + parse_stream_from_source_str( + FileName::proc_macro_source_code(src), + src.to_string(), + self.sess(), + Some(self.call_site), + ) + } + + fn to_string(&mut self, stream: &Self::TokenStream) -> String { + pprust::tts_to_string(stream) + } + + fn expand_expr(&mut self, stream: &Self::TokenStream) -> Result<Self::TokenStream, ()> { + // Parse the expression from our tokenstream. + let expr: PResult<'_, _> = try { + let mut p = rustc_parse::stream_to_parser( + self.sess(), + stream.clone(), + Some("proc_macro expand expr"), + ); + let expr = p.parse_expr()?; + if p.token != token::Eof { + p.unexpected()?; + } + expr + }; + let expr = expr.map_err(|mut err| { + err.emit(); + })?; + + // Perform eager expansion on the expression. + let expr = self + .ecx + .expander() + .fully_expand_fragment(crate::expand::AstFragment::Expr(expr)) + .make_expr(); + + // NOTE: For now, limit `expand_expr` to exclusively expand to literals. + // This may be relaxed in the future. + // We don't use `TokenStream::from_ast` as the tokenstream currently cannot + // be recovered in the general case. 
+ match &expr.kind { + ast::ExprKind::Lit(l) if l.token.kind == token::Bool => Ok( + tokenstream::TokenStream::token_alone(token::Ident(l.token.symbol, false), l.span), + ), + ast::ExprKind::Lit(l) => { + Ok(tokenstream::TokenStream::token_alone(token::Literal(l.token), l.span)) + } + ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { + ast::ExprKind::Lit(l) => match l.token { + token::Lit { kind: token::Integer | token::Float, .. } => { + Ok(Self::TokenStream::from_iter([ + // FIXME: The span of the `-` token is lost when + // parsing, so we cannot faithfully recover it here. + tokenstream::TokenTree::token_alone(token::BinOp(token::Minus), e.span), + tokenstream::TokenTree::token_alone(token::Literal(l.token), l.span), + ])) + } + _ => Err(()), + }, + _ => Err(()), + }, + _ => Err(()), + } + } + + fn from_token_tree( + &mut self, + tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol>, + ) -> Self::TokenStream { + (tree, &mut *self).to_internal() + } + + fn concat_trees( + &mut self, + base: Option<Self::TokenStream>, + trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol>>, + ) -> Self::TokenStream { + let mut builder = tokenstream::TokenStreamBuilder::new(); + if let Some(base) = base { + builder.push(base); + } + for tree in trees { + builder.push((tree, &mut *self).to_internal()); + } + builder.build() + } + + fn concat_streams( + &mut self, + base: Option<Self::TokenStream>, + streams: Vec<Self::TokenStream>, + ) -> Self::TokenStream { + let mut builder = tokenstream::TokenStreamBuilder::new(); + if let Some(base) = base { + builder.push(base); + } + for stream in streams { + builder.push(stream); + } + builder.build() + } + + fn into_trees( + &mut self, + stream: Self::TokenStream, + ) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol>> { + FromInternal::from_internal((stream, self)) + } +} + +impl server::SourceFile for Rustc<'_, '_> { + fn eq(&mut self, file1: &Self::SourceFile, file2: &Self::SourceFile) -> bool { + 
Lrc::ptr_eq(file1, file2) + } + + fn path(&mut self, file: &Self::SourceFile) -> String { + match file.name { + FileName::Real(ref name) => name + .local_path() + .expect("attempting to get a file path in an imported file in `proc_macro::SourceFile::path`") + .to_str() + .expect("non-UTF8 file path in `proc_macro::SourceFile::path`") + .to_string(), + _ => file.name.prefer_local().to_string(), + } + } + + fn is_real(&mut self, file: &Self::SourceFile) -> bool { + file.is_real_file() + } +} + +impl server::MultiSpan for Rustc<'_, '_> { + fn new(&mut self) -> Self::MultiSpan { + vec![] + } + + fn push(&mut self, spans: &mut Self::MultiSpan, span: Self::Span) { + spans.push(span) + } +} + +impl server::Diagnostic for Rustc<'_, '_> { + fn new(&mut self, level: Level, msg: &str, spans: Self::MultiSpan) -> Self::Diagnostic { + let mut diag = Diagnostic::new(level.to_internal(), msg); + diag.set_span(MultiSpan::from_spans(spans)); + diag + } + + fn sub( + &mut self, + diag: &mut Self::Diagnostic, + level: Level, + msg: &str, + spans: Self::MultiSpan, + ) { + diag.sub(level.to_internal(), msg, MultiSpan::from_spans(spans), None); + } + + fn emit(&mut self, mut diag: Self::Diagnostic) { + self.sess().span_diagnostic.emit_diagnostic(&mut diag); + } +} + +impl server::Span for Rustc<'_, '_> { + fn debug(&mut self, span: Self::Span) -> String { + if self.ecx.ecfg.span_debug { + format!("{:?}", span) + } else { + format!("{:?} bytes({}..{})", span.ctxt(), span.lo().0, span.hi().0) + } + } + + fn source_file(&mut self, span: Self::Span) -> Self::SourceFile { + self.sess().source_map().lookup_char_pos(span.lo()).file + } + + fn parent(&mut self, span: Self::Span) -> Option<Self::Span> { + span.parent_callsite() + } + + fn source(&mut self, span: Self::Span) -> Self::Span { + span.source_callsite() + } + + fn start(&mut self, span: Self::Span) -> LineColumn { + let loc = self.sess().source_map().lookup_char_pos(span.lo()); + LineColumn { line: loc.line, column: loc.col.to_usize() 
} + } + + fn end(&mut self, span: Self::Span) -> LineColumn { + let loc = self.sess().source_map().lookup_char_pos(span.hi()); + LineColumn { line: loc.line, column: loc.col.to_usize() } + } + + fn before(&mut self, span: Self::Span) -> Self::Span { + span.shrink_to_lo() + } + + fn after(&mut self, span: Self::Span) -> Self::Span { + span.shrink_to_hi() + } + + fn join(&mut self, first: Self::Span, second: Self::Span) -> Option<Self::Span> { + let self_loc = self.sess().source_map().lookup_char_pos(first.lo()); + let other_loc = self.sess().source_map().lookup_char_pos(second.lo()); + + if self_loc.file.name != other_loc.file.name { + return None; + } + + Some(first.to(second)) + } + + fn subspan( + &mut self, + span: Self::Span, + start: Bound<usize>, + end: Bound<usize>, + ) -> Option<Self::Span> { + let length = span.hi().to_usize() - span.lo().to_usize(); + + let start = match start { + Bound::Included(lo) => lo, + Bound::Excluded(lo) => lo.checked_add(1)?, + Bound::Unbounded => 0, + }; + + let end = match end { + Bound::Included(hi) => hi.checked_add(1)?, + Bound::Excluded(hi) => hi, + Bound::Unbounded => length, + }; + + // Bounds check the values, preventing addition overflow and OOB spans. + if start > u32::MAX as usize + || end > u32::MAX as usize + || (u32::MAX - start as u32) < span.lo().to_u32() + || (u32::MAX - end as u32) < span.lo().to_u32() + || start >= end + || end > length + { + return None; + } + + let new_lo = span.lo() + BytePos::from_usize(start); + let new_hi = span.lo() + BytePos::from_usize(end); + Some(span.with_lo(new_lo).with_hi(new_hi)) + } + + fn resolved_at(&mut self, span: Self::Span, at: Self::Span) -> Self::Span { + span.with_ctxt(at.ctxt()) + } + + fn source_text(&mut self, span: Self::Span) -> Option<String> { + self.sess().source_map().span_to_snippet(span).ok() + } + /// Saves the provided span into the metadata of + /// *the crate we are currently compiling*, which must + /// be a proc-macro crate. 
This id can be passed to + /// `recover_proc_macro_span` when our current crate + /// is *run* as a proc-macro. + /// + /// Let's suppose that we have two crates - `my_client` + /// and `my_proc_macro`. The `my_proc_macro` crate + /// contains a procedural macro `my_macro`, which + /// is implemented as: `quote! { "hello" }` + /// + /// When we *compile* `my_proc_macro`, we will execute + /// the `quote` proc-macro. This will save the span of + /// "hello" into the metadata of `my_proc_macro`. As a result, + /// the body of `my_proc_macro` (after expansion) will end + /// up containing a call that looks like this: + /// `proc_macro::Ident::new("hello", proc_macro::Span::recover_proc_macro_span(0))` + /// + /// where `0` is the id returned by this function. + /// When `my_proc_macro` *executes* (during the compilation of `my_client`), + /// the call to `recover_proc_macro_span` will load the corresponding + /// span from the metadata of `my_proc_macro` (which we have access to, + /// since we've loaded `my_proc_macro` from disk in order to execute it). + /// In this way, we have obtained a span pointing into `my_proc_macro` + fn save_span(&mut self, span: Self::Span) -> usize { + self.sess().save_proc_macro_span(span) + } + + fn recover_proc_macro_span(&mut self, id: usize) -> Self::Span { + let (resolver, krate, def_site) = (&*self.ecx.resolver, self.krate, self.def_site); + *self.rebased_spans.entry(id).or_insert_with(|| { + // FIXME: `SyntaxContext` for spans from proc macro crates is lost during encoding, + // replace it with a def-site context until we are encoding it properly. 
+ resolver.get_proc_macro_quoted_span(krate, id).with_ctxt(def_site.ctxt()) + }) + } +} + +impl server::Symbol for Rustc<'_, '_> { + fn normalize_and_validate_ident(&mut self, string: &str) -> Result<Self::Symbol, ()> { + let sym = nfc_normalize(string); + if rustc_lexer::is_ident(sym.as_str()) { Ok(sym) } else { Err(()) } + } +} + +impl server::Server for Rustc<'_, '_> { + fn globals(&mut self) -> ExpnGlobals<Self::Span> { + ExpnGlobals { + def_site: self.def_site, + call_site: self.call_site, + mixed_site: self.mixed_site, + } + } + + fn intern_symbol(string: &str) -> Self::Symbol { + Symbol::intern(string) + } + + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) { + f(&symbol.as_str()) + } +} diff --git a/compiler/rustc_expand/src/tests.rs b/compiler/rustc_expand/src/tests.rs new file mode 100644 index 000000000..e44f06081 --- /dev/null +++ b/compiler/rustc_expand/src/tests.rs @@ -0,0 +1,1016 @@ +use rustc_ast as ast; +use rustc_ast::tokenstream::TokenStream; +use rustc_parse::{new_parser_from_source_str, parser::Parser, source_file_to_stream}; +use rustc_session::parse::ParseSess; +use rustc_span::create_default_session_if_not_set_then; +use rustc_span::source_map::{FilePathMapping, SourceMap}; +use rustc_span::{BytePos, Span}; + +use rustc_data_structures::sync::Lrc; +use rustc_errors::emitter::EmitterWriter; +use rustc_errors::{Handler, MultiSpan, PResult}; + +use std::io; +use std::io::prelude::*; +use std::iter::Peekable; +use std::path::{Path, PathBuf}; +use std::str; +use std::sync::{Arc, Mutex}; + +/// Map string to parser (via tts). 
+fn string_to_parser(ps: &ParseSess, source_str: String) -> Parser<'_> { + new_parser_from_source_str(ps, PathBuf::from("bogofile").into(), source_str) +} + +pub(crate) fn with_error_checking_parse<'a, T, F>(s: String, ps: &'a ParseSess, f: F) -> T +where + F: FnOnce(&mut Parser<'a>) -> PResult<'a, T>, +{ + let mut p = string_to_parser(&ps, s); + let x = f(&mut p).unwrap(); + p.sess.span_diagnostic.abort_if_errors(); + x +} + +/// Maps a string to tts, using a made-up filename. +pub(crate) fn string_to_stream(source_str: String) -> TokenStream { + let ps = ParseSess::new(FilePathMapping::empty()); + source_file_to_stream( + &ps, + ps.source_map().new_source_file(PathBuf::from("bogofile").into(), source_str), + None, + ) + .0 +} + +/// Parses a string, returns a crate. +pub(crate) fn string_to_crate(source_str: String) -> ast::Crate { + let ps = ParseSess::new(FilePathMapping::empty()); + with_error_checking_parse(source_str, &ps, |p| p.parse_crate_mod()) +} + +/// Does the given string match the pattern? whitespace in the first string +/// may be deleted or replaced with other whitespace to match the pattern. +/// This function is relatively Unicode-ignorant; fortunately, the careful design +/// of UTF-8 mitigates this ignorance. It doesn't do NKF-normalization(?). +pub(crate) fn matches_codepattern(a: &str, b: &str) -> bool { + let mut a_iter = a.chars().peekable(); + let mut b_iter = b.chars().peekable(); + + loop { + let (a, b) = match (a_iter.peek(), b_iter.peek()) { + (None, None) => return true, + (None, _) => return false, + (Some(&a), None) => { + if rustc_lexer::is_whitespace(a) { + break; // Trailing whitespace check is out of loop for borrowck. + } else { + return false; + } + } + (Some(&a), Some(&b)) => (a, b), + }; + + if rustc_lexer::is_whitespace(a) && rustc_lexer::is_whitespace(b) { + // Skip whitespace for `a` and `b`. 
+ scan_for_non_ws_or_end(&mut a_iter); + scan_for_non_ws_or_end(&mut b_iter); + } else if rustc_lexer::is_whitespace(a) { + // Skip whitespace for `a`. + scan_for_non_ws_or_end(&mut a_iter); + } else if a == b { + a_iter.next(); + b_iter.next(); + } else { + return false; + } + } + + // Check if a has *only* trailing whitespace. + a_iter.all(rustc_lexer::is_whitespace) +} + +/// Advances the given peekable `Iterator` until it reaches a non-whitespace character. +fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) { + while iter.peek().copied().map(rustc_lexer::is_whitespace) == Some(true) { + iter.next(); + } +} + +/// Identifies a position in the text by the n'th occurrence of a string. +struct Position { + string: &'static str, + count: usize, +} + +struct SpanLabel { + start: Position, + end: Position, + label: &'static str, +} + +pub(crate) struct Shared<T: Write> { + pub data: Arc<Mutex<T>>, +} + +impl<T: Write> Write for Shared<T> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.data.lock().unwrap().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.data.lock().unwrap().flush() + } +} + +fn test_harness(file_text: &str, span_labels: Vec<SpanLabel>, expected_output: &str) { + create_default_session_if_not_set_then(|_| { + let output = Arc::new(Mutex::new(Vec::new())); + + let fallback_bundle = + rustc_errors::fallback_fluent_bundle(rustc_errors::DEFAULT_LOCALE_RESOURCES, false); + let source_map = Lrc::new(SourceMap::new(FilePathMapping::empty())); + source_map.new_source_file(Path::new("test.rs").to_owned().into(), file_text.to_owned()); + + let primary_span = make_span(&file_text, &span_labels[0].start, &span_labels[0].end); + let mut msp = MultiSpan::from_span(primary_span); + for span_label in span_labels { + let span = make_span(&file_text, &span_label.start, &span_label.end); + msp.push_span_label(span, span_label.label); + println!("span: {:?} label: {:?}", span, span_label.label); + 
println!("text: {:?}", source_map.span_to_snippet(span)); + } + + let emitter = EmitterWriter::new( + Box::new(Shared { data: output.clone() }), + Some(source_map.clone()), + None, + fallback_bundle, + false, + false, + false, + None, + false, + ); + let handler = Handler::with_emitter(true, None, Box::new(emitter)); + handler.span_err(msp, "foo"); + + assert!( + expected_output.chars().next() == Some('\n'), + "expected output should begin with newline" + ); + let expected_output = &expected_output[1..]; + + let bytes = output.lock().unwrap(); + let actual_output = str::from_utf8(&bytes).unwrap(); + println!("expected output:\n------\n{}------", expected_output); + println!("actual output:\n------\n{}------", actual_output); + + assert!(expected_output == actual_output) + }) +} + +fn make_span(file_text: &str, start: &Position, end: &Position) -> Span { + let start = make_pos(file_text, start); + let end = make_pos(file_text, end) + end.string.len(); // just after matching thing ends + assert!(start <= end); + Span::with_root_ctxt(BytePos(start as u32), BytePos(end as u32)) +} + +fn make_pos(file_text: &str, pos: &Position) -> usize { + let mut remainder = file_text; + let mut offset = 0; + for _ in 0..pos.count { + if let Some(n) = remainder.find(&pos.string) { + offset += n; + remainder = &remainder[n + 1..]; + } else { + panic!("failed to find {} instances of {:?} in {:?}", pos.count, pos.string, file_text); + } + } + offset +} + +#[test] +fn ends_on_col0() { + test_harness( + r#" +fn foo() { +} +"#, + vec![SpanLabel { + start: Position { string: "{", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "test", + }], + r#" +error: foo + --> test.rs:2:10 + | +2 | fn foo() { + | __________^ +3 | | } + | |_^ test + +"#, + ); +} + +#[test] +fn ends_on_col2() { + test_harness( + r#" +fn foo() { + + + } +"#, + vec![SpanLabel { + start: Position { string: "{", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "test", + }], + r#" +error: foo + 
--> test.rs:2:10 + | +2 | fn foo() { + | __________^ +3 | | +4 | | +5 | | } + | |___^ test + +"#, + ); +} +#[test] +fn non_nested() { + test_harness( + r#" +fn foo() { + X0 Y0 + X1 Y1 + X2 Y2 +} +"#, + vec![ + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X2", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "Y2", count: 1 }, + label: "`Y` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | X0 Y0 + | ____^__- + | | ___| + | || +4 | || X1 Y1 +5 | || X2 Y2 + | ||____^__- `Y` is a good letter too + | |____| + | `X` is a good letter + +"#, + ); +} + +#[test] +fn nested() { + test_harness( + r#" +fn foo() { + X0 Y0 + Y1 X1 +} +"#, + vec![ + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X1", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "Y1", count: 1 }, + label: "`Y` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | X0 Y0 + | ____^__- + | | ___| + | || +4 | || Y1 X1 + | ||____-__^ `X` is a good letter + | |_____| + | `Y` is a good letter too + +"#, + ); +} + +#[test] +fn different_overlap() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 Y1 Z1 + X2 Y2 Z2 + X3 Y3 Z3 +} +"#, + vec![ + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "X2", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Z1", count: 1 }, + end: Position { string: "X3", count: 1 }, + label: "`Y` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:6 + | +3 | X0 Y0 Z0 + | ______^ +4 | | X1 Y1 Z1 + | |_________- +5 | || X2 Y2 Z2 + | ||____^ `X` is a good letter +6 | | X3 Y3 Z3 + | |_____- `Y` is a good letter too + +"#, + ); +} + +#[test] +fn triple_overlap() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 
Y1 Z1 + X2 Y2 Z2 +} +"#, + vec![ + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X2", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "Y2", count: 1 }, + label: "`Y` is a good letter too", + }, + SpanLabel { + start: Position { string: "Z0", count: 1 }, + end: Position { string: "Z2", count: 1 }, + label: "`Z` label", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | X0 Y0 Z0 + | _____^__-__- + | | ____|__| + | || ___| + | ||| +4 | ||| X1 Y1 Z1 +5 | ||| X2 Y2 Z2 + | |||____^__-__- `Z` label + | ||____|__| + | |____| `Y` is a good letter too + | `X` is a good letter + +"#, + ); +} + +#[test] +fn triple_exact_overlap() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 Y1 Z1 + X2 Y2 Z2 +} +"#, + vec![ + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X2", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X2", count: 1 }, + label: "`Y` is a good letter too", + }, + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X2", count: 1 }, + label: "`Z` label", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | / X0 Y0 Z0 +4 | | X1 Y1 Z1 +5 | | X2 Y2 Z2 + | | ^ + | | | + | | `X` is a good letter + | |____`Y` is a good letter too + | `Z` label + +"#, + ); +} + +#[test] +fn minimum_depth() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 Y1 Z1 + X2 Y2 Z2 + X3 Y3 Z3 +} +"#, + vec![ + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "X1", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Y1", count: 1 }, + end: Position { string: "Z2", count: 1 }, + label: "`Y` is a good letter too", + }, + SpanLabel { + start: Position { string: "X2", count: 1 }, + end: Position { string: "Y3", count: 1 }, + label: "`Z`", + }, 
+ ], + r#" +error: foo + --> test.rs:3:6 + | +3 | X0 Y0 Z0 + | ______^ +4 | | X1 Y1 Z1 + | |____^_- + | ||____| + | | `X` is a good letter +5 | | X2 Y2 Z2 + | |____-______- `Y` is a good letter too + | ____| + | | +6 | | X3 Y3 Z3 + | |________- `Z` + +"#, + ); +} + +#[test] +fn non_overlaping() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 Y1 Z1 + X2 Y2 Z2 + X3 Y3 Z3 +} +"#, + vec![ + SpanLabel { + start: Position { string: "X0", count: 1 }, + end: Position { string: "X1", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Y2", count: 1 }, + end: Position { string: "Z3", count: 1 }, + label: "`Y` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | / X0 Y0 Z0 +4 | | X1 Y1 Z1 + | |____^ `X` is a good letter +5 | X2 Y2 Z2 + | ______- +6 | | X3 Y3 Z3 + | |__________- `Y` is a good letter too + +"#, + ); +} + +#[test] +fn overlaping_start_and_end() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 Y1 Z1 + X2 Y2 Z2 + X3 Y3 Z3 +} +"#, + vec![ + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "X1", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Z1", count: 1 }, + end: Position { string: "Z3", count: 1 }, + label: "`Y` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:6 + | +3 | X0 Y0 Z0 + | ______^ +4 | | X1 Y1 Z1 + | |____^____- + | ||____| + | | `X` is a good letter +5 | | X2 Y2 Z2 +6 | | X3 Y3 Z3 + | |___________- `Y` is a good letter too + +"#, + ); +} + +#[test] +fn multiple_labels_primary_without_message() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "", + }, + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "`a` is a good letter", + }, + SpanLabel { + start: Position { string: "c", count: 1 }, + end: 
Position { string: "c", count: 1 }, + label: "", + }, + ], + r#" +error: foo + --> test.rs:3:7 + | +3 | a { b { c } d } + | ----^^^^-^^-- `a` is a good letter + +"#, + ); +} + +#[test] +fn multiple_labels_secondary_without_message() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "`a` is a good letter", + }, + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | a { b { c } d } + | ^^^^-------^^ `a` is a good letter + +"#, + ); +} + +#[test] +fn multiple_labels_primary_without_message_2() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "`b` is a good letter", + }, + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "", + }, + SpanLabel { + start: Position { string: "c", count: 1 }, + end: Position { string: "c", count: 1 }, + label: "", + }, + ], + r#" +error: foo + --> test.rs:3:7 + | +3 | a { b { c } d } + | ----^^^^-^^-- + | | + | `b` is a good letter + +"#, + ); +} + +#[test] +fn multiple_labels_secondary_without_message_2() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "", + }, + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "`b` is a good letter", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | a { b { c } d } + | ^^^^-------^^ + | | + | `b` is a good letter + +"#, + ); +} + +#[test] +fn multiple_labels_secondary_without_message_3() { + test_harness( + r#" +fn foo() { + a bc d +} +"#, + vec![ + SpanLabel { + start: Position { 
string: "a", count: 1 }, + end: Position { string: "b", count: 1 }, + label: "`a` is a good letter", + }, + SpanLabel { + start: Position { string: "c", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | a bc d + | ^^^^---- + | | + | `a` is a good letter + +"#, + ); +} + +#[test] +fn multiple_labels_without_message() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "", + }, + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | a { b { c } d } + | ^^^^-------^^ + +"#, + ); +} + +#[test] +fn multiple_labels_without_message_2() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "", + }, + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "", + }, + SpanLabel { + start: Position { string: "c", count: 1 }, + end: Position { string: "c", count: 1 }, + label: "", + }, + ], + r#" +error: foo + --> test.rs:3:7 + | +3 | a { b { c } d } + | ----^^^^-^^-- + +"#, + ); +} + +#[test] +fn multiple_labels_with_message() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![ + SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "`a` is a good letter", + }, + SpanLabel { + start: Position { string: "b", count: 1 }, + end: Position { string: "}", count: 1 }, + label: "`b` is a good letter", + }, + ], + r#" +error: foo + --> test.rs:3:3 + | +3 | a { b { c } d } + | ^^^^-------^^ + | | | + | | `b` is a good letter + | `a` is a good letter + +"#, + ); +} + +#[test] +fn single_label_with_message() { + test_harness( + 
r#" +fn foo() { + a { b { c } d } +} +"#, + vec![SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "`a` is a good letter", + }], + r#" +error: foo + --> test.rs:3:3 + | +3 | a { b { c } d } + | ^^^^^^^^^^^^^ `a` is a good letter + +"#, + ); +} + +#[test] +fn single_label_without_message() { + test_harness( + r#" +fn foo() { + a { b { c } d } +} +"#, + vec![SpanLabel { + start: Position { string: "a", count: 1 }, + end: Position { string: "d", count: 1 }, + label: "", + }], + r#" +error: foo + --> test.rs:3:3 + | +3 | a { b { c } d } + | ^^^^^^^^^^^^^ + +"#, + ); +} + +#[test] +fn long_snippet() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 + X1 Y1 Z1 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 + X2 Y2 Z2 + X3 Y3 Z3 +} +"#, + vec![ + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "X1", count: 1 }, + label: "`X` is a good letter", + }, + SpanLabel { + start: Position { string: "Z1", count: 1 }, + end: Position { string: "Z3", count: 1 }, + label: "`Y` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:6 + | +3 | X0 Y0 Z0 + | ______^ +4 | | X1 Y1 Z1 + | |____^____- + | ||____| + | | `X` is a good letter +5 | | 1 +6 | | 2 +7 | | 3 +... 
| +15 | | X2 Y2 Z2 +16 | | X3 Y3 Z3 + | |___________- `Y` is a good letter too + +"#, + ); +} + +#[test] +fn long_snippet_multiple_spans() { + test_harness( + r#" +fn foo() { + X0 Y0 Z0 +1 +2 +3 + X1 Y1 Z1 +4 +5 +6 + X2 Y2 Z2 +7 +8 +9 +10 + X3 Y3 Z3 +} +"#, + vec![ + SpanLabel { + start: Position { string: "Y0", count: 1 }, + end: Position { string: "Y3", count: 1 }, + label: "`Y` is a good letter", + }, + SpanLabel { + start: Position { string: "Z1", count: 1 }, + end: Position { string: "Z2", count: 1 }, + label: "`Z` is a good letter too", + }, + ], + r#" +error: foo + --> test.rs:3:6 + | +3 | X0 Y0 Z0 + | ______^ +4 | | 1 +5 | | 2 +6 | | 3 +7 | | X1 Y1 Z1 + | |_________- +8 | || 4 +9 | || 5 +10 | || 6 +11 | || X2 Y2 Z2 + | ||__________- `Z` is a good letter too +... | +15 | | 10 +16 | | X3 Y3 Z3 + | |_______^ `Y` is a good letter + +"#, + ); +} diff --git a/compiler/rustc_expand/src/tokenstream/tests.rs b/compiler/rustc_expand/src/tokenstream/tests.rs new file mode 100644 index 000000000..eed696810 --- /dev/null +++ b/compiler/rustc_expand/src/tokenstream/tests.rs @@ -0,0 +1,110 @@ +use crate::tests::string_to_stream; + +use rustc_ast::token; +use rustc_ast::tokenstream::{TokenStream, TokenStreamBuilder}; +use rustc_span::create_default_session_globals_then; +use rustc_span::{BytePos, Span, Symbol}; + +fn string_to_ts(string: &str) -> TokenStream { + string_to_stream(string.to_owned()) +} + +fn sp(a: u32, b: u32) -> Span { + Span::with_root_ctxt(BytePos(a), BytePos(b)) +} + +#[test] +fn test_concat() { + create_default_session_globals_then(|| { + let test_res = string_to_ts("foo::bar::baz"); + let test_fst = string_to_ts("foo::bar"); + let test_snd = string_to_ts("::baz"); + let mut builder = TokenStreamBuilder::new(); + builder.push(test_fst); + builder.push(test_snd); + let eq_res = builder.build(); + assert_eq!(test_res.trees().count(), 5); + assert_eq!(eq_res.trees().count(), 5); + assert_eq!(test_res.eq_unspanned(&eq_res), true); + }) +} + +#[test] +fn 
test_to_from_bijection() { + create_default_session_globals_then(|| { + let test_start = string_to_ts("foo::bar(baz)"); + let test_end = test_start.trees().cloned().collect(); + assert_eq!(test_start, test_end) + }) +} + +#[test] +fn test_eq_0() { + create_default_session_globals_then(|| { + let test_res = string_to_ts("foo"); + let test_eqs = string_to_ts("foo"); + assert_eq!(test_res, test_eqs) + }) +} + +#[test] +fn test_eq_1() { + create_default_session_globals_then(|| { + let test_res = string_to_ts("::bar::baz"); + let test_eqs = string_to_ts("::bar::baz"); + assert_eq!(test_res, test_eqs) + }) +} + +#[test] +fn test_eq_3() { + create_default_session_globals_then(|| { + let test_res = string_to_ts(""); + let test_eqs = string_to_ts(""); + assert_eq!(test_res, test_eqs) + }) +} + +#[test] +fn test_diseq_0() { + create_default_session_globals_then(|| { + let test_res = string_to_ts("::bar::baz"); + let test_eqs = string_to_ts("bar::baz"); + assert_eq!(test_res == test_eqs, false) + }) +} + +#[test] +fn test_diseq_1() { + create_default_session_globals_then(|| { + let test_res = string_to_ts("(bar,baz)"); + let test_eqs = string_to_ts("bar,baz"); + assert_eq!(test_res == test_eqs, false) + }) +} + +#[test] +fn test_is_empty() { + create_default_session_globals_then(|| { + let test0 = TokenStream::default(); + let test1 = TokenStream::token_alone(token::Ident(Symbol::intern("a"), false), sp(0, 1)); + let test2 = string_to_ts("foo(bar::baz)"); + + assert_eq!(test0.is_empty(), true); + assert_eq!(test1.is_empty(), false); + assert_eq!(test2.is_empty(), false); + }) +} + +#[test] +fn test_dotdotdot() { + create_default_session_globals_then(|| { + let mut builder = TokenStreamBuilder::new(); + builder.push(TokenStream::token_joint(token::Dot, sp(0, 1))); + builder.push(TokenStream::token_joint(token::Dot, sp(1, 2))); + builder.push(TokenStream::token_alone(token::Dot, sp(2, 3))); + let stream = builder.build(); + assert!(stream.eq_unspanned(&string_to_ts("..."))); + 
assert_eq!(stream.trees().count(), 1); + }) +} |