// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. /*! Implements the `match_token!()` macro for use by the HTML tree builder in `src/tree_builder/rules.rs`. ## Example ```rust match_token!(token { CommentToken(text) => 1, tag @ => 2, => 3,
=> else, tag @ => 4, token => 5, }) ``` ## Syntax Because of the simplistic parser, the macro invocation must start with exactly `match_token!(token {` (with whitespace as specified) and end with exactly `})`. The left-hand side of each match arm is an optional `name @` binding, followed by - an ordinary Rust pattern that starts with an identifier or an underscore, or - a sequence of HTML tag names as identifiers, each inside "<...>" or "" to match an open or close tag respectively, or - a "wildcard tag" "<_>" or "" to match all open tags or all close tags respectively. The right-hand side is either an expression or the keyword `else`. Note that this syntax does not support guards or pattern alternation like `Foo | Bar`. This is not a fundamental limitation; it's done for implementation simplicity. ## Semantics Ordinary Rust patterns match as usual. If present, the `name @` binding has the usual meaning. A sequence of named tags matches any of those tags. A single sequence can contain both open and close tags. If present, the `name @` binding binds (by move) the `Tag` struct, not the outer `Token`. That is, a match arm like ```rust tag @ => ... ``` expands to something like ```rust TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag }) | TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ... ``` A wildcard tag matches any tag of the appropriate kind, *unless* it was previously matched with an `else` right-hand side (more on this below). The expansion of this macro reorders code somewhat, to satisfy various restrictions arising from moves. However it provides the semantics of in-order matching, by enforcing the following restrictions on its input: - The last pattern must be a variable or the wildcard "_". In other words it must match everything. - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear after wildcard tag patterns. - No tag name may appear more than once. - A wildcard tag pattern may not occur in the same arm as any other tag. "<_> => ..." and "<_> => ..." are both forbidden. - The right-hand side "else" may only appear with specific-tag patterns. It means that these specific tags should be handled by the last, catch-all case arm, rather than by any wildcard tag arm. This situation is common in the HTML5 syntax. */ use quote::quote; use syn::{braced, parse_quote, Token}; use proc_macro2::TokenStream; use quote::ToTokens; use std::collections::HashSet; use std::fs::File; use std::io::{Read, Write}; use std::path::Path; use syn::ext::IdentExt; use syn::fold::Fold; use syn::parse::{Parse, ParseStream, Result}; pub fn expand(from: &Path, to: &Path) { let mut source = String::new(); File::open(from) .unwrap() .read_to_string(&mut source) .unwrap(); let ast = syn::parse_file(&source).expect("Parsing rules.rs module"); let mut m = MatchTokenParser {}; let ast = m.fold_file(ast); let code = ast .into_token_stream() .to_string() .replace("{ ", "{\n") .replace(" }", "\n}"); File::create(to) .unwrap() .write_all(code.as_bytes()) .unwrap(); } struct MatchTokenParser {} struct MatchToken { ident: syn::Ident, arms: Vec, } struct MatchTokenArm { binding: Option, lhs: LHS, rhs: RHS, } enum LHS { Tags(Vec), Pattern(syn::Pat), } enum RHS { Expression(syn::Expr), Else, } #[derive(PartialEq, Eq, Hash, Clone)] enum TagKind { StartTag, EndTag, } // Option is None if wildcard #[derive(PartialEq, Eq, Hash, Clone)] pub struct Tag { kind: TagKind, name: Option, } impl Parse for Tag { fn parse(input: ParseStream) -> Result { input.parse::()?; let closing: Option = input.parse()?; let name = match input.call(syn::Ident::parse_any)? { ref wildcard if wildcard == "_" => None, other => Some(other), }; input.parse::]>()?; Ok(Tag { kind: if closing.is_some() { TagKind::EndTag } else { TagKind::StartTag }, name, }) } } impl Parse for LHS { fn parse(input: ParseStream) -> Result { if input.peek(Token![<]) { let mut tags = Vec::new(); while !input.peek(Token![=>]) { tags.push(input.parse()?); } Ok(LHS::Tags(tags)) } else { let p: syn::Pat = input.parse()?; Ok(LHS::Pattern(p)) } } } impl Parse for MatchTokenArm { fn parse(input: ParseStream) -> Result { let binding = if input.peek2(Token![@]) { let binding = input.parse::()?; input.parse::()?; Some(binding) } else { None }; let lhs = input.parse::()?; input.parse::]>()?; let rhs = if input.peek(syn::token::Brace) { let block = input.parse::().unwrap(); let block = syn::ExprBlock { attrs: vec![], label: None, block, }; input.parse::>()?; RHS::Expression(syn::Expr::Block(block)) } else if input.peek(Token![else]) { input.parse::()?; input.parse::()?; RHS::Else } else { let expr = input.parse::().unwrap(); input.parse::>()?; RHS::Expression(expr) }; Ok(MatchTokenArm { binding, lhs, rhs }) } } impl Parse for MatchToken { fn parse(input: ParseStream) -> Result { let ident = input.parse::()?; let content; braced!(content in input); let mut arms = vec![]; while !content.is_empty() { arms.push(content.parse()?); } Ok(MatchToken { ident, arms }) } } pub fn expand_match_token(body: &TokenStream) -> syn::Expr { let match_token = syn::parse2::(body.clone()); let ast = expand_match_token_macro(match_token.unwrap()); syn::parse2(ast).unwrap() } fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { let mut arms = match_token.arms; let to_be_matched = match_token.ident; // Handle the last arm specially at the end. let last_arm = arms.pop().unwrap(); // Tags we've seen, used for detecting duplicates. let mut seen_tags: HashSet = HashSet::new(); // Case arms for wildcard matching. We collect these and // emit them later. let mut wildcards_patterns: Vec = Vec::new(); let mut wildcards_expressions: Vec = Vec::new(); // Tags excluded (by an 'else' RHS) from wildcard matching. let mut wild_excluded_patterns: Vec = Vec::new(); let mut arms_code = Vec::new(); for MatchTokenArm { binding, lhs, rhs } in arms { // Build Rust syntax for the `name @` binding, if any. let binding = match binding { Some(ident) => quote!(#ident @), None => quote!(), }; match (lhs, rhs) { (LHS::Pattern(_), RHS::Else) => { panic!("'else' may not appear with an ordinary pattern") }, // ordinary pattern => expression (LHS::Pattern(pat), RHS::Expression(expr)) => { if !wildcards_patterns.is_empty() { panic!( "ordinary patterns may not appear after wildcard tags {:?} {:?}", pat, expr ); } arms_code.push(quote!(#binding #pat => #expr,)) }, // ... => else (LHS::Tags(tags), RHS::Else) => { for tag in tags { if !seen_tags.insert(tag.clone()) { panic!("duplicate tag"); } if tag.name.is_none() { panic!("'else' may not appear with a wildcard tag"); } wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag)); } }, // <_> => expression // ... => expression (LHS::Tags(tags), RHS::Expression(expr)) => { // Is this arm a tag wildcard? // `None` if we haven't processed the first tag yet. let mut wildcard = None; for tag in tags { if !seen_tags.insert(tag.clone()) { panic!("duplicate tag"); } match tag.name { // Some(_) => { if !wildcards_patterns.is_empty() { panic!("specific tags may not appear after wildcard tags"); } if wildcard == Some(true) { panic!("wildcard tags must appear alone"); } if wildcard.is_some() { // Push the delimeter `|` if it's not the first tag. arms_code.push(quote!( | )) } arms_code.push(make_tag_pattern(&binding, tag)); wildcard = Some(false); }, // <_> None => { if wildcard.is_some() { panic!("wildcard tags must appear alone"); } wildcard = Some(true); wildcards_patterns.push(make_tag_pattern(&binding, tag)); wildcards_expressions.push(expr.clone()); }, } } match wildcard { None => panic!("[internal macro error] tag arm with no tags"), Some(false) => arms_code.push(quote!( => #expr,)), Some(true) => {}, // codegen for wildcards is deferred } }, } } // Time to process the last, catch-all arm. We will generate something like // // last_arm_token => { // let enable_wildcards = match last_arm_token { // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, // // ... // _ => true, // }; // // match (enable_wildcards, last_arm_token) { // (true, TagToken(name @ Tag { kind: StartTag, .. })) // => ..., // wildcard action for start tags // // (true, TagToken(name @ Tag { kind: EndTag, .. })) // => ..., // wildcard action for end tags // // (_, token) => ... // using the pattern from that last arm // } // } let MatchTokenArm { binding, lhs, rhs } = last_arm; let (last_pat, last_expr) = match (binding, lhs, rhs) { (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"), (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"), (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e), }; quote! { match #to_be_matched { #( #arms_code )* last_arm_token => { let enable_wildcards = match last_arm_token { #( #wild_excluded_patterns => false, )* _ => true, }; match (enable_wildcards, last_arm_token) { #( (true, #wildcards_patterns) => #wildcards_expressions, )* (_, #last_pat) => #last_expr, } } } } } impl Fold for MatchTokenParser { fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt { match stmt { syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => { if mac.path == parse_quote!(match_token) { return syn::fold::fold_stmt( self, syn::Stmt::Expr(expand_match_token(&mac.tokens)), ); } }, _ => {}, } syn::fold::fold_stmt(self, stmt) } fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr { match expr { syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => { if mac.path == parse_quote!(match_token) { return syn::fold::fold_expr(self, expand_match_token(&mac.tokens)); } }, _ => {}, } syn::fold::fold_expr(self, expr) } } fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream { let kind = match tag.kind { TagKind::StartTag => quote!(crate::tokenizer::StartTag), TagKind::EndTag => quote!(crate::tokenizer::EndTag), }; let name_field = if let Some(name) = tag.name { let name = name.to_string(); quote!(name: local_name!(#name),) } else { quote!() }; quote! { crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. }) } }