diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/pest_generator/src | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/pest_generator/src')
-rw-r--r-- | vendor/pest_generator/src/generator.rs | 1002 | ||||
-rw-r--r-- | vendor/pest_generator/src/lib.rs | 208 | ||||
-rw-r--r-- | vendor/pest_generator/src/macros.rs | 44 |
3 files changed, 1254 insertions, 0 deletions
diff --git a/vendor/pest_generator/src/generator.rs b/vendor/pest_generator/src/generator.rs new file mode 100644 index 000000000..bed56f34a --- /dev/null +++ b/vendor/pest_generator/src/generator.rs @@ -0,0 +1,1002 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 Dragoș Tiselice +// +// Licensed under the Apache License, Version 2.0 +// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT +// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +use std::path::PathBuf; + +use proc_macro2::{Span, TokenStream}; +use quote::{ToTokens, TokenStreamExt}; +use syn::{self, Generics, Ident}; + +use pest_meta::ast::*; +use pest_meta::optimizer::*; +use pest_meta::UNICODE_PROPERTY_NAMES; + +/// Generates the token stream for a derived parser: the optional grammar +/// `include_str!` constant, the `Rule` enum and the `::pest::Parser<Rule>` impl +/// for `name`. +/// +/// `defaults` lists the builtin rules to emit; only builtins named there are +/// appended to the generated rules, and the `EOI` variant/pattern is added only +/// when it contains `"EOI"`. The include constant is emitted only when +/// `include_grammar` is true and `path` is `Some`. +/// +/// # Panics +/// +/// Panics if the include constant is generated and `path` is not valid Unicode. +#[allow(clippy::needless_pass_by_value)] +pub fn generate( + name: Ident, + generics: &Generics, + path: Option<PathBuf>, + rules: Vec<OptimizedRule>, + defaults: Vec<&str>, + include_grammar: bool, +) -> TokenStream { + // EOI is the only builtin that also needs a `Rule` variant and a match arm. + let uses_eoi = defaults.iter().any(|name| *name == "EOI"); + + let builtins = generate_builtin_rules(); + let include_fix = if include_grammar { + match path { + Some(ref path) => generate_include(&name, path.to_str().expect("non-Unicode path")), + None => quote!(), + } + } else { + quote!() + }; + let rule_enum = generate_enum(&rules, uses_eoi); + let patterns = generate_patterns(&rules, uses_eoi); + let skip = generate_skip(&rules); + + // From here on `rules` holds generated functions: user rules first, then the requested builtins. + let mut rules: Vec<_> = rules.into_iter().map(generate_rule).collect(); + rules.extend(builtins.into_iter().filter_map(|(builtin, tokens)| { + if defaults.contains(&builtin) { + Some(tokens) + } else { + None + } + })); + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let parser_impl = quote! 
{ + #[allow(clippy::all)] + impl #impl_generics ::pest::Parser<Rule> for #name #ty_generics #where_clause { + fn parse<'i>( + rule: Rule, + input: &'i str + ) -> ::std::result::Result< + ::pest::iterators::Pairs<'i, Rule>, + ::pest::error::Error<Rule> + > { + mod rules { + pub mod hidden { + use super::super::Rule; + #skip + } + + pub mod visible { + use super::super::Rule; + #( #rules )* + } + + pub use self::visible::*; + } + + ::pest::state(input, |state| { + match rule { + #patterns + } + }) + } + } + }; + + quote! { + #include_fix + #rule_enum + #parser_impl + } +} + +// Note: All builtin rules should be validated as pest builtins in meta/src/validator.rs. +// Some should also be keywords. +fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { + let mut builtins = Vec::new(); + + insert_builtin!(builtins, ANY, state.skip(1)); + insert_public_builtin!( + builtins, + EOI, + state.rule(Rule::EOI, |state| state.end_of_input()) + ); + insert_builtin!(builtins, SOI, state.start_of_input()); + insert_builtin!(builtins, PEEK, state.stack_peek()); + insert_builtin!(builtins, PEEK_ALL, state.stack_match_peek()); + insert_builtin!(builtins, POP, state.stack_pop()); + insert_builtin!(builtins, POP_ALL, state.stack_match_pop()); + insert_builtin!(builtins, DROP, state.stack_drop()); + + insert_builtin!(builtins, ASCII_DIGIT, state.match_range('0'..'9')); + insert_builtin!(builtins, ASCII_NONZERO_DIGIT, state.match_range('1'..'9')); + insert_builtin!(builtins, ASCII_BIN_DIGIT, state.match_range('0'..'1')); + insert_builtin!(builtins, ASCII_OCT_DIGIT, state.match_range('0'..'7')); + insert_builtin!( + builtins, + ASCII_HEX_DIGIT, + state + .match_range('0'..'9') + .or_else(|state| state.match_range('a'..'f')) + .or_else(|state| state.match_range('A'..'F')) + ); + insert_builtin!(builtins, ASCII_ALPHA_LOWER, state.match_range('a'..'z')); + insert_builtin!(builtins, ASCII_ALPHA_UPPER, state.match_range('A'..'Z')); + insert_builtin!( + builtins, + ASCII_ALPHA, + 
state + .match_range('a'..'z') + .or_else(|state| state.match_range('A'..'Z')) + ); + insert_builtin!( + builtins, + ASCII_ALPHANUMERIC, + state + .match_range('a'..'z') + .or_else(|state| state.match_range('A'..'Z')) + .or_else(|state| state.match_range('0'..'9')) + ); + insert_builtin!(builtins, ASCII, state.match_range('\x00'..'\x7f')); + insert_builtin!( + builtins, + NEWLINE, + state + .match_string("\n") + .or_else(|state| state.match_string("\r\n")) + .or_else(|state| state.match_string("\r")) + ); + + for property in UNICODE_PROPERTY_NAMES { + let property_ident: Ident = syn::parse_str(property).unwrap(); + // insert manually for #property substitution + builtins.push((property, quote! { + #[inline] + #[allow(dead_code, non_snake_case, unused_variables)] + fn #property_ident(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.match_char_by(::pest::unicode::#property_ident) + } + })); + } + builtins +} + +// Needed because Cargo doesn't watch for changes in grammars. +fn generate_include(name: &Ident, path: &str) -> TokenStream { + let const_name = Ident::new(&format!("_PEST_GRAMMAR_{}", name), Span::call_site()); + // Need to make this relative to the current directory since the path to the file + // is derived from the CARGO_MANIFEST_DIR environment variable + let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); + current_dir.push(path); + let relative_path = current_dir.to_str().expect("path contains invalid unicode"); + quote! { + #[allow(non_upper_case_globals)] + const #const_name: &'static str = include_str!(#relative_path); + } +} + +fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { + let rules = rules + .iter() + .map(|rule| Ident::new(rule.name.as_str(), Span::call_site())); + if uses_eoi { + quote! 
{ + #[allow(dead_code, non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + pub enum Rule { + EOI, + #( #rules ),* + } + } + } else { + quote! { + #[allow(dead_code, non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + pub enum Rule { + #( #rules ),* + } + } + } +} + +fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { + let mut rules: Vec<TokenStream> = rules + .iter() + .map(|rule| { + let rule = Ident::new(rule.name.as_str(), Span::call_site()); + quote! { + Rule::#rule => rules::#rule(state) + } + }) + .collect(); + + if uses_eoi { + rules.push(quote! { + Rule::EOI => rules::EOI(state) + }); + } + + quote! { + #( #rules ),* + } +} + +fn generate_rule(rule: OptimizedRule) -> TokenStream { + let name = Ident::new(&rule.name, Span::call_site()); + let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { + generate_expr_atomic(rule.expr) + } else if name == "WHITESPACE" || name == "COMMENT" { + let atomic = generate_expr_atomic(rule.expr); + + quote! { + state.atomic(::pest::Atomicity::Atomic, |state| { + #atomic + }) + } + } else { + generate_expr(rule.expr) + }; + + match rule.ty { + RuleType::Normal => quote! { + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.rule(Rule::#name, |state| { + #expr + }) + } + }, + RuleType::Silent => quote! { + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + #expr + } + }, + RuleType::Atomic => quote! 
{ + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.rule(Rule::#name, |state| { + state.atomic(::pest::Atomicity::Atomic, |state| { + #expr + }) + }) + } + }, + RuleType::CompoundAtomic => quote! { + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.atomic(::pest::Atomicity::CompoundAtomic, |state| { + state.rule(Rule::#name, |state| { + #expr + }) + }) + } + }, + RuleType::NonAtomic => quote! { + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.atomic(::pest::Atomicity::NonAtomic, |state| { + state.rule(Rule::#name, |state| { + #expr + }) + }) + } + }, + } +} + +fn generate_skip(rules: &[OptimizedRule]) -> TokenStream { + let whitespace = rules.iter().any(|rule| rule.name == "WHITESPACE"); + let comment = rules.iter().any(|rule| rule.name == "COMMENT"); + + match (whitespace, comment) { + (false, false) => generate_rule!(skip, Ok(state)), + (true, false) => generate_rule!( + skip, + if state.atomicity() == ::pest::Atomicity::NonAtomic { + state.repeat(|state| super::visible::WHITESPACE(state)) + } else { + Ok(state) + } + ), + (false, true) => generate_rule!( + skip, + if state.atomicity() == ::pest::Atomicity::NonAtomic { + state.repeat(|state| super::visible::COMMENT(state)) + } else { + Ok(state) + } + ), + (true, true) => generate_rule!( + skip, + if state.atomicity() == ::pest::Atomicity::NonAtomic { + state.sequence(|state| { + state + .repeat(|state| super::visible::WHITESPACE(state)) + .and_then(|state| { + state.repeat(|state| { + state.sequence(|state| { + super::visible::COMMENT(state).and_then(|state| { + state.repeat(|state| super::visible::WHITESPACE(state)) + }) + }) + }) + }) + }) 
+ } else { + Ok(state) + } + ), + } +} + +fn generate_expr(expr: OptimizedExpr) -> TokenStream { + match expr { + OptimizedExpr::Str(string) => { + quote! { + state.match_string(#string) + } + } + OptimizedExpr::Insens(string) => { + quote! { + state.match_insensitive(#string) + } + } + OptimizedExpr::Range(start, end) => { + let start = start.chars().next().unwrap(); + let end = end.chars().next().unwrap(); + + quote! { + state.match_range(#start..#end) + } + } + OptimizedExpr::Ident(ident) => { + let ident = Ident::new(&ident, Span::call_site()); + quote! { self::#ident(state) } + } + OptimizedExpr::PeekSlice(start, end_) => { + let end = QuoteOption(end_); + quote! { + state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop) + } + } + OptimizedExpr::PosPred(expr) => { + let expr = generate_expr(*expr); + + quote! { + state.lookahead(true, |state| { + #expr + }) + } + } + OptimizedExpr::NegPred(expr) => { + let expr = generate_expr(*expr); + + quote! { + state.lookahead(false, |state| { + #expr + }) + } + } + OptimizedExpr::Seq(lhs, rhs) => { + let head = generate_expr(*lhs); + let mut tail = vec![]; + let mut current = *rhs; + + while let OptimizedExpr::Seq(lhs, rhs) = current { + tail.push(generate_expr(*lhs)); + current = *rhs; + } + tail.push(generate_expr(current)); + + quote! { + state.sequence(|state| { + #head + #( + .and_then(|state| { + super::hidden::skip(state) + }).and_then(|state| { + #tail + }) + )* + }) + } + } + OptimizedExpr::Choice(lhs, rhs) => { + let head = generate_expr(*lhs); + let mut tail = vec![]; + let mut current = *rhs; + + while let OptimizedExpr::Choice(lhs, rhs) = current { + tail.push(generate_expr(*lhs)); + current = *rhs; + } + tail.push(generate_expr(current)); + + quote! { + #head + #( + .or_else(|state| { + #tail + }) + )* + } + } + OptimizedExpr::Opt(expr) => { + let expr = generate_expr(*expr); + + quote! 
{ + state.optional(|state| { + #expr + }) + } + } + OptimizedExpr::Rep(expr) => { + let expr = generate_expr(*expr); + + quote! { + state.sequence(|state| { + state.optional(|state| { + #expr.and_then(|state| { + state.repeat(|state| { + state.sequence(|state| { + super::hidden::skip( + state + ).and_then(|state| { + #expr + }) + }) + }) + }) + }) + }) + } + } + OptimizedExpr::Skip(strings) => { + quote! { + let strings = [#(#strings),*]; + + state.skip_until(&strings) + } + } + OptimizedExpr::Push(expr) => { + let expr = generate_expr(*expr); + + quote! { + state.stack_push(|state| #expr) + } + } + OptimizedExpr::RestoreOnErr(expr) => { + let expr = generate_expr(*expr); + + quote! { + state.restore_on_err(|state| #expr) + } + } + } +} + +fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream { + match expr { + OptimizedExpr::Str(string) => { + quote! { + state.match_string(#string) + } + } + OptimizedExpr::Insens(string) => { + quote! { + state.match_insensitive(#string) + } + } + OptimizedExpr::Range(start, end) => { + let start = start.chars().next().unwrap(); + let end = end.chars().next().unwrap(); + + quote! { + state.match_range(#start..#end) + } + } + OptimizedExpr::Ident(ident) => { + let ident = Ident::new(&ident, Span::call_site()); + quote! { self::#ident(state) } + } + OptimizedExpr::PeekSlice(start, end_) => { + let end = QuoteOption(end_); + quote! { + state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop) + } + } + OptimizedExpr::PosPred(expr) => { + let expr = generate_expr_atomic(*expr); + + quote! { + state.lookahead(true, |state| { + #expr + }) + } + } + OptimizedExpr::NegPred(expr) => { + let expr = generate_expr_atomic(*expr); + + quote! 
{ + state.lookahead(false, |state| { + #expr + }) + } + } + OptimizedExpr::Seq(lhs, rhs) => { + let head = generate_expr_atomic(*lhs); + let mut tail = vec![]; + let mut current = *rhs; + + while let OptimizedExpr::Seq(lhs, rhs) = current { + tail.push(generate_expr_atomic(*lhs)); + current = *rhs; + } + tail.push(generate_expr_atomic(current)); + + quote! { + state.sequence(|state| { + #head + #( + .and_then(|state| { + #tail + }) + )* + }) + } + } + OptimizedExpr::Choice(lhs, rhs) => { + let head = generate_expr_atomic(*lhs); + let mut tail = vec![]; + let mut current = *rhs; + + while let OptimizedExpr::Choice(lhs, rhs) = current { + tail.push(generate_expr_atomic(*lhs)); + current = *rhs; + } + tail.push(generate_expr_atomic(current)); + + quote! { + #head + #( + .or_else(|state| { + #tail + }) + )* + } + } + OptimizedExpr::Opt(expr) => { + let expr = generate_expr_atomic(*expr); + + quote! { + state.optional(|state| { + #expr + }) + } + } + OptimizedExpr::Rep(expr) => { + let expr = generate_expr_atomic(*expr); + + quote! { + state.repeat(|state| { + #expr + }) + } + } + OptimizedExpr::Skip(strings) => { + quote! { + let strings = [#(#strings),*]; + + state.skip_until(&strings) + } + } + OptimizedExpr::Push(expr) => { + let expr = generate_expr_atomic(*expr); + + quote! { + state.stack_push(|state| #expr) + } + } + OptimizedExpr::RestoreOnErr(expr) => { + let expr = generate_expr_atomic(*expr); + + quote! { + state.restore_on_err(|state| #expr) + } + } + } +} + +struct QuoteOption<T>(Option<T>); + +impl<T: ToTokens> ToTokens for QuoteOption<T> { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(match self.0 { + Some(ref t) => quote! { ::std::option::Option::Some(#t) }, + None => quote! 
{ ::std::option::Option::None }, + }); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rule_enum_simple() { + let rules = vec![OptimizedRule { + name: "f".to_owned(), + ty: RuleType::Normal, + expr: OptimizedExpr::Ident("g".to_owned()), + }]; + + assert_eq!( + generate_enum(&rules, false).to_string(), + quote! { + #[allow(dead_code, non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + pub enum Rule { + f + } + } + .to_string() + ); + } + + #[test] + fn sequence() { + let expr = OptimizedExpr::Seq( + Box::new(OptimizedExpr::Str("a".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::Str("b".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::Str("c".to_owned())), + Box::new(OptimizedExpr::Str("d".to_owned())), + )), + )), + ); + + assert_eq!( + generate_expr(expr).to_string(), + quote! { + state.sequence(|state| { + state.match_string("a").and_then(|state| { + super::hidden::skip(state) + }).and_then(|state| { + state.match_string("b") + }).and_then(|state| { + super::hidden::skip(state) + }).and_then(|state| { + state.match_string("c") + }).and_then(|state| { + super::hidden::skip(state) + }).and_then(|state| { + state.match_string("d") + }) + }) + } + .to_string() + ); + } + + #[test] + fn sequence_atomic() { + let expr = OptimizedExpr::Seq( + Box::new(OptimizedExpr::Str("a".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::Str("b".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::Str("c".to_owned())), + Box::new(OptimizedExpr::Str("d".to_owned())), + )), + )), + ); + + assert_eq!( + generate_expr_atomic(expr).to_string(), + quote! 
{ + state.sequence(|state| { + state.match_string("a").and_then(|state| { + state.match_string("b") + }).and_then(|state| { + state.match_string("c") + }).and_then(|state| { + state.match_string("d") + }) + }) + } + .to_string() + ); + } + + #[test] + fn choice() { + let expr = OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("a".to_owned())), + Box::new(OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("b".to_owned())), + Box::new(OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("c".to_owned())), + Box::new(OptimizedExpr::Str("d".to_owned())), + )), + )), + ); + + assert_eq!( + generate_expr(expr).to_string(), + quote! { + state.match_string("a").or_else(|state| { + state.match_string("b") + }).or_else(|state| { + state.match_string("c") + }).or_else(|state| { + state.match_string("d") + }) + } + .to_string() + ); + } + + #[test] + fn choice_atomic() { + let expr = OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("a".to_owned())), + Box::new(OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("b".to_owned())), + Box::new(OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("c".to_owned())), + Box::new(OptimizedExpr::Str("d".to_owned())), + )), + )), + ); + + assert_eq!( + generate_expr_atomic(expr).to_string(), + quote! { + state.match_string("a").or_else(|state| { + state.match_string("b") + }).or_else(|state| { + state.match_string("c") + }).or_else(|state| { + state.match_string("d") + }) + } + .to_string() + ); + } + + #[test] + fn skip() { + let expr = OptimizedExpr::Skip(vec!["a".to_owned(), "b".to_owned()]); + + assert_eq!( + generate_expr_atomic(expr).to_string(), + quote! 
{ + let strings = ["a", "b"]; + + state.skip_until(&strings) + } + .to_string() + ); + } + + #[test] + fn expr_complex() { + let expr = OptimizedExpr::Choice( + Box::new(OptimizedExpr::Ident("a".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::Range("a".to_owned(), "b".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Rep( + Box::new(OptimizedExpr::Insens("b".to_owned())), + )))), + Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Opt( + Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("c".to_owned())), + Box::new(OptimizedExpr::Str("d".to_owned())), + )))), + )))), + )), + )), + ); + + let sequence = quote! { + state.sequence(|state| { + super::hidden::skip(state).and_then( + |state| { + state.match_insensitive("b") + } + ) + }) + }; + let repeat = quote! { + state.repeat(|state| { + state.sequence(|state| { + super::hidden::skip(state).and_then(|state| { + state.match_string("c") + .or_else(|state| { + state.match_string("d") + }) + }) + }) + }) + }; + assert_eq!( + generate_expr(expr).to_string(), + quote! 
{ + self::a(state).or_else(|state| { + state.sequence(|state| { + state.match_range('a'..'b').and_then(|state| { + super::hidden::skip(state) + }).and_then(|state| { + state.lookahead(false, |state| { + state.sequence(|state| { + state.optional(|state| { + state.match_insensitive( + "b" + ).and_then(|state| { + state.repeat(|state| { + #sequence + }) + }) + }) + }) + }) + }).and_then(|state| { + super::hidden::skip(state) + }).and_then(|state| { + state.lookahead(true, |state| { + state.optional(|state| { + state.sequence(|state| { + state.optional(|state| { + state.match_string("c") + .or_else(|state| { + state.match_string("d") + }).and_then(|state| { + #repeat + }) + }) + }) + }) + }) + }) + }) + }) + } + .to_string() + ); + } + + #[test] + fn expr_complex_atomic() { + let expr = OptimizedExpr::Choice( + Box::new(OptimizedExpr::Ident("a".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::Range("a".to_owned(), "b".to_owned())), + Box::new(OptimizedExpr::Seq( + Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Rep( + Box::new(OptimizedExpr::Insens("b".to_owned())), + )))), + Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Opt( + Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Choice( + Box::new(OptimizedExpr::Str("c".to_owned())), + Box::new(OptimizedExpr::Str("d".to_owned())), + )))), + )))), + )), + )), + ); + + assert_eq!( + generate_expr_atomic(expr).to_string(), + quote! 
{ + self::a(state).or_else(|state| { + state.sequence(|state| { + state.match_range('a'..'b').and_then(|state| { + state.lookahead(false, |state| { + state.repeat(|state| { + state.match_insensitive("b") + }) + }) + }).and_then(|state| { + state.lookahead(true, |state| { + state.optional(|state| { + state.repeat(|state| { + state.match_string("c") + .or_else(|state| { + state.match_string("d") + }) + }) + }) + }) + }) + }) + }) + } + .to_string() + ); + } + + #[test] + fn generate_complete() { + let name = Ident::new("MyParser", Span::call_site()); + let generics = Generics::default(); + let rules = vec![OptimizedRule { + name: "a".to_owned(), + ty: RuleType::Silent, + expr: OptimizedExpr::Str("b".to_owned()), + }]; + let defaults = vec!["ANY"]; + let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); + current_dir.push("test.pest"); + let test_path = current_dir.to_str().expect("path contains invalid unicode"); + assert_eq!( + generate(name, &generics, Some(PathBuf::from("test.pest")), rules, defaults, true).to_string(), + quote! 
{ + #[allow(non_upper_case_globals)] + const _PEST_GRAMMAR_MyParser: &'static str = include_str!(#test_path); + + #[allow(dead_code, non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + pub enum Rule { + a + } + + #[allow(clippy::all)] + impl ::pest::Parser<Rule> for MyParser { + fn parse<'i>( + rule: Rule, + input: &'i str + ) -> ::std::result::Result< + ::pest::iterators::Pairs<'i, Rule>, + ::pest::error::Error<Rule> + > { + mod rules { + pub mod hidden { + use super::super::Rule; + + #[inline] + #[allow(dead_code, non_snake_case, unused_variables)] + pub fn skip(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + Ok(state) + } + } + + pub mod visible { + use super::super::Rule; + + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn a(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.match_string("b") + } + + #[inline] + #[allow(dead_code, non_snake_case, unused_variables)] + pub fn ANY(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + state.skip(1) + } + } + + pub use self::visible::*; + } + + ::pest::state(input, |state| { + match rule { + Rule::a => rules::a(state) + } + }) + } + } + }.to_string() + ); + } +} diff --git a/vendor/pest_generator/src/lib.rs b/vendor/pest_generator/src/lib.rs new file mode 100644 index 000000000..27b4d8168 --- /dev/null +++ b/vendor/pest_generator/src/lib.rs @@ -0,0 +1,208 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 Dragoș Tiselice +// +// Licensed under the Apache License, Version 2.0 +// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT +// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. 
+ +#![doc(html_root_url = "https://docs.rs/pest_derive")] +#![recursion_limit = "256"] + +extern crate pest; +extern crate pest_meta; + +extern crate proc_macro; +extern crate proc_macro2; +#[macro_use] +extern crate quote; +extern crate syn; + +use std::env; +use std::fs::File; +use std::io::{self, Read}; +use std::path::Path; + +use proc_macro2::TokenStream; +use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta}; + +#[macro_use] +mod macros; +mod generator; + +use pest_meta::parser::{self, Rule}; +use pest_meta::{optimizer, unwrap_or_report, validator}; + +pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { + let ast: DeriveInput = syn::parse2(input).unwrap(); + let (name, generics, content) = parse_derive(ast); + + let (data, path) = match content { + GrammarSource::File(ref path) => { + let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); + let path = Path::new(&root).join("src/").join(&path); + let file_name = match path.file_name() { + Some(file_name) => file_name, + None => panic!("grammar attribute should point to a file"), + }; + + let data = match read_file(&path) { + Ok(data) => data, + Err(error) => panic!("error opening {:?}: {}", file_name, error), + }; + (data, Some(path.clone())) + } + GrammarSource::Inline(content) => (content, None), + }; + + let pairs = match parser::parse(Rule::grammar_rules, &data) { + Ok(pairs) => pairs, + Err(error) => panic!( + "error parsing \n{}", + error.renamed_rules(|rule| match *rule { + Rule::grammar_rule => "rule".to_owned(), + Rule::_push => "PUSH".to_owned(), + Rule::assignment_operator => "`=`".to_owned(), + Rule::silent_modifier => "`_`".to_owned(), + Rule::atomic_modifier => "`@`".to_owned(), + Rule::compound_atomic_modifier => "`$`".to_owned(), + Rule::non_atomic_modifier => "`!`".to_owned(), + Rule::opening_brace => "`{`".to_owned(), + Rule::closing_brace => "`}`".to_owned(), + Rule::opening_brack => "`[`".to_owned(), + Rule::closing_brack => 
"`]`".to_owned(), + Rule::opening_paren => "`(`".to_owned(), + Rule::positive_predicate_operator => "`&`".to_owned(), + Rule::negative_predicate_operator => "`!`".to_owned(), + Rule::sequence_operator => "`&`".to_owned(), + Rule::choice_operator => "`|`".to_owned(), + Rule::optional_operator => "`?`".to_owned(), + Rule::repeat_operator => "`*`".to_owned(), + Rule::repeat_once_operator => "`+`".to_owned(), + Rule::comma => "`,`".to_owned(), + Rule::closing_paren => "`)`".to_owned(), + Rule::quote => "`\"`".to_owned(), + Rule::insensitive_string => "`^`".to_owned(), + Rule::range_operator => "`..`".to_owned(), + Rule::single_quote => "`'`".to_owned(), + other_rule => format!("{:?}", other_rule), + }) + ), + }; + + let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone())); + let ast = unwrap_or_report(parser::consume_rules(pairs)); + let optimized = optimizer::optimize(ast); + + generator::generate(name, &generics, path, optimized, defaults, include_grammar) +} + +fn read_file<P: AsRef<Path>>(path: P) -> io::Result<String> { + let mut file = File::open(path.as_ref())?; + let mut string = String::new(); + file.read_to_string(&mut string)?; + Ok(string) +} + +#[derive(Debug, PartialEq)] +enum GrammarSource { + File(String), + Inline(String), +} + +fn parse_derive(ast: DeriveInput) -> (Ident, Generics, GrammarSource) { + let name = ast.ident; + let generics = ast.generics; + + let grammar: Vec<&Attribute> = ast + .attrs + .iter() + .filter(|attr| match attr.parse_meta() { + Ok(Meta::NameValue(name_value)) => { + name_value.path.is_ident("grammar") || name_value.path.is_ident("grammar_inline") + } + _ => false, + }) + .collect(); + + let argument = match grammar.len() { + 0 => panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"), + 1 => get_attribute(grammar[0]), + _ => panic!("only 1 grammar file can be provided"), + }; + + (name, generics, argument) +} + +fn get_attribute(attr: 
&Attribute) -> GrammarSource { + match attr.parse_meta() { + Ok(Meta::NameValue(name_value)) => match name_value.lit { + Lit::Str(string) => { + if name_value.path.is_ident("grammar") { + GrammarSource::File(string.value()) + } else { + GrammarSource::Inline(string.value()) + } + } + _ => panic!("grammar attribute must be a string"), + }, + _ => panic!("grammar attribute must be of the form `grammar = \"...\"`"), + } +} + +#[cfg(test)] +mod tests { + use super::parse_derive; + use super::GrammarSource; + use syn; + + #[test] + fn derive_inline_file() { + let definition = " + #[other_attr] + #[grammar_inline = \"GRAMMAR\"] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + let (_, _, filename) = parse_derive(ast); + assert_eq!(filename, GrammarSource::Inline("GRAMMAR".to_string())); + } + + #[test] + fn derive_ok() { + let definition = " + #[other_attr] + #[grammar = \"myfile.pest\"] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + let (_, _, filename) = parse_derive(ast); + assert_eq!(filename, GrammarSource::File("myfile.pest".to_string())); + } + + #[test] + #[should_panic(expected = "only 1 grammar file can be provided")] + fn derive_multiple_grammars() { + let definition = " + #[other_attr] + #[grammar = \"myfile1.pest\"] + #[grammar = \"myfile2.pest\"] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + parse_derive(ast); + } + + #[test] + #[should_panic(expected = "grammar attribute must be a string")] + fn derive_wrong_arg() { + let definition = " + #[other_attr] + #[grammar = 1] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + parse_derive(ast); + } +} diff --git a/vendor/pest_generator/src/macros.rs b/vendor/pest_generator/src/macros.rs new file mode 100644 index 000000000..9d02725da --- /dev/null +++ b/vendor/pest_generator/src/macros.rs @@ -0,0 +1,44 @@ +// pest. 
The Elegant Parser +// Copyright (c) 2018 Dragoș Tiselice +// +// Licensed under the Apache License, Version 2.0 +// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT +// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +// Pushes `(stringify!($name), generate_rule!($name, $pattern))` onto `$builtin`, +// i.e. the builtin's name paired with the tokens of its generated function. +macro_rules! insert_builtin { + ($builtin: expr, $name: ident, $pattern: expr) => { + $builtin.push((stringify!($name), generate_rule!($name, $pattern))); + }; +} + +// Same as `insert_builtin!`, but builds the function with `generate_public_rule!`. +macro_rules! insert_public_builtin { + ($builtin: expr, $name: ident, $pattern: expr) => { + $builtin.push((stringify!($name), generate_public_rule!($name, $pattern))); + }; +} + +// Expands to a `quote!` block defining an `#[inline]` `pub fn $name` that takes the +// boxed parser state and has `$pattern` as its body. +macro_rules! generate_rule { + ($name: ident, $pattern: expr) => { + quote! { + #[inline] + #[allow(dead_code, non_snake_case, unused_variables)] + pub fn $name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + $pattern + } + } + } +} + +// NOTE(review): as written, this expands to exactly the same tokens as +// `generate_rule!`; the two macros differ only in name. +macro_rules! generate_public_rule { + ($name: ident, $pattern: expr) => { + quote! { + #[inline] + #[allow(dead_code, non_snake_case, unused_variables)] + pub fn $name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + $pattern + } + } + } +} |