From 9835e2ae736235810b4ea1c162ca5e65c547e770 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 04:49:50 +0200 Subject: Merging upstream version 1.71.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/pest_generator/src/docs.rs | 122 +++++++++++++++++++++ vendor/pest_generator/src/generator.rs | 104 +++++++++++++----- vendor/pest_generator/src/lib.rs | 189 +++++++++++++++++++++++---------- 3 files changed, 336 insertions(+), 79 deletions(-) create mode 100644 vendor/pest_generator/src/docs.rs (limited to 'vendor/pest_generator/src') diff --git a/vendor/pest_generator/src/docs.rs b/vendor/pest_generator/src/docs.rs new file mode 100644 index 000000000..f1ce1881a --- /dev/null +++ b/vendor/pest_generator/src/docs.rs @@ -0,0 +1,122 @@ +use pest::iterators::Pairs; +use pest_meta::parser::Rule; +use std::collections::HashMap; + +#[derive(Debug)] +pub(crate) struct DocComment { + pub grammar_doc: String, + + /// HashMap for store all doc_comments for rules. + /// key is rule name, value is doc_comment. + pub line_docs: HashMap, +} + +/// Consume pairs to matches `Rule::grammar_doc`, `Rule::line_doc` into `DocComment` +/// +/// e.g. +/// +/// a pest file: +/// +/// ```ignore +/// //! This is a grammar doc +/// /// line doc 1 +/// /// line doc 2 +/// foo = {} +/// +/// /// line doc 3 +/// bar = {} +/// ``` +/// +/// Then will get: +/// +/// ```ignore +/// grammar_doc = "This is a grammar doc" +/// line_docs = { "foo": "line doc 1\nline doc 2", "bar": "line doc 3" } +/// ``` +pub(crate) fn consume(pairs: Pairs<'_, Rule>) -> DocComment { + let mut grammar_doc = String::new(); + + let mut line_docs: HashMap = HashMap::new(); + let mut line_doc = String::new(); + + for pair in pairs { + match pair.as_rule() { + Rule::grammar_doc => { + // grammar_doc > inner_doc + let inner_doc = pair.into_inner().next().unwrap(); + grammar_doc.push_str(inner_doc.as_str()); + grammar_doc.push('\n'); + } + Rule::grammar_rule => { + if let Some(inner) = pair.into_inner().next() { + // grammar_rule > line_doc | identifier + match inner.as_rule() { + Rule::line_doc => { + if let Some(inner_doc) = inner.into_inner().next() { + line_doc.push_str(inner_doc.as_str()); + line_doc.push('\n'); + } + } + Rule::identifier => { + if !line_doc.is_empty() { + let rule_name = inner.as_str().to_owned(); + + // Remove last \n + line_doc.pop(); + line_docs.insert(rule_name, line_doc.clone()); + line_doc.clear(); + } + } + _ => (), + } + } + } + _ => (), + } + } + + if !grammar_doc.is_empty() { + // Remove last \n + grammar_doc.pop(); + } + + DocComment { + grammar_doc, + line_docs, + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use pest_meta::parser; + use pest_meta::parser::Rule; + + #[test] + fn test_doc_comment() { + let pairs = match parser::parse(Rule::grammar_rules, include_str!("../tests/test.pest")) { + Ok(pairs) => pairs, + Err(_) => panic!("error parsing tests/test.pest"), + }; + + let doc_comment = super::consume(pairs); + + let mut expected = HashMap::new(); + expected.insert("foo".to_owned(), "Matches foo str, e.g.: `foo`".to_owned()); + expected.insert( + "bar".to_owned(), + "Matches bar str\n\n Indent 2, e.g: `bar` or `foobar`".to_owned(), + ); + expected.insert( + "dar".to_owned(), + "Matches dar\n\nMatch dar description\n".to_owned(), + ); + assert_eq!(expected, doc_comment.line_docs); + + assert_eq!( + "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n indent-4-space\n", + doc_comment.grammar_doc + ); + } +} diff --git a/vendor/pest_generator/src/generator.rs b/vendor/pest_generator/src/generator.rs index fc1263d86..0dbcaa310 100644 --- a/vendor/pest_generator/src/generator.rs +++ b/vendor/pest_generator/src/generator.rs @@ -17,26 +17,26 @@ use pest::unicode::unicode_property_names; use pest_meta::ast::*; use pest_meta::optimizer::*; -pub fn generate( +use crate::docs::DocComment; + +pub(crate) fn generate( name: Ident, generics: &Generics, - path: Option, + paths: Vec, rules: Vec, defaults: Vec<&str>, + doc_comment: &DocComment, include_grammar: bool, ) -> TokenStream { let uses_eoi = defaults.iter().any(|name| *name == "EOI"); let builtins = generate_builtin_rules(); let include_fix = if include_grammar { - match path { - Some(ref path) => generate_include(&name, path.to_str().expect("non-Unicode path")), - None => quote!(), - } + generate_include(&name, paths) } else { quote!() }; - let rule_enum = generate_enum(&rules, uses_eoi); + let rule_enum = generate_enum(&rules, doc_comment, uses_eoi); let patterns = generate_patterns(&rules, uses_eoi); let skip = generate_skip(&rules); @@ -167,24 +167,55 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { builtins } -// Needed because Cargo doesn't watch for changes in grammars. -fn generate_include(name: &Ident, path: &str) -> TokenStream { +/// Generate Rust `include_str!` for grammar files, then Cargo will watch changes in grammars. +fn generate_include(name: &Ident, paths: Vec) -> TokenStream { let const_name = format_ident!("_PEST_GRAMMAR_{}", name); // Need to make this relative to the current directory since the path to the file // is derived from the CARGO_MANIFEST_DIR environment variable - let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); - current_dir.push(path); - let relative_path = current_dir.to_str().expect("path contains invalid unicode"); + let current_dir = std::env::current_dir().expect("Unable to get current directory"); + + let include_tokens = paths.iter().map(|path| { + let path = path.to_str().expect("non-Unicode path"); + + let relative_path = current_dir + .join(path) + .to_str() + .expect("path contains invalid unicode") + .to_string(); + + quote! { + include_str!(#relative_path) + } + }); + + let len = include_tokens.len(); quote! { #[allow(non_upper_case_globals)] - const #const_name: &'static str = include_str!(#relative_path); + const #const_name: [&'static str; #len] = [ + #(#include_tokens),* + ]; } } -fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { - let rules = rules.iter().map(|rule| format_ident!("r#{}", rule.name)); +fn generate_enum(rules: &[OptimizedRule], doc_comment: &DocComment, uses_eoi: bool) -> TokenStream { + let rules = rules.iter().map(|rule| { + let rule_name = format_ident!("r#{}", rule.name); + + match doc_comment.line_docs.get(&rule.name) { + Some(doc) => quote! { + #[doc = #doc] + #rule_name + }, + None => quote! { + #rule_name + }, + } + }); + + let grammar_doc = &doc_comment.grammar_doc; if uses_eoi { quote! { + #[doc = #grammar_doc] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { @@ -194,6 +225,7 @@ fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { } } else { quote! { + #[doc = #grammar_doc] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { @@ -208,6 +240,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { .iter() .map(|rule| { let rule = format_ident!("r#{}", rule.name); + quote! { Rule::#rule => rules::#rule(state) } @@ -657,10 +690,11 @@ fn option_type() -> TokenStream { #[cfg(test)] mod tests { - use proc_macro2::Span; - use super::*; + use proc_macro2::Span; + use std::collections::HashMap; + #[test] fn rule_enum_simple() { let rules = vec![OptimizedRule { @@ -669,12 +703,22 @@ mod tests { expr: OptimizedExpr::Ident("g".to_owned()), }]; + let mut line_docs = HashMap::new(); + line_docs.insert("f".to_owned(), "This is rule comment".to_owned()); + + let doc_comment = &DocComment { + grammar_doc: "Rule doc\nhello".to_owned(), + line_docs, + }; + assert_eq!( - generate_enum(&rules, false).to_string(), + generate_enum(&rules, doc_comment, false).to_string(), quote! { + #[doc = "Rule doc\nhello"] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { + #[doc = "This is rule comment"] r#f } } @@ -957,7 +1001,7 @@ mod tests { } #[test] - fn generate_complete() { + fn test_generate_complete() { let name = Ident::new("MyParser", Span::call_site()); let generics = Generics::default(); @@ -974,22 +1018,34 @@ mod tests { }, ]; + let mut line_docs = HashMap::new(); + line_docs.insert("if".to_owned(), "If statement".to_owned()); + + let doc_comment = &DocComment { + line_docs, + grammar_doc: "This is Rule doc\nThis is second line".to_owned(), + }; + let defaults = vec!["ANY"]; let result = result_type(); let box_ty = box_type(); - let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); - current_dir.push("test.pest"); - let test_path = current_dir.to_str().expect("path contains invalid unicode"); + let current_dir = std::env::current_dir().expect("Unable to get current directory"); + + let base_path = current_dir.join("base.pest").to_str().unwrap().to_string(); + let test_path = current_dir.join("test.pest").to_str().unwrap().to_string(); + assert_eq!( - generate(name, &generics, Some(PathBuf::from("test.pest")), rules, defaults, true).to_string(), + generate(name, &generics, vec![PathBuf::from("base.pest"), PathBuf::from("test.pest")], rules, defaults, doc_comment, true).to_string(), quote! { #[allow(non_upper_case_globals)] - const _PEST_GRAMMAR_MyParser: &'static str = include_str!(#test_path); + const _PEST_GRAMMAR_MyParser: [&'static str; 2usize] = [include_str!(#base_path), include_str!(#test_path)]; + #[doc = "This is Rule doc\nThis is second line"] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { r#a, + #[doc = "If statement"] r#if } diff --git a/vendor/pest_generator/src/lib.rs b/vendor/pest_generator/src/lib.rs index 2a1203e4a..7aed1936f 100644 --- a/vendor/pest_generator/src/lib.rs +++ b/vendor/pest_generator/src/lib.rs @@ -27,10 +27,11 @@ use std::io::{self, Read}; use std::path::Path; use proc_macro2::TokenStream; -use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta}; +use syn::{Attribute, DeriveInput, Expr, ExprLit, Generics, Ident, Lit, Meta}; #[macro_use] mod macros; +mod docs; mod generator; use pest_meta::parser::{self, rename_meta_rule, Rule}; @@ -41,39 +42,50 @@ use pest_meta::{optimizer, unwrap_or_report, validator}; /// "include_str" statement (done in pest_derive, but turned off in the local bootstrap). pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { let ast: DeriveInput = syn::parse2(input).unwrap(); - let (name, generics, content) = parse_derive(ast); - - let (data, path) = match content { - GrammarSource::File(ref path) => { - let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); - - // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR - // first. - // - // If we cannot find the expected file over there, fallback to the - // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience - // reasons. - // TODO: This could be refactored once `std::path::absolute()` get's stabilized. - // https://doc.rust-lang.org/std/path/fn.absolute.html - let path = if Path::new(&root).join(path).exists() { - Path::new(&root).join(path) - } else { - Path::new(&root).join("src/").join(path) - }; - - let file_name = match path.file_name() { - Some(file_name) => file_name, - None => panic!("grammar attribute should point to a file"), - }; - - let data = match read_file(&path) { - Ok(data) => data, - Err(error) => panic!("error opening {:?}: {}", file_name, error), - }; - (data, Some(path.clone())) + let (name, generics, contents) = parse_derive(ast); + + let mut data = String::new(); + let mut paths = vec![]; + + for content in contents { + let (_data, _path) = match content { + GrammarSource::File(ref path) => { + let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); + + // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR + // first. + // + // If we cannot find the expected file over there, fallback to the + // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience + // reasons. + // TODO: This could be refactored once `std::path::absolute()` get's stabilized. + // https://doc.rust-lang.org/std/path/fn.absolute.html + let path = if Path::new(&root).join(path).exists() { + Path::new(&root).join(path) + } else { + Path::new(&root).join("src/").join(path) + }; + + let file_name = match path.file_name() { + Some(file_name) => file_name, + None => panic!("grammar attribute should point to a file"), + }; + + let data = match read_file(&path) { + Ok(data) => data, + Err(error) => panic!("error opening {:?}: {}", file_name, error), + }; + (data, Some(path.clone())) + } + GrammarSource::Inline(content) => (content, None), + }; + + data.push_str(&_data); + match _path { + Some(path) => paths.push(path), + None => (), } - GrammarSource::Inline(content) => (content, None), - }; + } let pairs = match parser::parse(Rule::grammar_rules, &data) { Ok(pairs) => pairs, @@ -81,10 +93,19 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { }; let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone())); + let doc_comment = docs::consume(pairs.clone()); let ast = unwrap_or_report(parser::consume_rules(pairs)); let optimized = optimizer::optimize(ast); - generator::generate(name, &generics, path, optimized, defaults, include_grammar) + generator::generate( + name, + &generics, + paths, + optimized, + defaults, + &doc_comment, + include_grammar, + ) } fn read_file>(path: P) -> io::Result { @@ -100,34 +121,38 @@ enum GrammarSource { Inline(String), } -fn parse_derive(ast: DeriveInput) -> (Ident, Generics, GrammarSource) { +fn parse_derive(ast: DeriveInput) -> (Ident, Generics, Vec) { let name = ast.ident; let generics = ast.generics; let grammar: Vec<&Attribute> = ast .attrs .iter() - .filter(|attr| match attr.parse_meta() { - Ok(Meta::NameValue(name_value)) => { - name_value.path.is_ident("grammar") || name_value.path.is_ident("grammar_inline") - } - _ => false, + .filter(|attr| { + let path = attr.meta.path(); + path.is_ident("grammar") || path.is_ident("grammar_inline") }) .collect(); - let argument = match grammar.len() { - 0 => panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"), - 1 => get_attribute(grammar[0]), - _ => panic!("only 1 grammar file can be provided"), - }; + if grammar.is_empty() { + panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"); + } - (name, generics, argument) + let mut grammar_sources = Vec::with_capacity(grammar.len()); + for attr in grammar { + grammar_sources.push(get_attribute(attr)) + } + + (name, generics, grammar_sources) } fn get_attribute(attr: &Attribute) -> GrammarSource { - match attr.parse_meta() { - Ok(Meta::NameValue(name_value)) => match name_value.lit { - Lit::Str(string) => { + match &attr.meta { + Meta::NameValue(name_value) => match &name_value.value { + Expr::Lit(ExprLit { + lit: Lit::Str(string), + .. + }) => { if name_value.path.is_ident("grammar") { GrammarSource::File(string.value()) } else { @@ -153,8 +178,8 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - let (_, _, filename) = parse_derive(ast); - assert_eq!(filename, GrammarSource::Inline("GRAMMAR".to_string())); + let (_, _, filenames) = parse_derive(ast); + assert_eq!(filenames, [GrammarSource::Inline("GRAMMAR".to_string())]); } #[test] @@ -165,12 +190,11 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - let (_, _, filename) = parse_derive(ast); - assert_eq!(filename, GrammarSource::File("myfile.pest".to_string())); + let (_, _, filenames) = parse_derive(ast); + assert_eq!(filenames, [GrammarSource::File("myfile.pest".to_string())]); } #[test] - #[should_panic(expected = "only 1 grammar file can be provided")] fn derive_multiple_grammars() { let definition = " #[other_attr] @@ -179,7 +203,14 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - parse_derive(ast); + let (_, _, filenames) = parse_derive(ast); + assert_eq!( + filenames, + [ + GrammarSource::File("myfile1.pest".to_string()), + GrammarSource::File("myfile2.pest".to_string()) + ] + ); } #[test] @@ -193,4 +224,52 @@ mod tests { let ast = syn::parse_str(definition).unwrap(); parse_derive(ast); } + + #[test] + #[should_panic( + expected = "a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute" + )] + fn derive_no_grammar() { + let definition = " + #[other_attr] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + parse_derive(ast); + } + + #[doc = "Matches dar\n\nMatch dar description\n"] + #[test] + fn test_generate_doc() { + let input = quote! { + #[derive(Parser)] + #[grammar = "../tests/test.pest"] + pub struct TestParser; + }; + + let token = super::derive_parser(input, true); + + let expected = quote! { + #[doc = "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n indent-4-space\n"] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + + pub enum Rule { + #[doc = "Matches foo str, e.g.: `foo`"] + r#foo, + #[doc = "Matches bar str\n\n Indent 2, e.g: `bar` or `foobar`"] + r#bar, + r#bar1, + #[doc = "Matches dar\n\nMatch dar description\n"] + r#dar + } + }; + + assert!( + token.to_string().contains(expected.to_string().as_str()), + "{}\n\nExpected to contains:\n{}", + token, + expected + ); + } } -- cgit v1.2.3