From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/pest_meta/src/validator.rs | 851 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 851 insertions(+) create mode 100644 vendor/pest_meta/src/validator.rs (limited to 'vendor/pest_meta/src/validator.rs') diff --git a/vendor/pest_meta/src/validator.rs b/vendor/pest_meta/src/validator.rs new file mode 100644 index 000000000..a358dbeec --- /dev/null +++ b/vendor/pest_meta/src/validator.rs @@ -0,0 +1,851 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 DragoČ™ Tiselice +// +// Licensed under the Apache License, Version 2.0 +// or the MIT +// license , at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +use std::collections::{HashMap, HashSet}; + +use pest::error::{Error, ErrorVariant, InputLocation}; +use pest::iterators::Pairs; +use pest::Span; + +use parser::{ParserExpr, ParserNode, ParserRule, Rule}; +use UNICODE_PROPERTY_NAMES; + +#[allow(clippy::needless_pass_by_value)] +pub fn validate_pairs<'i>(pairs: Pairs<'i, Rule>) -> Result, Vec>> { + let mut rust_keywords = HashSet::new(); + rust_keywords.insert("abstract"); + rust_keywords.insert("alignof"); + rust_keywords.insert("as"); + rust_keywords.insert("become"); + rust_keywords.insert("box"); + rust_keywords.insert("break"); + rust_keywords.insert("const"); + rust_keywords.insert("continue"); + rust_keywords.insert("crate"); + rust_keywords.insert("do"); + rust_keywords.insert("else"); + rust_keywords.insert("enum"); + rust_keywords.insert("extern"); + rust_keywords.insert("false"); + rust_keywords.insert("final"); + rust_keywords.insert("fn"); + rust_keywords.insert("for"); + rust_keywords.insert("if"); + rust_keywords.insert("impl"); + rust_keywords.insert("in"); + rust_keywords.insert("let"); + rust_keywords.insert("loop"); + rust_keywords.insert("macro"); + rust_keywords.insert("match"); + rust_keywords.insert("mod"); + rust_keywords.insert("move"); + rust_keywords.insert("mut"); + rust_keywords.insert("offsetof"); + rust_keywords.insert("override"); + rust_keywords.insert("priv"); + rust_keywords.insert("proc"); + rust_keywords.insert("pure"); + rust_keywords.insert("pub"); + rust_keywords.insert("ref"); + rust_keywords.insert("return"); + rust_keywords.insert("Self"); + rust_keywords.insert("self"); + rust_keywords.insert("sizeof"); + rust_keywords.insert("static"); + rust_keywords.insert("struct"); + rust_keywords.insert("super"); + rust_keywords.insert("trait"); + rust_keywords.insert("true"); + rust_keywords.insert("type"); + rust_keywords.insert("typeof"); + rust_keywords.insert("unsafe"); + rust_keywords.insert("unsized"); + rust_keywords.insert("use"); + rust_keywords.insert("virtual"); + rust_keywords.insert("where"); + rust_keywords.insert("while"); + rust_keywords.insert("yield"); + + let mut pest_keywords = HashSet::new(); + pest_keywords.insert("_"); + pest_keywords.insert("ANY"); + pest_keywords.insert("DROP"); + pest_keywords.insert("EOI"); + pest_keywords.insert("PEEK"); + pest_keywords.insert("PEEK_ALL"); + pest_keywords.insert("POP"); + pest_keywords.insert("POP_ALL"); + pest_keywords.insert("PUSH"); + pest_keywords.insert("SOI"); + + let mut builtins = HashSet::new(); + builtins.insert("ANY"); + builtins.insert("DROP"); + builtins.insert("EOI"); + builtins.insert("PEEK"); + builtins.insert("PEEK_ALL"); + builtins.insert("POP"); + builtins.insert("POP_ALL"); + builtins.insert("SOI"); + builtins.insert("ASCII_DIGIT"); + builtins.insert("ASCII_NONZERO_DIGIT"); + builtins.insert("ASCII_BIN_DIGIT"); + builtins.insert("ASCII_OCT_DIGIT"); + builtins.insert("ASCII_HEX_DIGIT"); + builtins.insert("ASCII_ALPHA_LOWER"); + builtins.insert("ASCII_ALPHA_UPPER"); + builtins.insert("ASCII_ALPHA"); + builtins.insert("ASCII_ALPHANUMERIC"); + builtins.insert("ASCII"); + builtins.insert("NEWLINE"); + builtins.extend(UNICODE_PROPERTY_NAMES); + + let definitions: Vec<_> = pairs + .clone() + .filter(|pair| pair.as_rule() == Rule::grammar_rule) + .map(|pair| pair.into_inner().next().unwrap().as_span()) + .collect(); + let called_rules: Vec<_> = pairs + .clone() + .filter(|pair| pair.as_rule() == Rule::grammar_rule) + .flat_map(|pair| { + pair.into_inner() + .flatten() + .skip(1) + .filter(|pair| pair.as_rule() == Rule::identifier) + .map(|pair| pair.as_span()) + }) + .collect(); + + let mut errors = vec![]; + + errors.extend(validate_rust_keywords(&definitions, &rust_keywords)); + errors.extend(validate_pest_keywords(&definitions, &pest_keywords)); + errors.extend(validate_already_defined(&definitions)); + errors.extend(validate_undefined(&definitions, &called_rules, &builtins)); + + if !errors.is_empty() { + return Err(errors); + } + + let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect(); + let called_rules: HashSet<_> = called_rules.iter().map(|span| span.as_str()).collect(); + + let defaults = called_rules.difference(&definitions); + + Ok(defaults.cloned().collect()) +} + +#[allow(clippy::implicit_hasher, clippy::ptr_arg)] +pub fn validate_rust_keywords<'i>( + definitions: &Vec>, + rust_keywords: &HashSet<&str>, +) -> Vec> { + let mut errors = vec![]; + + for definition in definitions { + let name = definition.as_str(); + + if rust_keywords.contains(name) { + errors.push(Error::new_from_span( + ErrorVariant::CustomError { + message: format!("{} is a rust keyword", name), + }, + definition.clone(), + )) + } + } + + errors +} + +#[allow(clippy::implicit_hasher, clippy::ptr_arg)] +pub fn validate_pest_keywords<'i>( + definitions: &Vec>, + pest_keywords: &HashSet<&str>, +) -> Vec> { + let mut errors = vec![]; + + for definition in definitions { + let name = definition.as_str(); + + if pest_keywords.contains(name) { + errors.push(Error::new_from_span( + ErrorVariant::CustomError { + message: format!("{} is a pest keyword", name), + }, + definition.clone(), + )) + } + } + + errors +} + +#[allow(clippy::ptr_arg)] +pub fn validate_already_defined<'i>(definitions: &Vec>) -> Vec> { + let mut errors = vec![]; + let mut defined = HashSet::new(); + + for definition in definitions { + let name = definition.as_str(); + + if defined.contains(&name) { + errors.push(Error::new_from_span( + ErrorVariant::CustomError { + message: format!("rule {} already defined", name), + }, + definition.clone(), + )) + } else { + defined.insert(name); + } + } + + errors +} + +#[allow(clippy::implicit_hasher, clippy::ptr_arg)] +pub fn validate_undefined<'i>( + definitions: &Vec>, + called_rules: &Vec>, + builtins: &HashSet<&str>, +) -> Vec> { + let mut errors = vec![]; + let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect(); + + for rule in called_rules { + let name = rule.as_str(); + + if !definitions.contains(name) && !builtins.contains(name) { + errors.push(Error::new_from_span( + ErrorVariant::CustomError { + message: format!("rule {} is undefined", name), + }, + rule.clone(), + )) + } + } + + errors +} + +#[allow(clippy::ptr_arg)] +pub fn validate_ast<'a, 'i: 'a>(rules: &'a Vec>) -> Vec> { + let mut errors = vec![]; + + errors.extend(validate_repetition(rules)); + errors.extend(validate_choices(rules)); + errors.extend(validate_whitespace_comment(rules)); + errors.extend(validate_left_recursion(rules)); + + errors.sort_by_key(|error| match error.location { + InputLocation::Span(span) => span, + _ => unreachable!(), + }); + + errors +} + +fn is_non_progressing<'i>( + expr: &ParserExpr<'i>, + rules: &HashMap>, + trace: &mut Vec, +) -> bool { + match *expr { + ParserExpr::Str(ref string) => string == "", + ParserExpr::Ident(ref ident) => { + if ident == "soi" || ident == "eoi" { + return true; + } + + if !trace.contains(ident) { + if let Some(node) = rules.get(ident) { + trace.push(ident.clone()); + let result = is_non_progressing(&node.expr, rules, trace); + trace.pop().unwrap(); + + return result; + } + } + + false + } + ParserExpr::PosPred(_) => true, + ParserExpr::NegPred(_) => true, + ParserExpr::Seq(ref lhs, ref rhs) => { + is_non_progressing(&lhs.expr, rules, trace) + && is_non_progressing(&rhs.expr, rules, trace) + } + ParserExpr::Choice(ref lhs, ref rhs) => { + is_non_progressing(&lhs.expr, rules, trace) + || is_non_progressing(&rhs.expr, rules, trace) + } + _ => false, + } +} + +fn is_non_failing<'i>( + expr: &ParserExpr<'i>, + rules: &HashMap>, + trace: &mut Vec, +) -> bool { + match *expr { + ParserExpr::Str(ref string) => string == "", + ParserExpr::Ident(ref ident) => { + if !trace.contains(ident) { + if let Some(node) = rules.get(ident) { + trace.push(ident.clone()); + let result = is_non_failing(&node.expr, rules, trace); + trace.pop().unwrap(); + + return result; + } + } + + false + } + ParserExpr::Opt(_) => true, + ParserExpr::Rep(_) => true, + ParserExpr::Seq(ref lhs, ref rhs) => { + is_non_failing(&lhs.expr, rules, trace) && is_non_failing(&rhs.expr, rules, trace) + } + ParserExpr::Choice(ref lhs, ref rhs) => { + is_non_failing(&lhs.expr, rules, trace) || is_non_failing(&rhs.expr, rules, trace) + } + _ => false, + } +} + +fn validate_repetition<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec> { + let mut result = vec![]; + let map = to_hash_map(rules); + + for rule in rules { + let mut errors = rule.node + .clone() + .filter_map_top_down(|node| match node.expr { + ParserExpr::Rep(ref other) + | ParserExpr::RepOnce(ref other) + | ParserExpr::RepMin(ref other, _) => { + if is_non_failing(&other.expr, &map, &mut vec![]) { + Some(Error::new_from_span( + ErrorVariant::CustomError { + message: + "expression inside repetition cannot fail and will repeat \ + infinitely" + .to_owned() + }, + node.span.clone() + )) + } else if is_non_progressing(&other.expr, &map, &mut vec![]) { + Some(Error::new_from_span( + ErrorVariant::CustomError { + message: + "expression inside repetition is non-progressing and will repeat \ + infinitely" + .to_owned(), + }, + node.span.clone() + )) + } else { + None + } + } + _ => None + }); + + result.append(&mut errors); + } + + result +} + +fn validate_choices<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec> { + let mut result = vec![]; + let map = to_hash_map(rules); + + for rule in rules { + let mut errors = rule + .node + .clone() + .filter_map_top_down(|node| match node.expr { + ParserExpr::Choice(ref lhs, _) => { + let node = match lhs.expr { + ParserExpr::Choice(_, ref rhs) => rhs, + _ => lhs, + }; + + if is_non_failing(&node.expr, &map, &mut vec![]) { + Some(Error::new_from_span( + ErrorVariant::CustomError { + message: + "expression cannot fail; following choices cannot be reached" + .to_owned(), + }, + node.span.clone(), + )) + } else { + None + } + } + _ => None, + }); + + result.append(&mut errors); + } + + result +} + +fn validate_whitespace_comment<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec> { + let map = to_hash_map(rules); + + rules + .iter() + .filter_map(|rule| { + if rule.name == "WHITESPACE" || rule.name == "COMMENT" { + if is_non_failing(&rule.node.expr, &map, &mut vec![]) { + Some(Error::new_from_span( + ErrorVariant::CustomError { + message: format!( + "{} cannot fail and will repeat infinitely", + &rule.name + ), + }, + rule.node.span.clone(), + )) + } else if is_non_progressing(&rule.node.expr, &map, &mut vec![]) { + Some(Error::new_from_span( + ErrorVariant::CustomError { + message: format!( + "{} is non-progressing and will repeat infinitely", + &rule.name + ), + }, + rule.node.span.clone(), + )) + } else { + None + } + } else { + None + } + }) + .collect() +} + +fn validate_left_recursion<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec> { + left_recursion(to_hash_map(rules)) +} + +fn to_hash_map<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> HashMap> { + rules.iter().map(|r| (r.name.clone(), &r.node)).collect() +} + +#[allow(clippy::needless_pass_by_value)] +fn left_recursion<'a, 'i: 'a>(rules: HashMap>) -> Vec> { + fn check_expr<'a, 'i: 'a>( + node: &'a ParserNode<'i>, + rules: &'a HashMap>, + trace: &mut Vec, + ) -> Option> { + match node.expr.clone() { + ParserExpr::Ident(other) => { + if trace[0] == other { + trace.push(other); + let chain = trace + .iter() + .map(|ident| ident.as_ref()) + .collect::>() + .join(" -> "); + + return Some(Error::new_from_span( + ErrorVariant::CustomError { + message: format!( + "rule {} is left-recursive ({}); pest::prec_climber might be useful \ + in this case", + node.span.as_str(), + chain + ) + }, + node.span.clone() + )); + } + + if !trace.contains(&other) { + if let Some(node) = rules.get(&other) { + trace.push(other); + let result = check_expr(node, rules, trace); + trace.pop().unwrap(); + + return result; + } + } + + None + } + ParserExpr::Seq(ref lhs, ref rhs) => { + if is_non_failing(&lhs.expr, rules, &mut vec![trace.last().unwrap().clone()]) { + check_expr(rhs, rules, trace) + } else { + check_expr(lhs, rules, trace) + } + } + ParserExpr::Choice(ref lhs, ref rhs) => { + check_expr(&lhs, rules, trace).or_else(|| check_expr(&rhs, rules, trace)) + } + ParserExpr::Rep(ref node) => check_expr(&node, rules, trace), + ParserExpr::RepOnce(ref node) => check_expr(&node, rules, trace), + ParserExpr::Opt(ref node) => check_expr(&node, rules, trace), + ParserExpr::PosPred(ref node) => check_expr(&node, rules, trace), + ParserExpr::NegPred(ref node) => check_expr(&node, rules, trace), + ParserExpr::Push(ref node) => check_expr(&node, rules, trace), + _ => None, + } + } + + let mut errors = vec![]; + + for (ref name, ref node) in &rules { + let name = (*name).clone(); + + if let Some(error) = check_expr(node, &rules, &mut vec![name]) { + errors.push(error); + } + } + + errors +} + +#[cfg(test)] +mod tests { + use super::super::parser::{consume_rules, PestParser}; + use super::super::unwrap_or_report; + use super::*; + use pest::Parser; + + #[test] + #[should_panic(expected = "grammar error + + --> 1:1 + | +1 | let = { \"a\" } + | ^-^ + | + = let is a rust keyword")] + fn rust_keyword() { + let input = "let = { \"a\" }"; + unwrap_or_report(validate_pairs( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:1 + | +1 | ANY = { \"a\" } + | ^-^ + | + = ANY is a pest keyword")] + fn pest_keyword() { + let input = "ANY = { \"a\" }"; + unwrap_or_report(validate_pairs( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:13 + | +1 | a = { \"a\" } a = { \"a\" } + | ^ + | + = rule a already defined")] + fn already_defined() { + let input = "a = { \"a\" } a = { \"a\" }"; + unwrap_or_report(validate_pairs( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { b } + | ^ + | + = rule b is undefined")] + fn undefined() { + let input = "a = { b }"; + unwrap_or_report(validate_pairs( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + fn valid_recursion() { + let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"b\") ~ a }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:16 + | +1 | WHITESPACE = { \"\" } + | ^^ + | + = WHITESPACE cannot fail and will repeat infinitely")] + fn non_failing_whitespace() { + let input = "WHITESPACE = { \"\" }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:13 + | +1 | COMMENT = { soi } + | ^-^ + | + = COMMENT is non-progressing and will repeat infinitely")] + fn non_progressing_comment() { + let input = "COMMENT = { soi }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { (\"\")* } + | ^---^ + | + = expression inside repetition cannot fail and will repeat infinitely")] + fn non_failing_repetition() { + let input = "a = { (\"\")* }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:18 + | +1 | a = { \"\" } b = { a* } + | ^^ + | + = expression inside repetition cannot fail and will repeat infinitely")] + fn indirect_non_failing_repetition() { + let input = "a = { \"\" } b = { a* }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:20 + | +1 | a = { \"a\" ~ (\"b\" ~ (\"\")*) } + | ^---^ + | + = expression inside repetition cannot fail and will repeat infinitely")] + fn deep_non_failing_repetition() { + let input = "a = { \"a\" ~ (\"b\" ~ (\"\")*) }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* } + | ^-------------------------------^ + | + = expression inside repetition is non-progressing and will repeat infinitely")] + fn non_progressing_repetition() { + let input = "a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:20 + | +1 | a = { !\"a\" } b = { a* } + | ^^ + | + = expression inside repetition is non-progressing and will repeat infinitely")] + fn indirect_non_progressing_repetition() { + let input = "a = { !\"a\" } b = { a* }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { a } + | ^ + | + = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")] + fn simple_left_recursion() { + let input = "a = { a }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { b } b = { a } + | ^ + | + = rule b is left-recursive (b -> a -> b); pest::prec_climber might be useful in this case + + --> 1:17 + | +1 | a = { b } b = { a } + | ^ + | + = rule a is left-recursive (a -> b -> a); pest::prec_climber might be useful in this case")] + fn indirect_left_recursion() { + let input = "a = { b } b = { a }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:39 + | +1 | a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a } + | ^ + | + = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")] + fn non_failing_left_recursion() { + let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:13 + | +1 | a = { \"a\" | a } + | ^ + | + = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")] + fn non_primary_choice_left_recursion() { + let input = "a = { \"a\" | a }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { \"a\"* | \"a\" | \"b\" } + | ^--^ + | + = expression cannot fail; following choices cannot be reached")] + fn lhs_non_failing_choice() { + let input = "a = { \"a\"* | \"a\" | \"b\" }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:13 + | +1 | a = { \"a\" | \"a\"* | \"b\" } + | ^--^ + | + = expression cannot fail; following choices cannot be reached")] + fn lhs_non_failing_choice_middle() { + let input = "a = { \"a\" | \"a\"* | \"b\" }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { b | \"a\" } b = { \"b\"* | \"c\" } + | ^ + | + = expression cannot fail; following choices cannot be reached + + --> 1:23 + | +1 | a = { b | \"a\" } b = { \"b\"* | \"c\" } + | ^--^ + | + = expression cannot fail; following choices cannot be reached")] + fn lhs_non_failing_nested_choices() { + let input = "a = { b | \"a\" } b = { \"b\"* | \"c\" }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + fn skip_can_be_defined() { + let input = "skip = { \"\" }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } +} -- cgit v1.2.3