From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/pest_derive/src/lib.rs | 302 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 vendor/pest_derive/src/lib.rs (limited to 'vendor/pest_derive/src/lib.rs') diff --git a/vendor/pest_derive/src/lib.rs b/vendor/pest_derive/src/lib.rs new file mode 100644 index 000000000..f60d5d943 --- /dev/null +++ b/vendor/pest_derive/src/lib.rs @@ -0,0 +1,302 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 Dragoș Tiselice +// +// Licensed under the Apache License, Version 2.0 +// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT +// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +//! # pest. The Elegant Parser +//! +//! pest is a general purpose parser written in Rust with a focus on accessibility, correctness, +//! and performance. It uses parsing expression grammars (or [PEG]) as input, which are similar in +//! spirit to regular expressions, but which offer the enhanced expressivity needed to parse +//! complex languages. +//! +//! [PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar +//! +//! ## Getting started +//! +//! The recommended way to start parsing with pest is to read the official [book]. +//! +//! Other helpful resources: +//! +//! * API reference on [docs.rs] +//! * play with grammars and share them on our [fiddle] +//! * leave feedback, ask questions, or greet us on [Gitter] +//! +//! [book]: https://pest-parser.github.io/book +//! [docs.rs]: https://docs.rs/pest +//! [fiddle]: https://pest-parser.github.io/#editor +//! [Gitter]: https://gitter.im/dragostis/pest +//! +//! ## `.pest` files +//! +//! Grammar definitions reside in custom `.pest` files located in the `src` directory. Their path is +//! 
relative to `src` and is specified between the `derive` attribute and empty `struct` that +//! `Parser` will be derived on. +//! +//! ```ignore +//! #[derive(Parser)] +//! #[grammar = "path/to/my_grammar.pest"] // relative to src +//! struct MyParser; +//! ``` +//! +//! ## Inline grammars +//! +//! Grammars can also be inlined by using the `#[grammar_inline = "..."]` attribute. +//! +//! ## Grammar +//! +//! A grammar is a series of rules separated by whitespace, possibly containing comments. +//! +//! ### Comments +//! +//! Comments start with `//` and end at the end of the line. +//! +//! ```ignore +//! // a comment +//! ``` +//! +//! ### Rules +//! +//! Rules have the following form: +//! +//! ```ignore +//! name = optional_modifier { expression } +//! ``` +//! +//! The name of the rule is formed from alphanumeric characters or `_` with the condition that the +//! first character is not a digit and is used to create token pairs. When the rule starts being +//! parsed, the starting part of the token is being produced, with the ending part being produced +//! when the rule finishes parsing. +//! +//! The following token pair notation `a(b(), c())` denotes the tokens: start `a`, start `b`, end +//! `b`, start `c`, end `c`, end `a`. +//! +//! #### Modifiers +//! +//! Modifiers are optional and can be one of `_`, `@`, `$`, or `!`. These modifiers change the +//! behavior of the rules. +//! +//! 1. Silent (`_`) +//! +//! Silent rules do not create token pairs during parsing, nor are they error-reported. +//! +//! ```ignore +//! a = _{ "a" } +//! b = { a ~ "b" } +//! ``` +//! +//! Parsing `"ab"` produces the token pair `b()`. +//! +//! 2. Atomic (`@`) +//! +//! Atomic rules do not accept whitespace or comments within their expressions and have a +//! cascading effect on any rule they call. I.e. rules that are not atomic but are called by atomic +//! rules behave atomically. +//! +//! Any rules called by atomic rules do not generate token pairs. +//! +//! ```ignore +//! 
a = { "a" } +//! b = @{ a ~ "b" } +//! +//! WHITESPACE = _{ " " } +//! ``` +//! +//! Parsing `"ab"` produces the token pair `b()`, while `"a b"` produces an error. +//! +//! 3. Compound-atomic (`$`) +//! +//! Compound-atomic are identical to atomic rules with the exception that rules called by them are +//! not forbidden from generating token pairs. +//! +//! ```ignore +//! a = { "a" } +//! b = ${ a ~ "b" } +//! +//! WHITESPACE = _{ " " } +//! ``` +//! +//! Parsing `"ab"` produces the token pairs `b(a())`, while `"a b"` produces an error. +//! +//! 4. Non-atomic (`!`) +//! +//! Non-atomic are identical to normal rules with the exception that they stop the cascading effect +//! of atomic and compound-atomic rules. +//! +//! ```ignore +//! a = { "a" } +//! b = !{ a ~ "b" } +//! c = @{ b } +//! +//! WHITESPACE = _{ " " } +//! ``` +//! +//! Parsing both `"ab"` and `"a b"` produce the token pairs `c(a())`. +//! +//! #### Expressions +//! +//! Expressions can be either terminals or non-terminals. +//! +//! 1. Terminals +//! +//! | Terminal | Usage | +//! |------------|----------------------------------------------------------------| +//! | `"a"` | matches the exact string `"a"` | +//! | `^"a"` | matches the exact string `"a"` case insensitively (ASCII only) | +//! | `'a'..'z'` | matches one character between `'a'` and `'z'` | +//! | `a` | matches rule `a` | +//! +//! Strings and characters follow +//! [Rust's escape mechanisms](https://doc.rust-lang.org/reference/tokens.html#byte-escapes), while +//! identifiers can contain alpha-numeric characters and underscores (`_`), as long as they do not +//! start with a digit. +//! +//! 2. Non-terminals +//! +//! | Non-terminal | Usage | +//! |-----------------------|------------------------------------------------------------| +//! | `(e)` | matches `e` | +//! | `e1 ~ e2` | matches the sequence `e1` `e2` | +//! | e1 \| e2 | matches either `e1` or `e2` | +//! | `e*` | matches `e` zero or more times | +//! 
| `e+` | matches `e` one or more times | +//! | `e{n}` | matches `e` exactly `n` times | +//! | `e{, n}` | matches `e` at most `n` times | +//! | `e{n,}` | matches `e` at least `n` times | +//! | `e{m, n}` | matches `e` between `m` and `n` times inclusively | +//! | `e?` | optionally matches `e` | +//! | `&e` | matches `e` without making progress | +//! | `!e` | matches if `e` doesn't match without making progress | +//! | `PUSH(e)` | matches `e` and pushes its captured string down the stack | +//! +//! where `e`, `e1`, and `e2` are expressions. +//! +//! Expressions can modify the stack only if they match the input. For example, +//! if `e1` in the compound expression `e1 | e2` does not match the input, then +//! it does not modify the stack, so `e2` sees the stack in the same state as +//! `e1` did. Repetitions and optionals (`e*`, `e+`, `e{, n}`, `e{n,}`, +//! `e{m,n}`, `e?`) can modify the stack each time `e` matches. The `!e` and `&e` +//! expressions are a special case; they never modify the stack. +//! +//! ## Special rules +//! +//! Special rules can be called within the grammar. They are: +//! +//! * `WHITESPACE` - runs between rules and sub-rules +//! * `COMMENT` - runs between rules and sub-rules +//! * `ANY` - matches exactly one `char` +//! * `SOI` - (start-of-input) matches only when a `Parser` is still at the starting position +//! * `EOI` - (end-of-input) matches only when a `Parser` has reached its end +//! * `POP` - pops a string from the stack and matches it +//! * `POP_ALL` - pops the entire state of the stack and matches it +//! * `PEEK` - peeks a string from the stack and matches it +//! * `PEEK[a..b]` - peeks part of the stack and matches it +//! * `PEEK_ALL` - peeks the entire state of the stack and matches it +//! * `DROP` - drops the top of the stack (fails to match if the stack is empty) +//! +//! `WHITESPACE` and `COMMENT` should be defined manually if needed. All other rules cannot be +//! overridden. +//! +//! 
## `WHITESPACE` and `COMMENT` +//! +//! When defined, these rules get matched automatically in sequences (`~`) and repetitions +//! (`*`, `+`) between expressions. Atomic rules and those rules called by atomic rules are exempt +//! from this behavior. +//! +//! These rules should be defined so as to match one whitespace character and one comment only since +//! they are run in repetitions. +//! +//! If both `WHITESPACE` and `COMMENT` are defined, this grammar: +//! +//! ```ignore +//! a = { b ~ c } +//! ``` +//! +//! is effectively transformed into this one behind the scenes: +//! +//! ```ignore +//! a = { b ~ WHITESPACE* ~ (COMMENT ~ WHITESPACE*)* ~ c } +//! ``` +//! +//! ## `PUSH`, `POP`, `DROP`, and `PEEK` +//! +//! `PUSH(e)` simply pushes the captured string of the expression `e` down a stack. This stack can +//! then later be used to match grammar based on its content with `POP` and `PEEK`. +//! +//! `PEEK` always matches the string at the top of the stack. So, if the stack contains `["b", "a"]` +//! (`"a"` being on top), this grammar: +//! +//! ```ignore +//! a = { PEEK } +//! ``` +//! +//! is effectively transformed into this one at parse time: +//! +//! ```ignore +//! a = { "a" } +//! ``` +//! +//! `POP` works the same way with the exception that it pops the string off of the stack if the +//! match worked. With the stack from above, if `POP` matches `"a"`, the stack will be mutated +//! to `["b"]`. +//! +//! `DROP` makes it possible to remove the string at the top of the stack +//! without matching it. If the stack is nonempty, `DROP` drops the top of the +//! stack. If the stack is empty, then `DROP` fails to match. +//! +//! ### Advanced peeking +//! +//! `PEEK[start..end]` and `PEEK_ALL` allow peeking deeper into the stack. The syntax works exactly +//! like Rust’s exclusive slice syntax. Additionally, negative indices can be used to indicate an +//! offset from the top. If the end lies before or at the start, the expression matches (as does +//! 
a `PEEK_ALL` on an empty stack). With the stack `["c", "b", "a"]` (`"a"` on top): +//! +//! ```ignore +//! fill = PUSH("c") ~ PUSH("b") ~ PUSH("a") +//! v = { PEEK_ALL } = { "a" ~ "b" ~ "c" } // top to bottom +//! w = { PEEK[..] } = { "c" ~ "b" ~ "a" } // bottom to top +//! x = { PEEK[1..2] } = { PEEK[1..-1] } = { "b" } +//! y = { PEEK[..-2] } = { PEEK[0..1] } = { "a" } +//! z = { PEEK[1..] } = { PEEK[-2..3] } = { "c" ~ "b" } +//! n = { PEEK[2..-2] } = { PEEK[2..1] } = { "" } +//! ``` +//! +//! For historical reasons, `PEEK_ALL` matches from top to bottom, while `PEEK[start..end]` matches +//! from bottom to top. There is currently no syntax to match a slice of the stack top to bottom. +//! +//! ## `Rule` +//! +//! All rules defined or used in the grammar populate a generated `enum` called `Rule`. This +//! implements `pest`'s `RuleType` and can be used throughout the API. +//! +//! ## `Built-in rules` +//! +//! Pest also comes with a number of built-in rules for convenience. They are: +//! +//! * `ASCII_DIGIT` - matches a numeric character from 0..9 +//! * `ASCII_NONZERO_DIGIT` - matches a numeric character from 1..9 +//! * `ASCII_BIN_DIGIT` - matches a numeric character from 0..1 +//! * `ASCII_OCT_DIGIT` - matches a numeric character from 0..7 +//! * `ASCII_HEX_DIGIT` - matches a numeric character from 0..9 or a..f or A..F +//! * `ASCII_ALPHA_LOWER` - matches a character from a..z +//! * `ASCII_ALPHA_UPPER` - matches a character from A..Z +//! * `ASCII_ALPHA` - matches a character from a..z or A..Z +//! * `ASCII_ALPHANUMERIC` - matches a character from a..z or A..Z or 0..9 +//! * `ASCII` - matches a character from \x00..\x7f +//! 
* `NEWLINE` - matches either "\n" or "\r\n" or "\r"
+
+#![doc(html_root_url = "https://docs.rs/pest_derive")]
+
+extern crate pest_generator;
+extern crate proc_macro;
+
+use proc_macro::TokenStream;
+
+/// Entry point for `#[derive(Parser)]`.
+///
+/// Registered as the `Parser` derive macro with the helper attributes
+/// `grammar` and `grammar_inline`. The annotated item's token stream is
+/// handed to `pest_generator::derive_parser`, and whatever that returns is
+/// converted back into a `proc_macro::TokenStream` for the compiler.
+#[proc_macro_derive(Parser, attributes(grammar, grammar_inline))]
+pub fn derive_parser(input: TokenStream) -> TokenStream {
+    // NOTE(review): the meaning of the `true` flag is defined by
+    // `pest_generator::derive_parser` and is not visible here - confirm there.
+    let generated = pest_generator::derive_parser(input.into(), true);
+    generated.into()
+}
-- cgit v1.2.3