summaryrefslogtreecommitdiffstats
path: root/vendor/pest/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/pest/src/lib.rs')
-rw-r--r--vendor/pest/src/lib.rs288
1 files changed, 277 insertions, 11 deletions
diff --git a/vendor/pest/src/lib.rs b/vendor/pest/src/lib.rs
index 30f87db60..5cf2f9e26 100644
--- a/vendor/pest/src/lib.rs
+++ b/vendor/pest/src/lib.rs
@@ -7,7 +7,11 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
#![no_std]
-
+#![doc(
+ html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg",
+ html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg"
+)]
+#![warn(missing_docs, rust_2018_idioms, unused_qualifications)]
//! # pest. The Elegant Parser
//!
//! pest is a general purpose parser written in Rust with a focus on accessibility, correctness,
@@ -25,12 +29,15 @@
//!
//! * API reference on [docs.rs]
//! * play with grammars and share them on our [fiddle]
-//! * leave feedback, ask questions, or greet us on [Gitter]
+//! * find previous common questions answered or ask questions on [GitHub Discussions]
+//! * leave feedback, ask questions, or greet us on [Gitter] or [Discord]
//!
-//! [book]: https://pest-parser.github.io/book
+//! [book]: https://pest.rs/book
//! [docs.rs]: https://docs.rs/pest
-//! [fiddle]: https://pest-parser.github.io/#editor
-//! [Gitter]: https://gitter.im/dragostis/pest
+//! [fiddle]: https://pest.rs/#editor
+//! [Gitter]: https://gitter.im/pest-parser/pest
+//! [Discord]: https://discord.gg/XEGACtWpT2
+//! [GitHub Discussions]: https://github.com/pest-parser/pest/discussions
//!
//! ## Usage
//!
@@ -61,18 +68,271 @@
//!
//! [`Parser`]: trait.Parser.html
//! [`pest_derive` crate]: https://docs.rs/pest_derive/
+//!
+//! ## Grammar
+//!
+//! A grammar is a series of rules separated by whitespace, possibly containing comments.
+//!
+//! ### Comments
+//!
+//! Comments start with `//` and end at the end of the line.
+//!
+//! ```text
+//! // a comment
+//! ```
+//!
+//! ### Rules
+//!
+//! Rules have the following form:
+//!
+//! ```ignore
+//! name = optional_modifier { expression }
+//! ```
+//!
+//! The name of the rule is formed from alphanumeric characters or `_` with the condition that the
+//! first character is not a digit and is used to create token pairs. When the rule starts being
+//! parsed, the starting part of the token is being produced, with the ending part being produced
+//! when the rule finishes parsing.
+//!
+//! The following token pair notation `a(b(), c())` denotes the tokens: start `a`, start `b`, end
+//! `b`, start `c`, end `c`, end `a`.
+//!
+//! #### Modifiers
+//!
+//! Modifiers are optional and can be one of `_`, `@`, `$`, or `!`. These modifiers change the
+//! behavior of the rules.
+//!
+//! 1. Silent (`_`)
+//!
+//! Silent rules do not create token pairs during parsing, nor are they error-reported.
+//!
+//! ```ignore
+//! a = _{ "a" }
+//! b = { a ~ "b" }
+//! ```
+//!
+//! Parsing `"ab"` produces the token pair `b()`.
+//!
+//! 2. Atomic (`@`)
+//!
+//! Atomic rules do not accept whitespace or comments within their expressions and have a
+//! cascading effect on any rule they call. I.e. rules that are not atomic but are called by atomic
+//! rules behave atomically.
+//!
+//! Any rules called by atomic rules do not generate token pairs.
+//!
+//! ```ignore
+//! a = { "a" }
+//! b = @{ a ~ "b" }
+//!
+//! WHITESPACE = _{ " " }
+//! ```
+//!
+//! Parsing `"ab"` produces the token pair `b()`, while `"a b"` produces an error.
+//!
+//! 3. Compound-atomic (`$`)
+//!
+//! Compound-atomic rules are identical to atomic rules with the exception that rules called by
+//! them are not forbidden from generating token pairs.
+//!
+//! ```ignore
+//! a = { "a" }
+//! b = ${ a ~ "b" }
+//!
+//! WHITESPACE = _{ " " }
+//! ```
+//!
+//! Parsing `"ab"` produces the token pairs `b(a())`, while `"a b"` produces an error.
+//!
+//! 4. Non-atomic (`!`)
+//!
+//! Non-atomic rules are identical to normal rules with the exception that they stop the cascading
+//! effect of atomic and compound-atomic rules.
+//!
+//! ```ignore
+//! a = { "a" }
+//! b = !{ a ~ "b" }
+//! c = @{ b }
+//!
+//! WHITESPACE = _{ " " }
+//! ```
+//!
+//! Parsing both `"ab"` and `"a b"` produces the token pairs `c(a())`.
+//!
+//! #### Expressions
+//!
+//! Expressions can be either terminals or non-terminals.
+//!
+//! 1. Terminals
+//!
+//! | Terminal | Usage |
+//! |------------|----------------------------------------------------------------|
+//! | `"a"` | matches the exact string `"a"` |
+//! | `^"a"` | matches the exact string `"a"` case insensitively (ASCII only) |
+//! | `'a'..'z'` | matches one character between `'a'` and `'z'` |
+//! | `a` | matches rule `a` |
+//!
+//! Strings and characters follow
+//! [Rust's escape mechanisms](https://doc.rust-lang.org/reference/tokens.html#byte-escapes), while
+//! identifiers can contain alphanumeric characters and underscores (`_`), as long as they do not
+//! start with a digit.
+//!
+//! 2. Non-terminals
+//!
+//! | Non-terminal | Usage |
+//! |-----------------------|------------------------------------------------------------|
+//! | `(e)` | matches `e` |
+//! | `e1 ~ e2` | matches the sequence `e1` `e2` |
+//! | <code>e1 \| e2</code> | matches either `e1` or `e2` |
+//! | `e*` | matches `e` zero or more times |
+//! | `e+` | matches `e` one or more times |
+//! | `e{n}` | matches `e` exactly `n` times |
+//! | `e{, n}` | matches `e` at most `n` times |
+//! | `e{n,}` | matches `e` at least `n` times |
+//! | `e{m, n}` | matches `e` between `m` and `n` times inclusively |
+//! | `e?` | optionally matches `e` |
+//! | `&e` | matches `e` without making progress |
+//! | `!e` | matches if `e` doesn't match without making progress |
+//! | `PUSH(e)` | matches `e` and pushes its captured string down the stack |
+//!
+//! where `e`, `e1`, and `e2` are expressions.
+//!
+//! Matching is greedy, without backtracking. Note the difference in behavior for
+//! these two rules in matching identifiers that don't end in an underscore:
+//!
+//! ```ignore
+//! // input: ab_bb_b
+//!
+//! identifier = @{ "a" ~ ("b"|"_")* ~ "b" }
+//! // matches: a b_bb_b nothing -> error!
+//!
+//! identifier = @{ "a" ~ ("_"* ~ "b")* }
+//! // matches: a b, _bb, _b in three repetitions
+//! ```
+//!
+//! Expressions can modify the stack only if they match the input. For example,
+//! if `e1` in the compound expression `e1 | e2` does not match the input, then
+//! it does not modify the stack, so `e2` sees the stack in the same state as
+//! `e1` did. Repetitions and optionals (`e*`, `e+`, `e{, n}`, `e{n,}`,
+//! `e{m,n}`, `e?`) can modify the stack each time `e` matches. The `!e` and `&e`
+//! expressions are a special case; they never modify the stack.
+//! Many languages have "keyword" tokens (e.g. if, for, while) as well as general
+//! tokens (e.g. identifier) that match any word. In order to match a keyword,
+//! you generally need to ensure that it is not immediately followed by another
+//! letter or digit (otherwise it would be matched as an identifier).
+//!
+//! ## Special rules
+//!
+//! Special rules can be called within the grammar. They are:
+//!
+//! * `WHITESPACE` - runs between rules and sub-rules
+//! * `COMMENT` - runs between rules and sub-rules
+//! * `ANY` - matches exactly one `char`
+//! * `SOI` - (start-of-input) matches only when a `Parser` is still at the starting position
+//! * `EOI` - (end-of-input) matches only when a `Parser` has reached its end
+//! * `POP` - pops a string from the stack and matches it
+//! * `POP_ALL` - pops the entire state of the stack and matches it
+//! * `PEEK` - peeks a string from the stack and matches it
+//! * `PEEK[a..b]` - peeks part of the stack and matches it
+//! * `PEEK_ALL` - peeks the entire state of the stack and matches it
+//! * `DROP` - drops the top of the stack (fails to match if the stack is empty)
+//!
+//! `WHITESPACE` and `COMMENT` should be defined manually if needed. All other rules cannot be
+//! overridden.
+//!
+//! ## `WHITESPACE` and `COMMENT`
+//!
+//! When defined, these rules get matched automatically in sequences (`~`) and repetitions
+//! (`*`, `+`) between expressions. Atomic rules and those rules called by atomic rules are exempt
+//! from this behavior.
+//!
+//! These rules should be defined so as to match one whitespace character and one comment only since
+//! they are run in repetitions.
+//!
+//! If both `WHITESPACE` and `COMMENT` are defined, this grammar:
+//!
+//! ```ignore
+//! a = { b ~ c }
+//! ```
+//!
+//! is effectively transformed into this one behind the scenes:
+//!
+//! ```ignore
+//! a = { b ~ WHITESPACE* ~ (COMMENT ~ WHITESPACE*)* ~ c }
+//! ```
+//!
+//! ## `PUSH`, `POP`, `DROP`, and `PEEK`
+//!
+//! `PUSH(e)` simply pushes the captured string of the expression `e` down a stack. This stack can
+//! then later be used to match grammar based on its content with `POP` and `PEEK`.
+//!
+//! `PEEK` always matches the string at the top of stack. So, if the stack contains `["b", "a"]`
+//! (`"a"` being on top), this grammar:
+//!
+//! ```ignore
+//! a = { PEEK }
+//! ```
+//!
+//! is effectively transformed into this one at parse time:
+//!
+//! ```ignore
+//! a = { "a" }
+//! ```
+//!
+//! `POP` works the same way with the exception that it pops the string off of the stack if the
+//! match worked. With the stack from above, if `POP` matches `"a"`, the stack will be mutated
+//! to `["b"]`.
+//!
+//! `DROP` makes it possible to remove the string at the top of the stack
+//! without matching it. If the stack is nonempty, `DROP` drops the top of the
+//! stack. If the stack is empty, then `DROP` fails to match.
+//!
+//! ### Advanced peeking
+//!
+//! `PEEK[start..end]` and `PEEK_ALL` allow peeking deeper into the stack. The syntax works exactly
+//! like Rust’s exclusive slice syntax. Additionally, negative indices can be used to indicate an
+//! offset from the top. If the end lies before or at the start, the expression matches (as does
+//! a `PEEK_ALL` on an empty stack). With the stack `["c", "b", "a"]` (`"a"` on top):
+//!
+//! ```ignore
+//! fill = PUSH("c") ~ PUSH("b") ~ PUSH("a")
+//! v = { PEEK_ALL } = { "a" ~ "b" ~ "c" } // top to bottom
+//! w = { PEEK[..] } = { "c" ~ "b" ~ "a" } // bottom to top
+//! x = { PEEK[1..2] } = { PEEK[1..-1] } = { "b" }
+//! y = { PEEK[..-2] } = { PEEK[0..1] } = { "c" }
+//! z = { PEEK[1..] } = { PEEK[-2..3] } = { "b" ~ "a" }
+//! n = { PEEK[2..-2] } = { PEEK[2..1] } = { "" }
+//! ```
+//!
+//! For historical reasons, `PEEK_ALL` matches from top to bottom, while `PEEK[start..end]` matches
+//! from bottom to top. There is currently no syntax to match a slice of the stack top to bottom.
+//!
+//! ## `Rule`
+//!
+//! All rules defined or used in the grammar populate a generated `enum` called `Rule`. This
+//! implements `pest`'s `RuleType` and can be used throughout the API.
+//!
+//! ## Built-in rules
+//!
+//! Pest also comes with a number of built-in rules for convenience. They are:
+//!
+//! * `ASCII_DIGIT` - matches a numeric character from 0..9
+//! * `ASCII_NONZERO_DIGIT` - matches a numeric character from 1..9
+//! * `ASCII_BIN_DIGIT` - matches a numeric character from 0..1
+//! * `ASCII_OCT_DIGIT` - matches a numeric character from 0..7
+//! * `ASCII_HEX_DIGIT` - matches a numeric character from 0..9 or a..f or A..F
+//! * `ASCII_ALPHA_LOWER` - matches a character from a..z
+//! * `ASCII_ALPHA_UPPER` - matches a character from A..Z
+//! * `ASCII_ALPHA` - matches a character from a..z or A..Z
+//! * `ASCII_ALPHANUMERIC` - matches a character from a..z or A..Z or 0..9
+//! * `ASCII` - matches a character from \x00..\x7f
+//! * `NEWLINE` - matches either "\n" or "\r\n" or "\r"
#![doc(html_root_url = "https://docs.rs/pest")]
extern crate alloc;
#[cfg(feature = "std")]
extern crate std;
-extern crate ucd_trie;
-
-#[cfg(feature = "pretty-print")]
-extern crate serde;
-#[cfg(feature = "pretty-print")]
-extern crate serde_json;
pub use crate::parser::Parser;
pub use crate::parser_state::{
@@ -90,6 +350,12 @@ mod macros;
mod parser;
mod parser_state;
mod position;
+pub mod pratt_parser;
+#[deprecated(
+ since = "2.4.0",
+ note = "Use `pest::pratt_parser` instead (it is an equivalent which also supports unary prefix/suffix operators).
+While prec_climber is going to be kept in 2.x minor and patch releases, it may be removed in a future major release."
+)]
pub mod prec_climber;
mod span;
mod stack;