summaryrefslogtreecommitdiffstats
path: root/third_party/rust/nom/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/rust/nom/src/lib.rs463
1 files changed, 463 insertions, 0 deletions
diff --git a/third_party/rust/nom/src/lib.rs b/third_party/rust/nom/src/lib.rs
new file mode 100644
index 0000000000..9429467cf3
--- /dev/null
+++ b/third_party/rust/nom/src/lib.rs
@@ -0,0 +1,463 @@
+//! # nom, eating data byte by byte
+//!
+//! nom is a parser combinator library with a focus on safe parsing,
+//! streaming patterns, and as much as possible zero copy.
+//!
+//! ## Example
+//!
+//! ```rust
+//! use nom::{
+//! IResult,
+//! bytes::complete::{tag, take_while_m_n},
+//! combinator::map_res,
+//! sequence::tuple};
+//!
+//! #[derive(Debug,PartialEq)]
+//! pub struct Color {
+//! pub red: u8,
+//! pub green: u8,
+//! pub blue: u8,
+//! }
+//!
+//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
+//! u8::from_str_radix(input, 16)
+//! }
+//!
+//! fn is_hex_digit(c: char) -> bool {
+//! c.is_digit(16)
+//! }
+//!
+//! fn hex_primary(input: &str) -> IResult<&str, u8> {
+//! map_res(
+//! take_while_m_n(2, 2, is_hex_digit),
+//! from_hex
+//! )(input)
+//! }
+//!
+//! fn hex_color(input: &str) -> IResult<&str, Color> {
+//! let (input, _) = tag("#")(input)?;
+//! let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
+//!
+//! Ok((input, Color { red, green, blue }))
+//! }
+//!
+//! fn main() {
+//! assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
+//! red: 47,
+//! green: 20,
+//! blue: 223,
+//! })));
+//! }
+//! ```
+//!
+//! The code is available on [Github](https://github.com/Geal/nom)
+//!
+//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details
+//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md),
+//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md).
+//! You can also check out the [recipes] module that contains examples of common patterns.
+//!
+//! **Looking for a specific combinator? Read the
+//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)**
+//!
+//! If you are upgrading to nom 5.0, please read the
+//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md).
+//!
+//! ## Parser combinators
+//!
+//! Parser combinators are an approach to parsers that is very different from
+//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
+//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
+//! in a separate syntax and generating the corresponding code, you use very small
+//! functions with very specific purposes, like "take 5 bytes", or "recognize the
+//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
+//! 'HTTP', then a space, then a version".
+//! The resulting code is small, and looks like the grammar you would have
+//! written with other parser approaches.
+//!
+//! This gives us a few advantages:
+//!
+//! - The parsers are small and easy to write
+//! - The parsers components are easy to reuse (if they're general enough, please add them to nom!)
+//! - The parsers components are easy to test separately (unit tests and property-based tests)
+//! - The parser combination code looks close to the grammar you would have written
+//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
+//!
+//! Here is an example of one such parser, to recognize text between parentheses:
+//!
+//! ```rust
+//! use nom::{
+//! IResult,
+//! sequence::delimited,
+//! // see the "streaming/complete" paragraph lower for an explanation of these submodules
+//! character::complete::char,
+//! bytes::complete::is_not
+//! };
+//!
+//! fn parens(input: &str) -> IResult<&str, &str> {
+//! delimited(char('('), is_not(")"), char(')'))(input)
+//! }
+//! ```
+//!
+//! It defines a function named `parens` which will recognize a sequence of the
+//! character `(`, the longest byte array not containing `)`, then the character
+//! `)`, and will return the byte array in the middle.
+//!
+//! Here is another parser, written without using nom's combinators this time:
+//!
+//! ```rust
+//! use nom::{IResult, Err, Needed};
+//!
+//! # fn main() {
+//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{
+//! if i.len() < 4 {
+//! Err(Err::Incomplete(Needed::new(4)))
+//! } else {
+//! Ok((&i[4..], &i[0..4]))
+//! }
+//! }
+//! # }
+//! ```
+//!
+//! This function takes a byte array as input, and tries to consume 4 bytes.
+//! Writing all the parsers manually, like this, is dangerous, despite Rust's
+//! safety features. There are still a lot of mistakes one can make. That's why
+//! nom provides a list of functions to help in developing parsers.
+//!
+//! With functions, you would write it like this:
+//!
+//! ```rust
+//! use nom::{IResult, bytes::streaming::take};
+//! fn take4(input: &str) -> IResult<&str, &str> {
+//! take(4u8)(input)
+//! }
+//! ```
+//!
+//! A parser in nom is a function which, for an input type `I`, an output type `O`
+//! and an optional error type `E`, will have the following signature:
+//!
+//! ```rust,compile_fail
+//! fn parser(input: I) -> IResult<I, O, E>;
+//! ```
+//!
+//! Or like this, if you don't want to specify a custom error type (it will be `(I, ErrorKind)` by default):
+//!
+//! ```rust,compile_fail
+//! fn parser(input: I) -> IResult<I, O>;
+//! ```
+//!
+//! `IResult` is an alias for the `Result` type:
+//!
+//! ```rust
+//! use nom::{Needed, error::Error};
+//!
+//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
+//!
+//! enum Err<E> {
+//! Incomplete(Needed),
+//! Error(E),
+//! Failure(E),
+//! }
+//! ```
+//!
+//! It can have the following values:
+//!
+//! - A correct result `Ok((I,O))` with the first element being the remaining of the input (not parsed yet), and the second the output value;
+//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error
+//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
+//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser
+//!
+//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers.
+//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc).
+//!
+//! ## Making new parsers with function combinators
+//!
+//! nom is based on functions that generate parsers, with a signature like
+//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
+//! The arguments of a combinator can be direct values (like `take` which uses
+//! a number of bytes or character as argument) or even other parsers (like
+//! `delimited` which takes as argument 3 parsers, and returns the result of
+//! the second one if all are successful).
+//!
+//! Here are some examples:
+//!
+//! ```rust
+//! use nom::IResult;
+//! use nom::bytes::complete::{tag, take};
+//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
+//! tag("abcd")(i) // will consume bytes if the input begins with "abcd"
+//! }
+//!
+//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
+//! take(10u8)(i) // will consume and return 10 bytes of input
+//! }
+//! ```
+//!
+//! ## Combining parsers
+//!
+//! There are higher level patterns, like the **`alt`** combinator, which
+//! provides a choice between multiple parsers. If one branch fails, it tries
+//! the next, and returns the result of the first parser that succeeds:
+//!
+//! ```rust
+//! use nom::IResult;
+//! use nom::branch::alt;
+//! use nom::bytes::complete::tag;
+//!
+//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
+//!
+//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
+//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
+//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom::Err::Error((&b"ijklxxx"[..], nom::error::ErrorKind::Tag))));
+//! ```
+//!
+//! The **`opt`** combinator makes a parser optional. If the child parser returns
+//! an error, **`opt`** will still succeed and return None:
+//!
+//! ```rust
+//! use nom::{IResult, combinator::opt, bytes::complete::tag};
+//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
+//! opt(tag("abcd"))(i)
+//! }
+//!
+//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
+//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
+//! ```
+//!
+//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
+//!
+//! ```rust
+//! # #[cfg(feature = "alloc")]
+//! # fn main() {
+//! use nom::{IResult, multi::many0, bytes::complete::tag};
+//! use std::str;
+//!
+//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
+//! many0(tag("abcd"))(i)
+//! }
+//!
+//! let a = "abcdef";
+//! let b = "abcdabcdef";
+//! let c = "azerty";
+//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
+//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
+//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
+//! # }
+//! # #[cfg(not(feature = "alloc"))]
+//! # fn main() {}
+//! ```
+//!
+//! Here are some basic combinators available:
+//!
+//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
+//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
+//! - **`many1`**: Will apply the parser 1 or more times
+//!
+//! There are more complex (and more useful) parsers like `tuple`, which is
+//! used to apply a series of parsers then assemble their results.
+//!
+//! Example with `tuple`:
+//!
+//! ```rust
+//! # fn main() {
+//! use nom::{error::ErrorKind, Needed,
+//! number::streaming::be_u16,
+//! bytes::streaming::{tag, take},
+//! sequence::tuple};
+//!
+//! let mut tpl = tuple((be_u16, take(3u8), tag("fg")));
+//!
+//! assert_eq!(
+//! tpl(&b"abcdefgh"[..]),
+//! Ok((
+//! &b"h"[..],
+//! (0x6162u16, &b"cde"[..], &b"fg"[..])
+//! ))
+//! );
+//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2))));
+//! let input = &b"abcdejk"[..];
+//! assert_eq!(tpl(input), Err(nom::Err::Error((&input[5..], ErrorKind::Tag))));
+//! # }
+//! ```
+//!
+//! But you can also use a sequence of combinators written in imperative style,
+//! thanks to the `?` operator:
+//!
+//! ```rust
+//! # fn main() {
+//! use nom::{IResult, bytes::complete::tag};
+//!
+//! #[derive(Debug, PartialEq)]
+//! struct A {
+//! a: u8,
+//! b: u8
+//! }
+//!
+//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
+//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
+//!
+//! fn f(i: &[u8]) -> IResult<&[u8], A> {
+//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
+//! let (i, _) = tag("abcd")(i)?;
+//! let (i, a) = ret_int1(i)?;
+//! let (i, _) = tag("efgh")(i)?;
+//! let (i, b) = ret_int2(i)?;
+//!
+//! Ok((i, A { a, b }))
+//! }
+//!
+//! let r = f(b"abcdefghX");
+//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
+//! # }
+//! ```
+//!
+//! ## Streaming / Complete
+//!
+//! Some of nom's modules have `streaming` or `complete` submodules. They hold
+//! different variants of the same combinators.
+//!
+//! A streaming parser assumes that we might not have all of the input data.
+//! This can happen with some network protocol or large file parsers, where the
+//! input buffer can be full and need to be resized or refilled.
+//!
+//! A complete parser assumes that we already have all of the input data.
+//! This will be the common case with small files that can be read entirely to
+//! memory.
+//!
+//! Here is how it works in practice:
+//!
+//! ```rust
+//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character};
+//!
+//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> {
+//! bytes::streaming::take(4u8)(i)
+//! }
+//!
+//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> {
+//! bytes::complete::take(4u8)(i)
+//! }
+//!
+//! // both parsers will take 4 bytes as expected
+//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
+//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
+//!
+//! // if the input is smaller than 4 bytes, the streaming parser
+//! // will return `Incomplete` to indicate that we need more data
+//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1))));
+//!
+//! // but the complete parser will return an error
+//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof))));
+//!
+//! // the alpha0 function recognizes 0 or more alphabetic characters
+//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> {
+//! character::streaming::alpha0(i)
+//! }
+//!
+//! fn alpha0_complete(i: &str) -> IResult<&str, &str> {
+//! character::complete::alpha0(i)
+//! }
+//!
+//! // if there's a clear limit to the recognized characters, both parsers work the same way
+//! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd")));
+//! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd")));
+//!
+//! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot
+//! // know if more input data should be recognized. The whole input could be "abcd;", or
+//! // "abcde;"
+//! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1))));
+//!
+//! // while the complete version knows that all of the data is there
+//! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd")));
+//! ```
+//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc),
+//! check out the [recipes]!
+#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))]
+#![cfg_attr(feature = "docsrs", feature(doc_cfg))]
+#![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))]
+#![deny(missing_docs)]
+#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
+#[cfg(feature = "alloc")]
+#[macro_use]
+extern crate alloc;
+#[cfg(doctest)]
+extern crate doc_comment;
+
+#[cfg(doctest)]
+doc_comment::doctest!("../README.md");
+
+/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does
+/// it, albeit there it is not public.
+#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
+pub mod lib {
+ /// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally,
+ /// as well as `core` or `std`
+ #[cfg(not(feature = "std"))]
+ #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
+ /// internal std exports for no_std compatibility
+ pub mod std {
+ #[doc(hidden)]
+ #[cfg(not(feature = "alloc"))]
+ pub use core::borrow;
+
+ #[cfg(feature = "alloc")]
+ #[doc(hidden)]
+ pub use alloc::{borrow, boxed, string, vec};
+
+ #[doc(hidden)]
+ pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};
+
+ /// internal reproduction of std prelude
+ #[doc(hidden)]
+ pub mod prelude {
+ pub use core::prelude as v1;
+ }
+ }
+
+ #[cfg(feature = "std")]
+ #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
+ /// internal std exports for no_std compatibility
+ pub mod std {
+ #[doc(hidden)]
+ pub use std::{
+ alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, result,
+ slice, str, string, vec,
+ };
+
+ /// internal reproduction of std prelude
+ #[doc(hidden)]
+ pub mod prelude {
+ pub use std::prelude as v1;
+ }
+ }
+}
+
+pub use self::bits::*;
+pub use self::internal::*;
+pub use self::traits::*;
+
+pub use self::str::*;
+
+#[macro_use]
+pub mod error;
+
+pub mod combinator;
+mod internal;
+mod traits;
+#[macro_use]
+pub mod branch;
+pub mod multi;
+pub mod sequence;
+
+pub mod bits;
+pub mod bytes;
+
+pub mod character;
+
+mod str;
+
+pub mod number;
+
+#[cfg(feature = "docsrs")]
+#[cfg_attr(feature = "docsrs", cfg_attr(feature = "docsrs", doc = include_str!("../doc/nom_recipes.md")))]
+pub mod recipes {}