//! Basic types to build the parsers use self::Needed::*; use crate::error::{self, ErrorKind}; use crate::lib::std::fmt; use core::num::NonZeroUsize; /// Holds the result of parsing functions /// /// It depends on the input type `I`, the output type `O`, and the error type `E` /// (by default `(I, nom::ErrorKind)`) /// /// The `Ok` side is a pair containing the remainder of the input (the part of the data that /// was not parsed) and the produced value. The `Err` side contains an instance of `nom::Err`. /// /// Outside of the parsing code, you can use the [Finish::finish] method to convert /// it to a more common result type pub type IResult> = Result<(I, O), Err>; /// Helper trait to convert a parser's result to a more manageable type pub trait Finish { /// converts the parser's result to a type that is more consumable by error /// management libraries. It keeps the same `Ok` branch, and merges `Err::Error` /// and `Err::Failure` into the `Err` side. /// /// *warning*: if the result is `Err(Err::Incomplete(_))`, this method will panic. /// - "complete" parsers: It will not be an issue, `Incomplete` is never used /// - "streaming" parsers: `Incomplete` will be returned if there's not enough data /// for the parser to decide, and you should gather more data before parsing again. /// Once the parser returns either `Ok(_)`, `Err(Err::Error(_))` or `Err(Err::Failure(_))`, /// you can get out of the parsing loop and call `finish()` on the parser's result fn finish(self) -> Result<(I, O), E>; } impl Finish for IResult { fn finish(self) -> Result<(I, O), E> { match self { Ok(res) => Ok(res), Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e), Err(Err::Incomplete(_)) => { panic!("Cannot call `finish()` on `Err(Err::Incomplete(_))`: this result means that the parser does not have enough data to decide, you should gather more data and try to reapply the parser instead") } } } } /// Contains information on needed data if a parser returned `Incomplete` #[derive(Debug, PartialEq, Eq, Clone, Copy)] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub enum Needed { /// Needs more data, but we do not know how much Unknown, /// Contains the required data size in bytes Size(NonZeroUsize), } impl Needed { /// Creates `Needed` instance, returns `Needed::Unknown` if the argument is zero pub fn new(s: usize) -> Self { match NonZeroUsize::new(s) { Some(sz) => Needed::Size(sz), None => Needed::Unknown, } } /// Indicates if we know how many bytes we need pub fn is_known(&self) -> bool { *self != Unknown } /// Maps a `Needed` to `Needed` by applying a function to a contained `Size` value. #[inline] pub fn map usize>(self, f: F) -> Needed { match self { Unknown => Unknown, Size(n) => Needed::new(f(n)), } } } /// The `Err` enum indicates the parser was not successful /// /// It has three cases: /// /// * `Incomplete` indicates that more data is needed to decide. The `Needed` enum /// can contain how many additional bytes are necessary. If you are sure your parser /// is working on full data, you can wrap your parser with the `complete` combinator /// to transform that case in `Error` /// * `Error` means some parser did not succeed, but another one might (as an example, /// when testing different branches of an `alt` combinator) /// * `Failure` indicates an unrecoverable error. As an example, if you recognize a prefix /// to decide on the next parser to apply, and that parser fails, you know there's no need /// to try other parsers, you were already in the right branch, so the data is invalid /// #[derive(Debug, Clone, PartialEq)] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub enum Err { /// There was not enough data Incomplete(Needed), /// The parser had an error (recoverable) Error(E), /// The parser had an unrecoverable error: we got to the right /// branch and we know other branches won't work, so backtrack /// as fast as possible Failure(E), } impl Err { /// Tests if the result is Incomplete pub fn is_incomplete(&self) -> bool { if let Err::Incomplete(_) = self { true } else { false } } /// Applies the given function to the inner error pub fn map(self, f: F) -> Err where F: FnOnce(E) -> E2, { match self { Err::Incomplete(n) => Err::Incomplete(n), Err::Failure(t) => Err::Failure(f(t)), Err::Error(t) => Err::Error(f(t)), } } /// Automatically converts between errors if the underlying type supports it pub fn convert(e: Err) -> Self where E: From, { e.map(crate::lib::std::convert::Into::into) } } impl Err<(T, ErrorKind)> { /// Maps `Err<(T, ErrorKind)>` to `Err<(U, ErrorKind)>` with the given `F: T -> U` pub fn map_input(self, f: F) -> Err<(U, ErrorKind)> where F: FnOnce(T) -> U, { match self { Err::Incomplete(n) => Err::Incomplete(n), Err::Failure((input, k)) => Err::Failure((f(input), k)), Err::Error((input, k)) => Err::Error((f(input), k)), } } } impl Err> { /// Maps `Err>` to `Err>` with the given `F: T -> U` pub fn map_input(self, f: F) -> Err> where F: FnOnce(T) -> U, { match self { Err::Incomplete(n) => Err::Incomplete(n), Err::Failure(error::Error { input, code }) => Err::Failure(error::Error { input: f(input), code, }), Err::Error(error::Error { input, code }) => Err::Error(error::Error { input: f(input), code, }), } } } #[cfg(feature = "alloc")] use crate::lib::std::{borrow::ToOwned, string::String, vec::Vec}; #[cfg(feature = "alloc")] impl Err<(&[u8], ErrorKind)> { /// Obtaining ownership #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] pub fn to_owned(self) -> Err<(Vec, ErrorKind)> { self.map_input(ToOwned::to_owned) } } #[cfg(feature = "alloc")] impl Err<(&str, ErrorKind)> { /// Obtaining ownership #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] pub fn to_owned(self) -> Err<(String, ErrorKind)> { self.map_input(ToOwned::to_owned) } } #[cfg(feature = "alloc")] impl Err> { /// Obtaining ownership #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] pub fn to_owned(self) -> Err>> { self.map_input(ToOwned::to_owned) } } #[cfg(feature = "alloc")] impl Err> { /// Obtaining ownership #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] pub fn to_owned(self) -> Err> { self.map_input(ToOwned::to_owned) } } impl Eq for Err {} impl fmt::Display for Err where E: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Err::Incomplete(Needed::Size(u)) => write!(f, "Parsing requires {} bytes/chars", u), Err::Incomplete(Needed::Unknown) => write!(f, "Parsing requires more data"), Err::Failure(c) => write!(f, "Parsing Failure: {:?}", c), Err::Error(c) => write!(f, "Parsing Error: {:?}", c), } } } #[cfg(feature = "std")] use std::error::Error; #[cfg(feature = "std")] impl Error for Err where E: fmt::Debug, { fn source(&self) -> Option<&(dyn Error + 'static)> { None // no underlying error } } /// All nom parsers implement this trait pub trait Parser { /// A parser takes in input type, and returns a `Result` containing /// either the remaining input and the output value, or an error fn parse(&mut self, input: I) -> IResult; /// Maps a function over the result of a parser fn map(self, g: G) -> Map where G: Fn(O) -> O2, Self: core::marker::Sized, { Map { f: self, g, phantom: core::marker::PhantomData, } } /// Creates a second parser from the output of the first one, then apply over the rest of the input fn flat_map(self, g: G) -> FlatMap where G: FnMut(O) -> H, H: Parser, Self: core::marker::Sized, { FlatMap { f: self, g, phantom: core::marker::PhantomData, } } /// Applies a second parser over the output of the first one fn and_then(self, g: G) -> AndThen where G: Parser, Self: core::marker::Sized, { AndThen { f: self, g, phantom: core::marker::PhantomData, } } /// Applies a second parser after the first one, return their results as a tuple fn and(self, g: G) -> And where G: Parser, Self: core::marker::Sized, { And { f: self, g } } /// Applies a second parser over the input if the first one failed fn or(self, g: G) -> Or where G: Parser, Self: core::marker::Sized, { Or { f: self, g } } /// automatically converts the parser's output and error values to another type, as long as they /// implement the `From` trait fn into, E2: From>(self) -> Into where Self: core::marker::Sized, { Into { f: self, phantom_out1: core::marker::PhantomData, phantom_err1: core::marker::PhantomData, phantom_out2: core::marker::PhantomData, phantom_err2: core::marker::PhantomData, } } } impl<'a, I, O, E, F> Parser for F where F: FnMut(I) -> IResult + 'a, { fn parse(&mut self, i: I) -> IResult { self(i) } } #[cfg(feature = "alloc")] use alloc::boxed::Box; #[cfg(feature = "alloc")] impl<'a, I, O, E> Parser for Box + 'a> { fn parse(&mut self, input: I) -> IResult { (**self).parse(input) } } /// Implementation of `Parser::map` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct Map { f: F, g: G, phantom: core::marker::PhantomData, } impl<'a, I, O1, O2, E, F: Parser, G: Fn(O1) -> O2> Parser for Map { fn parse(&mut self, i: I) -> IResult { match self.f.parse(i) { Err(e) => Err(e), Ok((i, o)) => Ok((i, (self.g)(o))), } } } /// Implementation of `Parser::flat_map` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct FlatMap { f: F, g: G, phantom: core::marker::PhantomData, } impl<'a, I, O1, O2, E, F: Parser, G: Fn(O1) -> H, H: Parser> Parser for FlatMap { fn parse(&mut self, i: I) -> IResult { let (i, o1) = self.f.parse(i)?; (self.g)(o1).parse(i) } } /// Implementation of `Parser::and_then` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct AndThen { f: F, g: G, phantom: core::marker::PhantomData, } impl<'a, I, O1, O2, E, F: Parser, G: Parser> Parser for AndThen { fn parse(&mut self, i: I) -> IResult { let (i, o1) = self.f.parse(i)?; let (_, o2) = self.g.parse(o1)?; Ok((i, o2)) } } /// Implementation of `Parser::and` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct And { f: F, g: G, } impl<'a, I, O1, O2, E, F: Parser, G: Parser> Parser for And { fn parse(&mut self, i: I) -> IResult { let (i, o1) = self.f.parse(i)?; let (i, o2) = self.g.parse(i)?; Ok((i, (o1, o2))) } } /// Implementation of `Parser::or` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct Or { f: F, g: G, } impl<'a, I: Clone, O, E: crate::error::ParseError, F: Parser, G: Parser> Parser for Or { fn parse(&mut self, i: I) -> IResult { match self.f.parse(i.clone()) { Err(Err::Error(e1)) => match self.g.parse(i) { Err(Err::Error(e2)) => Err(Err::Error(e1.or(e2))), res => res, }, res => res, } } } /// Implementation of `Parser::into` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct Into, E1, E2: From> { f: F, phantom_out1: core::marker::PhantomData, phantom_err1: core::marker::PhantomData, phantom_out2: core::marker::PhantomData, phantom_err2: core::marker::PhantomData, } impl< 'a, I: Clone, O1, O2: From, E1, E2: crate::error::ParseError + From, F: Parser, > Parser for Into { fn parse(&mut self, i: I) -> IResult { match self.f.parse(i) { Ok((i, o)) => Ok((i, o.into())), Err(Err::Error(e)) => Err(Err::Error(e.into())), Err(Err::Failure(e)) => Err(Err::Failure(e.into())), Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), } } } #[cfg(test)] mod tests { use super::*; use crate::error::ErrorKind; #[doc(hidden)] #[macro_export] macro_rules! assert_size ( ($t:ty, $sz:expr) => ( assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz); ); ); #[test] #[cfg(target_pointer_width = "64")] fn size_test() { assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40); //FIXME: since rust 1.65, this is now 32 bytes, likely thanks to https://github.com/rust-lang/rust/pull/94075 // deactivating that test for now because it'll have different values depending on the rust version // assert_size!(IResult<&str, &str, u32>, 40); assert_size!(Needed, 8); assert_size!(Err, 16); assert_size!(ErrorKind, 1); } #[test] fn err_map_test() { let e = Err::Error(1); assert_eq!(e.map(|v| v + 1), Err::Error(2)); } }