//! Stream capability for combinators to parse //! //! Stream types include: //! - `&[u8]` and [`Bytes`] for binary data //! - `&str` (aliased as [`Str`]) and [`BStr`] for UTF-8 data //! - [`Located`] can track the location within the original buffer to report //! [spans][crate::Parser::with_span] //! - [`Stateful`] to thread global state through your parsers //! - [`Partial`] can mark an input as partial buffer that is being streamed into //! - [Custom stream types][crate::_topic::stream] use core::num::NonZeroUsize; use crate::error::Needed; use crate::lib::std::iter::{Cloned, Enumerate}; use crate::lib::std::slice::Iter; use crate::lib::std::str::from_utf8; use crate::lib::std::str::CharIndices; use crate::lib::std::str::FromStr; #[allow(unused_imports)] #[cfg(feature = "unstable-doc")] use crate::error::ErrMode; #[cfg(feature = "alloc")] use crate::lib::std::collections::BTreeMap; #[cfg(feature = "std")] use crate::lib::std::collections::HashMap; #[cfg(feature = "alloc")] use crate::lib::std::string::String; #[cfg(feature = "alloc")] use crate::lib::std::vec::Vec; mod impls; #[cfg(test)] mod tests; /// UTF-8 Stream pub type Str<'i> = &'i str; /// Improved `Debug` experience for `&[u8]` byte streams #[allow(clippy::derive_hash_xor_eq)] #[derive(Hash)] #[repr(transparent)] pub struct Bytes([u8]); impl Bytes { /// Make a stream out of a byte slice-like. #[inline] pub fn new>(bytes: &B) -> &Self { Self::from_bytes(bytes.as_ref()) } #[inline] fn from_bytes(slice: &[u8]) -> &Self { unsafe { crate::lib::std::mem::transmute(slice) } } #[inline] fn as_bytes(&self) -> &[u8] { &self.0 } } /// Improved `Debug` experience for `&[u8]` UTF-8-ish streams #[allow(clippy::derive_hash_xor_eq)] #[derive(Hash)] #[repr(transparent)] pub struct BStr([u8]); impl BStr { /// Make a stream out of a byte slice-like. 
#[inline] pub fn new>(bytes: &B) -> &Self { Self::from_bytes(bytes.as_ref()) } #[inline] fn from_bytes(slice: &[u8]) -> &Self { unsafe { crate::lib::std::mem::transmute(slice) } } #[inline] fn as_bytes(&self) -> &[u8] { &self.0 } } /// Allow collecting the span of a parsed token /// /// See [`Parser::span`][crate::Parser::span] and [`Parser::with_span`][crate::Parser::with_span] for more details #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Located { initial: I, input: I, } impl Located where I: Clone + Offset, { /// Wrap another Stream with span tracking pub fn new(input: I) -> Self { let initial = input.clone(); Self { initial, input } } fn location(&self) -> usize { self.input.offset_from(&self.initial) } } impl AsRef for Located { #[inline(always)] fn as_ref(&self) -> &I { &self.input } } impl crate::lib::std::ops::Deref for Located { type Target = I; #[inline(always)] fn deref(&self) -> &Self::Target { &self.input } } impl crate::lib::std::fmt::Display for Located { fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { self.input.fmt(f) } } /// Thread global state through your parsers /// /// Use cases /// - Recursion checks /// - Error recovery /// - Debugging /// /// # Example /// /// ``` /// # use std::cell::Cell; /// # use winnow::prelude::*; /// # use winnow::stream::Stateful; /// # use winnow::ascii::alpha1; /// # type Error = (); /// /// #[derive(Clone, Debug)] /// struct State<'s>(&'s Cell); /// /// impl<'s> State<'s> { /// fn count(&self) { /// self.0.set(self.0.get() + 1); /// } /// } /// /// type Stream<'is> = Stateful<&'is str, State<'is>>; /// /// fn word<'s>(i: &mut Stream<'s>) -> PResult<&'s str> { /// i.state.count(); /// alpha1.parse_next(i) /// } /// /// let data = "Hello"; /// let state = Cell::new(0); /// let input = Stream { input: data, state: State(&state) }; /// let output = word.parse(input).unwrap(); /// assert_eq!(state.get(), 1); /// ``` #[derive(Clone, Copy, 
Debug, Eq, PartialEq)] pub struct Stateful { /// Inner input being wrapped in state pub input: I, /// User-provided state pub state: S, } impl AsRef for Stateful { #[inline(always)] fn as_ref(&self) -> &I { &self.input } } impl crate::lib::std::ops::Deref for Stateful { type Target = I; #[inline(always)] fn deref(&self) -> &Self::Target { self.as_ref() } } impl crate::lib::std::fmt::Display for Stateful { fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { self.input.fmt(f) } } /// Mark the input as a partial buffer for streaming input. /// /// Complete input means that we already have all of the data. This will be the common case with /// small files that can be read entirely to memory. /// /// In contrast, streaming input assumes that we might not have all of the data. /// This can happen with some network protocol or large file parsers, where the /// input buffer can be full and need to be resized or refilled. /// - [`ErrMode::Incomplete`] will report how much more data is needed. /// - [`Parser::complete_err`][crate::Parser::complete_err] transform [`ErrMode::Incomplete`] to /// [`ErrMode::Backtrack`] /// /// See also [`StreamIsPartial`] to tell whether the input supports complete or partial parsing. /// /// See also [Special Topics: Parsing Partial Input][crate::_topic::partial]. 
///
/// # Example
///
/// Here is how it works in practice:
///
/// ```rust
/// # use winnow::{PResult, error::ErrMode, error::Needed, error::{InputError, ErrorKind}, token, ascii, stream::Partial};
/// # use winnow::prelude::*;
///
/// fn take_partial<'s>(i: &mut Partial<&'s [u8]>) -> PResult<&'s [u8], InputError<Partial<&'s [u8]>>> {
///     token::take(4u8).parse_next(i)
/// }
///
/// fn take_complete<'s>(i: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> {
///     token::take(4u8).parse_next(i)
/// }
///
/// // both parsers will take 4 bytes as expected
/// assert_eq!(take_partial.parse_peek(Partial::new(&b"abcde"[..])), Ok((Partial::new(&b"e"[..]), &b"abcd"[..])));
/// assert_eq!(take_complete.parse_peek(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
///
/// // if the input is smaller than 4 bytes, the partial parser
/// // will return `Incomplete` to indicate that we need more data
/// assert_eq!(take_partial.parse_peek(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1))));
///
/// // but the complete parser will return an error
/// assert_eq!(take_complete.parse_peek(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice))));
///
/// // the alpha0 function recognizes 0 or more alphabetic characters
/// fn alpha0_partial<'s>(i: &mut Partial<&'s str>) -> PResult<&'s str, InputError<Partial<&'s str>>> {
///     ascii::alpha0.parse_next(i)
/// }
///
/// fn alpha0_complete<'s>(i: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
///     ascii::alpha0.parse_next(i)
/// }
///
/// // if there's a clear limit to the recognized characters, both parsers work the same way
/// assert_eq!(alpha0_partial.parse_peek(Partial::new("abcd;")), Ok((Partial::new(";"), "abcd")));
/// assert_eq!(alpha0_complete.parse_peek("abcd;"), Ok((";", "abcd")));
///
/// // but when there's no limit, the partial version returns `Incomplete`, because it cannot
/// // know if more input data should be recognized.
The whole input could be "abcd;", or /// // "abcde;" /// assert_eq!(alpha0_partial.parse_peek(Partial::new("abcd")), Err(ErrMode::Incomplete(Needed::new(1)))); /// /// // while the complete version knows that all of the data is there /// assert_eq!(alpha0_complete.parse_peek("abcd"), Ok(("", "abcd"))); /// ``` #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Partial { input: I, partial: bool, } impl Partial where I: StreamIsPartial, { /// Create a partial input pub fn new(input: I) -> Self { debug_assert!( !I::is_partial_supported(), "`Partial` can only wrap complete sources" ); let partial = true; Self { input, partial } } /// Extract the original [`Stream`] #[inline(always)] pub fn into_inner(self) -> I { self.input } } impl Default for Partial where I: Default + StreamIsPartial, { fn default() -> Self { Self::new(I::default()) } } impl crate::lib::std::ops::Deref for Partial { type Target = I; #[inline(always)] fn deref(&self) -> &Self::Target { &self.input } } impl crate::lib::std::fmt::Display for Partial { fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { self.input.fmt(f) } } /// Abstract method to calculate the input length pub trait SliceLen { /// Calculates the input length, as indicated by its name, /// and the name of the trait itself fn slice_len(&self) -> usize; } impl<'a, T> SliceLen for &'a [T] { #[inline] fn slice_len(&self) -> usize { self.len() } } impl SliceLen for [T; LEN] { #[inline] fn slice_len(&self) -> usize { self.len() } } impl<'a, T, const LEN: usize> SliceLen for &'a [T; LEN] { #[inline] fn slice_len(&self) -> usize { self.len() } } impl<'a> SliceLen for &'a str { #[inline] fn slice_len(&self) -> usize { self.len() } } impl<'a> SliceLen for &'a Bytes { #[inline] fn slice_len(&self) -> usize { self.len() } } impl<'a> SliceLen for &'a BStr { #[inline] fn slice_len(&self) -> usize { self.len() } } impl SliceLen for (I, usize, usize) where I: SliceLen, { #[inline(always)] 
fn slice_len(&self) -> usize { self.0.slice_len() * 8 + self.2 - self.1 } } impl SliceLen for Located where I: SliceLen, { #[inline(always)] fn slice_len(&self) -> usize { self.input.slice_len() } } impl SliceLen for Stateful where I: SliceLen, { #[inline(always)] fn slice_len(&self) -> usize { self.input.slice_len() } } impl SliceLen for Partial where I: SliceLen, { #[inline(always)] fn slice_len(&self) -> usize { self.input.slice_len() } } /// Core definition for parser input state pub trait Stream: Offset<::Checkpoint> + crate::lib::std::fmt::Debug { /// The smallest unit being parsed /// /// Example: `u8` for `&[u8]` or `char` for `&str` type Token: crate::lib::std::fmt::Debug; /// Sequence of `Token`s /// /// Example: `&[u8]` for `Located<&[u8]>` or `&str` for `Located<&str>` type Slice: crate::lib::std::fmt::Debug; /// Iterate with the offset from the current location type IterOffsets: Iterator; /// A parse location within the stream type Checkpoint: Offset + Clone + crate::lib::std::fmt::Debug; /// Iterate with the offset from the current location fn iter_offsets(&self) -> Self::IterOffsets; /// Returns the offset to the end of the input fn eof_offset(&self) -> usize; /// Split off the next token from the input fn next_token(&mut self) -> Option; /// Split off the next token from a clone of the input, returning the advanced clone and leaving `self` untouched #[inline(always)] fn peek_token(&self) -> Option<(Self, Self::Token)> where Self: Clone, { let mut peek = self.clone(); let token = peek.next_token()?; Some((peek, token)) } /// Finds the offset of the next matching token fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool; /// Get the offset for the number of `tokens` into the stream /// /// This means "0 tokens" will return `0` offset fn offset_at(&self, tokens: usize) -> Result; /// Split off a slice of tokens from the input /// /// **NOTE:** For inputs with variable width tokens, like `&str`'s `char`, `offset` might not correspond /// with the number of tokens. To get a valid offset, use: /// - [`Stream::eof_offset`] /// - [`Stream::iter_offsets`] /// - [`Stream::offset_for`] /// - [`Stream::offset_at`] /// /// # Panic /// /// This will panic if /// /// * Indexes must be within bounds of the original input; /// * Indexes must uphold invariants of the stream, like for `str` they must lie on UTF-8 /// sequence boundaries. /// fn next_slice(&mut self, offset: usize) -> Self::Slice; /// Split off a slice of tokens from the input #[inline(always)] fn peek_slice(&self, offset: usize) -> (Self, Self::Slice) where Self: Clone, { let mut peek = self.clone(); let slice = peek.next_slice(offset); (peek, slice) } /// Advance to the end of the stream #[inline(always)] fn finish(&mut self) -> Self::Slice { self.next_slice(self.eof_offset()) } /// Advance to the end of the stream #[inline(always)] fn peek_finish(&self) -> (Self, Self::Slice) where Self: Clone, { let mut peek = self.clone(); let slice = peek.finish(); (peek, slice) } /// Save the current parse location within the stream fn checkpoint(&self) -> Self::Checkpoint; /// Revert the stream to a prior [`Self::Checkpoint`] /// /// # Panic /// /// May panic if an invalid [`Self::Checkpoint`] is provided fn reset(&mut self, checkpoint: Self::Checkpoint); /// Return the inner-most stream fn raw(&self) -> &dyn crate::lib::std::fmt::Debug; } impl<'i, T> Stream for &'i [T] where T: Clone + crate::lib::std::fmt::Debug, { type Token = T; type Slice = &'i [T]; type IterOffsets = Enumerate>>; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets 
{ self.iter().cloned().enumerate() } #[inline(always)] fn eof_offset(&self) -> usize { self.len() } #[inline(always)] fn next_token(&mut self) -> Option { let (token, next) = self.split_first()?; *self = next; Some(token.clone()) } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.iter().position(|b| predicate(b.clone())) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) { Err(Needed::Size(needed)) } else { Ok(tokens) } } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { let (slice, next) = self.split_at(offset); *self = next; slice } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(*self) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { *self = checkpoint.0; } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { self } } impl<'i> Stream for &'i str { type Token = char; type Slice = &'i str; type IterOffsets = CharIndices<'i>; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.char_indices() } #[inline(always)] fn eof_offset(&self) -> usize { self.len() } #[inline(always)] fn next_token(&mut self) -> Option { let c = self.chars().next()?; let offset = c.len(); *self = &self[offset..]; Some(c) } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { for (o, c) in self.iter_offsets() { if predicate(c) { return Some(o); } } None } #[inline] fn offset_at(&self, tokens: usize) -> Result { let mut cnt = 0; for (offset, _) in self.iter_offsets() { if cnt == tokens { return Ok(offset); } cnt += 1; } if cnt == tokens { Ok(self.eof_offset()) } else { Err(Needed::Unknown) } } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { let (slice, next) = self.split_at(offset); *self = next; slice } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(*self) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { *self = checkpoint.0; } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { self } } impl<'i> Stream for &'i Bytes { type Token = u8; type Slice = &'i [u8]; type IterOffsets = Enumerate>>; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.iter().cloned().enumerate() } #[inline(always)] fn eof_offset(&self) -> usize { self.len() } #[inline(always)] fn next_token(&mut self) -> Option { if self.is_empty() { None } else { let token = self[0]; *self = &self[1..]; Some(token) } } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.iter().position(|b| predicate(*b)) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) { Err(Needed::Size(needed)) } else { Ok(tokens) } } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { let (slice, next) = self.0.split_at(offset); *self = Bytes::from_bytes(next); slice } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(*self) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { *self = checkpoint.0; } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { self } } impl<'i> Stream for &'i BStr { type Token = u8; type Slice = &'i [u8]; type IterOffsets = Enumerate>>; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.iter().cloned().enumerate() } #[inline(always)] fn eof_offset(&self) -> usize { self.len() } #[inline(always)] fn next_token(&mut self) -> Option { if self.is_empty() { None } else { let token = self[0]; *self = &self[1..]; Some(token) } } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.iter().position(|b| predicate(*b)) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) { Err(Needed::Size(needed)) } else { Ok(tokens) } } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { let (slice, next) = self.0.split_at(offset); *self = BStr::from_bytes(next); slice } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(*self) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { *self = checkpoint.0; } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { self } } impl Stream for (I, usize) where I: Stream + Clone, { type Token = bool; type Slice = (I::Slice, usize, usize); type IterOffsets = BitOffsets; type Checkpoint = Checkpoint<(I::Checkpoint, usize)>; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { BitOffsets { i: self.clone(), o: 0, } } #[inline(always)] fn eof_offset(&self) -> usize { let offset = self.0.eof_offset() * 8; if offset == 0 { 0 } else { offset - self.1 } } #[inline(always)] fn next_token(&mut self) -> Option { next_bit(self) } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.iter_offsets() .find_map(|(o, b)| predicate(b).then_some(o)) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { if let Some(needed) = tokens .checked_sub(self.eof_offset()) .and_then(NonZeroUsize::new) { Err(Needed::Size(needed)) } else { Ok(tokens) } } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { let byte_offset = (offset + self.1) / 8; let end_offset = (offset + self.1) % 8; let s = self.0.next_slice(byte_offset); let start_offset = self.1; self.1 = end_offset; (s, start_offset, end_offset) } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint((self.0.checkpoint(), self.1)) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { self.0.reset(checkpoint.0 .0); self.1 = checkpoint.0 .1; } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { &self.0 } } /// Iterator for [bit][crate::binary::bits] stream (`(I, usize)`) pub struct BitOffsets { i: (I, usize), o: usize, } impl Iterator for BitOffsets where I: Stream + Clone, { type Item = (usize, bool); fn next(&mut self) -> Option { let b = next_bit(&mut self.i)?; let o = self.o; self.o += 1; Some((o, b)) } } fn next_bit(i: &mut (I, usize)) -> Option where I: Stream + Clone, { if i.eof_offset() == 0 { return None; } let offset = i.1; let mut next_i = i.0.clone(); let byte = next_i.next_token()?; let bit = (byte >> offset) & 0x1 == 0x1; let next_offset = offset + 1; if next_offset == 8 { i.0 = next_i; i.1 = 0; Some(bit) } else { i.1 = next_offset; Some(bit) } } impl Stream for Located { type Token = ::Token; type Slice = ::Slice; type IterOffsets = ::IterOffsets; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.input.iter_offsets() } #[inline(always)] fn eof_offset(&self) -> usize { self.input.eof_offset() } #[inline(always)] fn next_token(&mut self) -> Option { self.input.next_token() } 
#[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.input.offset_for(predicate) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { self.input.offset_at(tokens) } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { self.input.next_slice(offset) } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(self.input.checkpoint()) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { self.input.reset(checkpoint.0); } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { &self.input } } impl Stream for Stateful { type Token = ::Token; type Slice = ::Slice; type IterOffsets = ::IterOffsets; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.input.iter_offsets() } #[inline(always)] fn eof_offset(&self) -> usize { self.input.eof_offset() } #[inline(always)] fn next_token(&mut self) -> Option { self.input.next_token() } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.input.offset_for(predicate) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { self.input.offset_at(tokens) } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { self.input.next_slice(offset) } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(self.input.checkpoint()) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { self.input.reset(checkpoint.0); } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { &self.input } } impl Stream for Partial { type Token = ::Token; type Slice = ::Slice; type IterOffsets = ::IterOffsets; type Checkpoint = Checkpoint; #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.input.iter_offsets() } #[inline(always)] fn eof_offset(&self) -> usize { self.input.eof_offset() } #[inline(always)] fn next_token(&mut self) -> Option { self.input.next_token() } #[inline(always)] fn offset_for

(&self, predicate: P) -> Option where P: Fn(Self::Token) -> bool, { self.input.offset_for(predicate) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result { self.input.offset_at(tokens) } #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { self.input.next_slice(offset) } #[inline(always)] fn checkpoint(&self) -> Self::Checkpoint { Checkpoint(self.input.checkpoint()) } #[inline(always)] fn reset(&mut self, checkpoint: Self::Checkpoint) { self.input.reset(checkpoint.0); } #[inline(always)] fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { &self.input } } /// Number of indices input has advanced since start of parsing pub trait Location { /// Number of indices input has advanced since start of parsing fn location(&self) -> usize; } impl Location for Located where I: Clone + Offset, { #[inline(always)] fn location(&self) -> usize { self.location() } } impl Location for Stateful where I: Location, { #[inline(always)] fn location(&self) -> usize { self.input.location() } } impl Location for Partial where I: Location, { #[inline(always)] fn location(&self) -> usize { self.input.location() } } /// Marks the input as being the complete buffer or a partial buffer for streaming input /// /// See [`Partial`] for marking a presumed complete buffer type as a streaming buffer. 
pub trait StreamIsPartial: Sized {
    /// Whether the stream is currently partial or complete
    type PartialState;

    /// Mark the stream as complete
    ///
    /// The returned state is what [`StreamIsPartial::restore_partial`] accepts to undo this call.
    #[must_use]
    fn complete(&mut self) -> Self::PartialState;

    /// Restore the stream back to its previous state
    fn restore_partial(&mut self, state: Self::PartialState);

    /// Report whether the [`Stream`] can ever be incomplete
    fn is_partial_supported() -> bool;

    /// Report whether the [`Stream`] is currently incomplete
    ///
    /// Defaults to [`StreamIsPartial::is_partial_supported`].
    #[inline(always)]
    fn is_partial(&self) -> bool {
        Self::is_partial_supported()
    }
}

impl<'a, T> StreamIsPartial for &'a [T] {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {
        // Already complete
    }

    fn restore_partial(&mut self, _state: Self::PartialState) {}

    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> StreamIsPartial for &'a str {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {
        // Already complete
    }

    fn restore_partial(&mut self, _state: Self::PartialState) {}

    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> StreamIsPartial for &'a Bytes {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {
        // Already complete
    }

    fn restore_partial(&mut self, _state: Self::PartialState) {}

    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> StreamIsPartial for &'a BStr {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {
        // Already complete
    }

    fn restore_partial(&mut self, _state: Self::PartialState) {}

    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }
}

/// Bit-level streams defer entirely to the wrapped byte stream.
impl<I> StreamIsPartial for (I, usize)
where
    I: StreamIsPartial,
{
    type PartialState = I::PartialState;

    fn complete(&mut self) -> Self::PartialState {
        self.0.complete()
    }

    fn restore_partial(&mut self, state: Self::PartialState) {
        self.0.restore_partial(state);
    }

    #[inline(always)]
    fn is_partial_supported() -> bool {
        I::is_partial_supported()
    }

    #[inline(always)]
    fn is_partial(&self) -> bool {
        self.0.is_partial()
    }
}

/// Span tracking does not affect partial-ness; defer to the wrapped stream.
impl<I> StreamIsPartial for Located<I>
where
    I: StreamIsPartial,
{
    type PartialState = I::PartialState;

    fn complete(&mut self) -> Self::PartialState {
        self.input.complete()
    }

    fn restore_partial(&mut self, state: Self::PartialState) {
        self.input.restore_partial(state);
    }

    #[inline(always)]
    fn is_partial_supported() -> bool {
        I::is_partial_supported()
    }

    #[inline(always)]
    fn is_partial(&self) -> bool {
        self.input.is_partial()
    }
}

/// User state does not affect partial-ness; defer to the wrapped stream.
impl<I, S> StreamIsPartial for Stateful<I, S>
where
    I: StreamIsPartial,
{
    type PartialState = I::PartialState;

    fn complete(&mut self) -> Self::PartialState {
        self.input.complete()
    }

    fn restore_partial(&mut self, state: Self::PartialState) {
        self.input.restore_partial(state);
    }

    #[inline(always)]
    fn is_partial_supported() -> bool {
        I::is_partial_supported()
    }

    #[inline(always)]
    fn is_partial(&self) -> bool {
        self.input.is_partial()
    }
}

/// `Partial` tracks partial-ness itself through its `partial` flag.
impl<I> StreamIsPartial for Partial<I>
where
    I: StreamIsPartial,
{
    type PartialState = bool;

    fn complete(&mut self) -> Self::PartialState {
        // Clear the flag and hand the previous value back for `restore_partial`
        core::mem::replace(&mut self.partial, false)
    }

    fn restore_partial(&mut self, state: Self::PartialState) {
        self.partial = state;
    }

    #[inline(always)]
    fn is_partial_supported() -> bool {
        true
    }

    #[inline(always)]
    fn is_partial(&self) -> bool {
        self.partial
    }
}

/// Calculate the offset between two positions within the same input
pub trait Offset<Start = Self> {
    /// Offset of `self` relative to `start`
    ///
    /// `start` must be an earlier (or equal) position within the same input as `self`.
    fn offset_from(&self, start: &Start) -> usize;
}

impl<'a, T> Offset for &'a [T] {
    /// Number of `T` tokens between the start of `start` and the start of `self`
    ///
    /// The result is in tokens, not bytes, so it can be passed to `next_slice` / compared with
    /// `eof_offset`, which both count tokens for `&[T]`.
    #[inline]
    fn offset_from(&self, start: &Self) -> usize {
        let fst = (*start).as_ptr();
        let snd = (*self).as_ptr();

        debug_assert!(
            fst <= snd,
            "`Offset::offset_from` only accepts slices of `self`"
        );
        let byte_distance = snd as usize - fst as usize;
        // The pointer difference is in bytes; scale it down to tokens.
        // Zero-sized tokens never move the pointer, so `checked_div` maps them to 0
        // instead of dividing by zero.
        byte_distance
            .checked_div(core::mem::size_of::<T>())
            .unwrap_or(0)
    }
}

impl<'a, T> Offset<<&'a [T] as Stream>::Checkpoint> for &'a [T]
where
    T: Clone + crate::lib::std::fmt::Debug,
{
    /// Offset of `self` relative to a previously saved checkpoint
    #[inline(always)]
    fn offset_from(&self, other: &<&'a [T] as Stream>::Checkpoint) -> usize {
        self.checkpoint().offset_from(other)
    }
}

impl<'a> Offset for &'a str {
#[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.as_bytes().offset_from(&start.as_bytes()) } } impl<'a> Offset<<&'a str as Stream>::Checkpoint> for &'a str { #[inline(always)] fn offset_from(&self, other: &<&'a str as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl<'a> Offset for &'a Bytes { #[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.as_bytes().offset_from(&start.as_bytes()) } } impl<'a> Offset<<&'a Bytes as Stream>::Checkpoint> for &'a Bytes { #[inline(always)] fn offset_from(&self, other: &<&'a Bytes as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl<'a> Offset for &'a BStr { #[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.as_bytes().offset_from(&start.as_bytes()) } } impl<'a> Offset<<&'a BStr as Stream>::Checkpoint> for &'a BStr { #[inline(always)] fn offset_from(&self, other: &<&'a BStr as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl Offset for (I, usize) where I: Offset, { #[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.0.offset_from(&start.0) * 8 + self.1 - start.1 } } impl Offset<<(I, usize) as Stream>::Checkpoint> for (I, usize) where I: Stream + Clone, { #[inline(always)] fn offset_from(&self, other: &<(I, usize) as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl Offset for Located where I: Stream, { #[inline(always)] fn offset_from(&self, other: &Self) -> usize { self.offset_from(&other.checkpoint()) } } impl Offset< as Stream>::Checkpoint> for Located where I: Stream, { #[inline(always)] fn offset_from(&self, other: & as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl Offset for Stateful where I: Stream, S: Clone + crate::lib::std::fmt::Debug, { #[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.offset_from(&start.checkpoint()) } } impl Offset< as Stream>::Checkpoint> for Stateful where I: 
Stream, S: Clone + crate::lib::std::fmt::Debug, { #[inline(always)] fn offset_from(&self, other: & as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl Offset for Partial where I: Stream, { #[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.offset_from(&start.checkpoint()) } } impl Offset< as Stream>::Checkpoint> for Partial where I: Stream, { #[inline(always)] fn offset_from(&self, other: & as Stream>::Checkpoint) -> usize { self.checkpoint().offset_from(other) } } impl Offset for Checkpoint where I: Offset, { #[inline(always)] fn offset_from(&self, start: &Self) -> usize { self.0.offset_from(&start.0) } } /// Helper trait for types that can be viewed as a byte slice pub trait AsBytes { /// Casts the input type to a byte slice fn as_bytes(&self) -> &[u8]; } impl<'a> AsBytes for &'a [u8] { #[inline(always)] fn as_bytes(&self) -> &[u8] { self } } impl<'a> AsBytes for &'a Bytes { #[inline(always)] fn as_bytes(&self) -> &[u8] { (*self).as_bytes() } } impl AsBytes for Located where I: AsBytes, { #[inline(always)] fn as_bytes(&self) -> &[u8] { self.input.as_bytes() } } impl AsBytes for Stateful where I: AsBytes, { #[inline(always)] fn as_bytes(&self) -> &[u8] { self.input.as_bytes() } } impl AsBytes for Partial where I: AsBytes, { #[inline(always)] fn as_bytes(&self) -> &[u8] { self.input.as_bytes() } } /// Helper trait for types that can be viewed as a byte slice pub trait AsBStr { /// Casts the input type to a byte slice fn as_bstr(&self) -> &[u8]; } impl<'a> AsBStr for &'a [u8] { #[inline(always)] fn as_bstr(&self) -> &[u8] { self } } impl<'a> AsBStr for &'a BStr { #[inline(always)] fn as_bstr(&self) -> &[u8] { (*self).as_bytes() } } impl<'a> AsBStr for &'a str { #[inline(always)] fn as_bstr(&self) -> &[u8] { (*self).as_bytes() } } impl AsBStr for Located where I: AsBStr, { #[inline(always)] fn as_bstr(&self) -> &[u8] { self.input.as_bstr() } } impl AsBStr for Stateful where I: AsBStr, { #[inline(always)] fn 
as_bstr(&self) -> &[u8] { self.input.as_bstr() } } impl AsBStr for Partial where I: AsBStr, { #[inline(always)] fn as_bstr(&self) -> &[u8] { self.input.as_bstr() } } /// Result of [`Compare::compare`] #[derive(Debug, Eq, PartialEq)] pub enum CompareResult { /// Comparison was successful Ok, /// We need more data to be sure Incomplete, /// Comparison failed Error, } /// Abstracts comparison operations pub trait Compare { /// Compares self to another value for equality fn compare(&self, t: T) -> CompareResult; /// Compares self to another value for equality /// independently of the case. /// /// Warning: for `&str`, the comparison is done /// by lowercasing both strings and comparing /// the result. This is a temporary solution until /// a better one appears fn compare_no_case(&self, t: T) -> CompareResult; } fn lowercase_byte(c: u8) -> u8 { match c { b'A'..=b'Z' => c - b'A' + b'a', _ => c, } } impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] { #[inline] fn compare(&self, t: &'b [u8]) -> CompareResult { let pos = self.iter().zip(t.iter()).position(|(a, b)| a != b); match pos { Some(_) => CompareResult::Error, None => { if self.len() >= t.len() { CompareResult::Ok } else { CompareResult::Incomplete } } } } #[inline] fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { if self .iter() .zip(t) .any(|(a, b)| lowercase_byte(*a) != lowercase_byte(*b)) { CompareResult::Error } else if self.len() < t.len() { CompareResult::Incomplete } else { CompareResult::Ok } } } impl<'a, const LEN: usize> Compare<[u8; LEN]> for &'a [u8] { #[inline(always)] fn compare(&self, t: [u8; LEN]) -> CompareResult { self.compare(&t[..]) } #[inline(always)] fn compare_no_case(&self, t: [u8; LEN]) -> CompareResult { self.compare_no_case(&t[..]) } } impl<'a, 'b, const LEN: usize> Compare<&'b [u8; LEN]> for &'a [u8] { #[inline(always)] fn compare(&self, t: &'b [u8; LEN]) -> CompareResult { self.compare(&t[..]) } #[inline(always)] fn compare_no_case(&self, t: &'b [u8; LEN]) -> CompareResult { 
self.compare_no_case(&t[..]) } } impl<'a, 'b> Compare<&'b str> for &'a [u8] { #[inline(always)] fn compare(&self, t: &'b str) -> CompareResult { self.compare(t.as_bytes()) } #[inline(always)] fn compare_no_case(&self, t: &'b str) -> CompareResult { self.compare_no_case(t.as_bytes()) } } impl<'a, 'b> Compare<&'b str> for &'a str { #[inline(always)] fn compare(&self, t: &'b str) -> CompareResult { self.as_bytes().compare(t.as_bytes()) } //FIXME: this version is too simple and does not use the current locale #[inline] fn compare_no_case(&self, t: &'b str) -> CompareResult { let pos = self .chars() .zip(t.chars()) .position(|(a, b)| a.to_lowercase().ne(b.to_lowercase())); match pos { Some(_) => CompareResult::Error, None => { if self.len() >= t.len() { CompareResult::Ok } else { CompareResult::Incomplete } } } } } impl<'a, 'b> Compare<&'b [u8]> for &'a str { #[inline(always)] fn compare(&self, t: &'b [u8]) -> CompareResult { AsBStr::as_bstr(self).compare(t) } #[inline(always)] fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { AsBStr::as_bstr(self).compare_no_case(t) } } impl<'a, T> Compare for &'a Bytes where &'a [u8]: Compare, { #[inline(always)] fn compare(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare(t) } #[inline(always)] fn compare_no_case(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare_no_case(t) } } impl<'a, T> Compare for &'a BStr where &'a [u8]: Compare, { #[inline(always)] fn compare(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare(t) } #[inline(always)] fn compare_no_case(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare_no_case(t) } } impl Compare for Located where I: Compare, { #[inline(always)] fn compare(&self, other: U) -> CompareResult { self.input.compare(other) } #[inline(always)] fn compare_no_case(&self, other: U) -> CompareResult { self.input.compare_no_case(other) } } impl Compare for Stateful where I: Compare, { 
#[inline(always)] fn compare(&self, other: U) -> CompareResult { self.input.compare(other) } #[inline(always)] fn compare_no_case(&self, other: U) -> CompareResult { self.input.compare_no_case(other) } } impl Compare for Partial where I: Compare, { #[inline(always)] fn compare(&self, t: T) -> CompareResult { self.input.compare(t) } #[inline(always)] fn compare_no_case(&self, t: T) -> CompareResult { self.input.compare_no_case(t) } } /// Look for a slice in self pub trait FindSlice { /// Returns the offset of the slice if it is found fn find_slice(&self, substr: T) -> Option; } impl<'i, 's> FindSlice<&'s [u8]> for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: &'s [u8]) -> Option { memmem(self, substr) } } impl<'i> FindSlice for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: u8) -> Option { memchr(substr, self) } } impl<'i, 's> FindSlice<&'s str> for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: &'s str) -> Option { self.find_slice(substr.as_bytes()) } } impl<'i, 's> FindSlice<&'s str> for &'i str { #[inline(always)] fn find_slice(&self, substr: &'s str) -> Option { self.find(substr) } } impl<'i> FindSlice for &'i str { #[inline(always)] fn find_slice(&self, substr: char) -> Option { self.find(substr) } } impl<'i, S> FindSlice for &'i Bytes where &'i [u8]: FindSlice, { #[inline(always)] fn find_slice(&self, substr: S) -> Option { let bytes = (*self).as_bytes(); let offset = bytes.find_slice(substr); offset } } impl<'i, S> FindSlice for &'i BStr where &'i [u8]: FindSlice, { #[inline(always)] fn find_slice(&self, substr: S) -> Option { let bytes = (*self).as_bytes(); let offset = bytes.find_slice(substr); offset } } impl FindSlice for Located where I: FindSlice, { #[inline(always)] fn find_slice(&self, substr: T) -> Option { self.input.find_slice(substr) } } impl FindSlice for Stateful where I: FindSlice, { #[inline(always)] fn find_slice(&self, substr: T) -> Option { self.input.find_slice(substr) } } impl FindSlice for Partial where 
I: FindSlice, { #[inline(always)] fn find_slice(&self, substr: T) -> Option { self.input.find_slice(substr) } } /// Used to integrate `str`'s `parse()` method pub trait ParseSlice { /// Succeeds if `parse()` succeededThe /// /// The byte slice implementation will first convert it to a `&str`, then apply the `parse()` /// function fn parse_slice(&self) -> Option; } impl<'a, R: FromStr> ParseSlice for &'a [u8] { #[inline(always)] fn parse_slice(&self) -> Option { from_utf8(self).ok().and_then(|s| s.parse().ok()) } } impl<'a, R: FromStr> ParseSlice for &'a str { #[inline(always)] fn parse_slice(&self) -> Option { self.parse().ok() } } /// Convert a `Stream` into an appropriate `Output` type pub trait UpdateSlice: Stream { /// Convert an `Output` type to be used as `Stream` fn update_slice(self, inner: Self::Slice) -> Self; } impl<'a, T> UpdateSlice for &'a [T] where T: Clone + crate::lib::std::fmt::Debug, { #[inline(always)] fn update_slice(self, inner: Self::Slice) -> Self { inner } } impl<'a> UpdateSlice for &'a str { #[inline(always)] fn update_slice(self, inner: Self::Slice) -> Self { inner } } impl<'a> UpdateSlice for &'a Bytes { #[inline(always)] fn update_slice(self, inner: Self::Slice) -> Self { Bytes::new(inner) } } impl<'a> UpdateSlice for &'a BStr { #[inline(always)] fn update_slice(self, inner: Self::Slice) -> Self { BStr::new(inner) } } impl UpdateSlice for Located where I: UpdateSlice, { #[inline(always)] fn update_slice(mut self, inner: Self::Slice) -> Self { self.input = I::update_slice(self.input, inner); self } } impl UpdateSlice for Stateful where I: UpdateSlice, S: Clone + crate::lib::std::fmt::Debug, { #[inline(always)] fn update_slice(mut self, inner: Self::Slice) -> Self { self.input = I::update_slice(self.input, inner); self } } impl UpdateSlice for Partial where I: UpdateSlice, { #[inline(always)] fn update_slice(self, inner: Self::Slice) -> Self { Partial { input: I::update_slice(self.input, inner), partial: self.partial, } } } /// Ensure 
checkpoint details are kept privazte #[derive(Copy, Clone, Debug)] pub struct Checkpoint(T); /// A range bounded inclusively for counting parses performed #[derive(PartialEq, Eq)] pub struct Range { pub(crate) start_inclusive: usize, pub(crate) end_inclusive: Option, } impl Range { #[inline(always)] fn raw(start_inclusive: usize, end_inclusive: Option) -> Self { Self { start_inclusive, end_inclusive, } } } impl crate::lib::std::ops::RangeBounds for Range { #[inline(always)] fn start_bound(&self) -> crate::lib::std::ops::Bound<&usize> { crate::lib::std::ops::Bound::Included(&self.start_inclusive) } #[inline(always)] fn end_bound(&self) -> crate::lib::std::ops::Bound<&usize> { if let Some(end_inclusive) = &self.end_inclusive { crate::lib::std::ops::Bound::Included(end_inclusive) } else { crate::lib::std::ops::Bound::Unbounded } } } impl From for Range { #[inline(always)] fn from(fixed: usize) -> Self { (fixed..=fixed).into() } } impl From> for Range { #[inline(always)] fn from(range: crate::lib::std::ops::Range) -> Self { let start_inclusive = range.start; let end_inclusive = Some(range.end.saturating_sub(1)); Self::raw(start_inclusive, end_inclusive) } } impl From for Range { #[inline(always)] fn from(_: crate::lib::std::ops::RangeFull) -> Self { let start_inclusive = 0; let end_inclusive = None; Self::raw(start_inclusive, end_inclusive) } } impl From> for Range { #[inline(always)] fn from(range: crate::lib::std::ops::RangeFrom) -> Self { let start_inclusive = range.start; let end_inclusive = None; Self::raw(start_inclusive, end_inclusive) } } impl From> for Range { #[inline(always)] fn from(range: crate::lib::std::ops::RangeTo) -> Self { let start_inclusive = 0; let end_inclusive = Some(range.end.saturating_sub(1)); Self::raw(start_inclusive, end_inclusive) } } impl From> for Range { #[inline(always)] fn from(range: crate::lib::std::ops::RangeInclusive) -> Self { let start_inclusive = *range.start(); let end_inclusive = Some(*range.end()); 
Self::raw(start_inclusive, end_inclusive) } } impl From> for Range { #[inline(always)] fn from(range: crate::lib::std::ops::RangeToInclusive) -> Self { let start_inclusive = 0; let end_inclusive = Some(range.end); Self::raw(start_inclusive, end_inclusive) } } impl crate::lib::std::fmt::Display for Range { fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { self.start_inclusive.fmt(f)?; match self.end_inclusive { Some(e) if e == self.start_inclusive => {} Some(e) => { "..=".fmt(f)?; e.fmt(f)?; } None => { "..".fmt(f)?; } } Ok(()) } } impl crate::lib::std::fmt::Debug for Range { fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { write!(f, "{self}") } } /// Abstracts something which can extend an `Extend`. /// Used to build modified input slices in `escaped_transform` pub trait Accumulate: Sized { /// Create a new `Extend` of the correct type fn initial(capacity: Option) -> Self; /// Accumulate the input into an accumulator fn accumulate(&mut self, acc: T); } impl Accumulate for () { #[inline(always)] fn initial(_capacity: Option) -> Self {} #[inline(always)] fn accumulate(&mut self, _acc: T) {} } impl Accumulate for usize { #[inline(always)] fn initial(_capacity: Option) -> Self { 0 } #[inline(always)] fn accumulate(&mut self, _acc: T) { *self += 1; } } #[cfg(feature = "alloc")] impl Accumulate for Vec { #[inline(always)] fn initial(capacity: Option) -> Self { match capacity { Some(capacity) => Vec::with_capacity(clamp_capacity::(capacity)), None => Vec::new(), } } #[inline(always)] fn accumulate(&mut self, acc: T) { self.push(acc); } } #[cfg(feature = "alloc")] impl<'i, T: Clone> Accumulate<&'i [T]> for Vec { #[inline(always)] fn initial(capacity: Option) -> Self { match capacity { Some(capacity) => Vec::with_capacity(clamp_capacity::(capacity)), None => Vec::new(), } } #[inline(always)] fn accumulate(&mut self, acc: &'i [T]) { self.extend(acc.iter().cloned()); } } #[cfg(feature = 
"alloc")] impl Accumulate for String { #[inline(always)] fn initial(capacity: Option) -> Self { match capacity { Some(capacity) => String::with_capacity(clamp_capacity::(capacity)), None => String::new(), } } #[inline(always)] fn accumulate(&mut self, acc: char) { self.push(acc); } } #[cfg(feature = "alloc")] impl<'i> Accumulate<&'i str> for String { #[inline(always)] fn initial(capacity: Option) -> Self { match capacity { Some(capacity) => String::with_capacity(clamp_capacity::(capacity)), None => String::new(), } } #[inline(always)] fn accumulate(&mut self, acc: &'i str) { self.push_str(acc); } } #[cfg(feature = "alloc")] impl Accumulate<(K, V)> for BTreeMap where K: crate::lib::std::cmp::Ord, { #[inline(always)] fn initial(_capacity: Option) -> Self { BTreeMap::new() } #[inline(always)] fn accumulate(&mut self, (key, value): (K, V)) { self.insert(key, value); } } #[cfg(feature = "std")] impl Accumulate<(K, V)> for HashMap where K: crate::lib::std::cmp::Eq + crate::lib::std::hash::Hash, { #[inline(always)] fn initial(capacity: Option) -> Self { match capacity { Some(capacity) => HashMap::with_capacity(clamp_capacity::<(K, V)>(capacity)), None => HashMap::new(), } } #[inline(always)] fn accumulate(&mut self, (key, value): (K, V)) { self.insert(key, value); } } #[cfg(feature = "alloc")] #[inline] pub(crate) fn clamp_capacity(capacity: usize) -> usize { /// Don't pre-allocate more than 64KiB when calling `Vec::with_capacity`. /// /// Pre-allocating memory is a nice optimization but count fields can't /// always be trusted. We should clamp initial capacities to some reasonable /// amount. This reduces the risk of a bogus count value triggering a panic /// due to an OOM error. /// /// This does not affect correctness. `winnow` will always read the full number /// of elements regardless of the capacity cap. 
const MAX_INITIAL_CAPACITY_BYTES: usize = 65536; let max_initial_capacity = MAX_INITIAL_CAPACITY_BYTES / crate::lib::std::mem::size_of::().max(1); capacity.min(max_initial_capacity) } /// Helper trait to convert numbers to usize. /// /// By default, usize implements `From` and `From` but not /// `From` and `From` because that would be invalid on some /// platforms. This trait implements the conversion for platforms /// with 32 and 64 bits pointer platforms pub trait ToUsize { /// converts self to usize fn to_usize(&self) -> usize; } impl ToUsize for u8 { #[inline(always)] fn to_usize(&self) -> usize { *self as usize } } impl ToUsize for u16 { #[inline(always)] fn to_usize(&self) -> usize { *self as usize } } impl ToUsize for usize { #[inline(always)] fn to_usize(&self) -> usize { *self } } #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] impl ToUsize for u32 { #[inline(always)] fn to_usize(&self) -> usize { *self as usize } } #[cfg(target_pointer_width = "64")] impl ToUsize for u64 { #[inline(always)] fn to_usize(&self) -> usize { *self as usize } } /// Transforms a token into a char for basic string parsing #[allow(clippy::len_without_is_empty)] #[allow(clippy::wrong_self_convention)] pub trait AsChar { /// Makes a char from self /// /// # Example /// /// ``` /// use winnow::stream::AsChar as _; /// /// assert_eq!('a'.as_char(), 'a'); /// assert_eq!(u8::MAX.as_char(), std::char::from_u32(u8::MAX as u32).unwrap()); /// ``` fn as_char(self) -> char; /// Tests that self is an alphabetic character /// /// **Warning:** for `&str` it recognizes alphabetic /// characters outside of the 52 ASCII letters fn is_alpha(self) -> bool; /// Tests that self is an alphabetic character /// or a decimal digit fn is_alphanum(self) -> bool; /// Tests that self is a decimal digit fn is_dec_digit(self) -> bool; /// Tests that self is an hex digit fn is_hex_digit(self) -> bool; /// Tests that self is an octal digit fn is_oct_digit(self) -> bool; /// Gets the len in 
bytes for self fn len(self) -> usize; /// Tests that self is ASCII space or tab fn is_space(self) -> bool; /// Tests if byte is ASCII newline: \n fn is_newline(self) -> bool; } impl AsChar for u8 { #[inline(always)] fn as_char(self) -> char { self as char } #[inline] fn is_alpha(self) -> bool { matches!(self, 0x41..=0x5A | 0x61..=0x7A) } #[inline] fn is_alphanum(self) -> bool { self.is_alpha() || self.is_dec_digit() } #[inline] fn is_dec_digit(self) -> bool { matches!(self, 0x30..=0x39) } #[inline] fn is_hex_digit(self) -> bool { matches!(self, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66) } #[inline] fn is_oct_digit(self) -> bool { matches!(self, 0x30..=0x37) } #[inline] fn len(self) -> usize { 1 } #[inline] fn is_space(self) -> bool { self == b' ' || self == b'\t' } #[inline] fn is_newline(self) -> bool { self == b'\n' } } impl<'a> AsChar for &'a u8 { #[inline(always)] fn as_char(self) -> char { *self as char } #[inline] fn is_alpha(self) -> bool { matches!(*self, 0x41..=0x5A | 0x61..=0x7A) } #[inline] fn is_alphanum(self) -> bool { self.is_alpha() || self.is_dec_digit() } #[inline] fn is_dec_digit(self) -> bool { matches!(*self, 0x30..=0x39) } #[inline] fn is_hex_digit(self) -> bool { matches!(*self, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66) } #[inline] fn is_oct_digit(self) -> bool { matches!(*self, 0x30..=0x37) } #[inline] fn len(self) -> usize { 1 } #[inline] fn is_space(self) -> bool { *self == b' ' || *self == b'\t' } #[inline] fn is_newline(self) -> bool { *self == b'\n' } } impl AsChar for char { #[inline(always)] fn as_char(self) -> char { self } #[inline] fn is_alpha(self) -> bool { self.is_ascii_alphabetic() } #[inline] fn is_alphanum(self) -> bool { self.is_alpha() || self.is_dec_digit() } #[inline] fn is_dec_digit(self) -> bool { self.is_ascii_digit() } #[inline] fn is_hex_digit(self) -> bool { self.is_ascii_hexdigit() } #[inline] fn is_oct_digit(self) -> bool { self.is_digit(8) } #[inline] fn len(self) -> usize { self.len_utf8() } #[inline] fn 
is_space(self) -> bool { self == ' ' || self == '\t' } #[inline] fn is_newline(self) -> bool { self == '\n' } } impl<'a> AsChar for &'a char { #[inline(always)] fn as_char(self) -> char { *self } #[inline] fn is_alpha(self) -> bool { self.is_ascii_alphabetic() } #[inline] fn is_alphanum(self) -> bool { self.is_alpha() || self.is_dec_digit() } #[inline] fn is_dec_digit(self) -> bool { self.is_ascii_digit() } #[inline] fn is_hex_digit(self) -> bool { self.is_ascii_hexdigit() } #[inline] fn is_oct_digit(self) -> bool { self.is_digit(8) } #[inline] fn len(self) -> usize { self.len_utf8() } #[inline] fn is_space(self) -> bool { *self == ' ' || *self == '\t' } #[inline] fn is_newline(self) -> bool { *self == '\n' } } /// Check if a token in in a set of possible tokens /// /// This is generally implemented on patterns that a token may match and supports `u8` and `char` /// tokens along with the following patterns /// - `b'c'` and `'c'` /// - `b""` and `""` /// - `|c| true` /// - `b'a'..=b'z'`, `'a'..='z'` (etc for each [range type][std::ops]) /// - `(pattern1, pattern2, ...)` /// /// # Example /// /// For example, you could implement `hex_digit0` as: /// ``` /// # use winnow::prelude::*; /// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; /// # use winnow::token::take_while; /// fn hex_digit1<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { /// take_while(1.., ('a'..='f', 'A'..='F', '0'..='9')).parse_next(input) /// } /// /// assert_eq!(hex_digit1.parse_peek("21cZ"), Ok(("Z", "21c"))); /// assert_eq!(hex_digit1.parse_peek("H2"), Err(ErrMode::Backtrack(InputError::new("H2", ErrorKind::Slice)))); /// assert_eq!(hex_digit1.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); /// ``` pub trait ContainsToken { /// Returns true if self contains the token fn contains_token(&self, token: T) -> bool; } impl ContainsToken for u8 { #[inline(always)] fn contains_token(&self, token: u8) -> bool { *self == token } } 
impl<'a> ContainsToken<&'a u8> for u8 { #[inline(always)] fn contains_token(&self, token: &u8) -> bool { self.contains_token(*token) } } impl ContainsToken for u8 { #[inline(always)] fn contains_token(&self, token: char) -> bool { self.as_char() == token } } impl<'a> ContainsToken<&'a char> for u8 { #[inline(always)] fn contains_token(&self, token: &char) -> bool { self.contains_token(*token) } } impl ContainsToken for char { #[inline(always)] fn contains_token(&self, token: C) -> bool { *self == token.as_char() } } impl bool> ContainsToken for F { #[inline(always)] fn contains_token(&self, token: C) -> bool { self(token) } } impl ContainsToken for crate::lib::std::ops::Range { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let start = self.start.clone().as_char(); let end = self.end.clone().as_char(); (start..end).contains(&token.as_char()) } } impl ContainsToken for crate::lib::std::ops::RangeInclusive { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let start = self.start().clone().as_char(); let end = self.end().clone().as_char(); (start..=end).contains(&token.as_char()) } } impl ContainsToken for crate::lib::std::ops::RangeFrom { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let start = self.start.clone().as_char(); (start..).contains(&token.as_char()) } } impl ContainsToken for crate::lib::std::ops::RangeTo { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let end = self.end.clone().as_char(); (..end).contains(&token.as_char()) } } impl ContainsToken for crate::lib::std::ops::RangeToInclusive { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let end = self.end.clone().as_char(); (..=end).contains(&token.as_char()) } } impl ContainsToken for crate::lib::std::ops::RangeFull { #[inline(always)] fn contains_token(&self, _token: C1) -> bool { true } } impl ContainsToken for &'_ [u8] { #[inline] fn contains_token(&self, token: C) -> bool { let token = token.as_char(); 
self.iter().any(|t| t.as_char() == token) } } impl ContainsToken for &'_ [char] { #[inline] fn contains_token(&self, token: C) -> bool { let token = token.as_char(); self.iter().any(|t| *t == token) } } impl ContainsToken for &'_ [u8; LEN] { #[inline] fn contains_token(&self, token: C) -> bool { let token = token.as_char(); self.iter().any(|t| t.as_char() == token) } } impl ContainsToken for &'_ [char; LEN] { #[inline] fn contains_token(&self, token: C) -> bool { let token = token.as_char(); self.iter().any(|t| *t == token) } } impl ContainsToken for [u8; LEN] { #[inline] fn contains_token(&self, token: C) -> bool { let token = token.as_char(); self.iter().any(|t| t.as_char() == token) } } impl ContainsToken for [char; LEN] { #[inline] fn contains_token(&self, token: C) -> bool { let token = token.as_char(); self.iter().any(|t| *t == token) } } impl ContainsToken for () { #[inline(always)] fn contains_token(&self, _token: T) -> bool { false } } macro_rules! impl_contains_token_for_tuple { ($($haystack:ident),+) => ( #[allow(non_snake_case)] impl ContainsToken for ($($haystack),+,) where T: Clone, $($haystack: ContainsToken),+ { #[inline] fn contains_token(&self, token: T) -> bool { let ($(ref $haystack),+,) = *self; $($haystack.contains_token(token.clone()) || )+ false } } ) } macro_rules! 
impl_contains_token_for_tuples { ($haystack1:ident, $($haystack:ident),+) => { impl_contains_token_for_tuples!(__impl $haystack1; $($haystack),+); }; (__impl $($haystack:ident),+; $haystack1:ident $(,$haystack2:ident)*) => { impl_contains_token_for_tuple!($($haystack),+); impl_contains_token_for_tuples!(__impl $($haystack),+, $haystack1; $($haystack2),*); }; (__impl $($haystack:ident),+;) => { impl_contains_token_for_tuple!($($haystack),+); } } impl_contains_token_for_tuples!( F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21 ); #[cfg(feature = "simd")] #[inline(always)] fn memchr(token: u8, slice: &[u8]) -> Option { memchr::memchr(token, slice) } #[cfg(not(feature = "simd"))] #[inline(always)] fn memchr(token: u8, slice: &[u8]) -> Option { slice.iter().position(|t| *t == token) } #[cfg(feature = "simd")] #[inline(always)] fn memmem(slice: &[u8], tag: &[u8]) -> Option { if tag.len() > slice.len() { return None; } let (&substr_first, substr_rest) = match tag.split_first() { Some(split) => split, // an empty substring is found at position 0 // This matches the behavior of str.find(""). None => return Some(0), }; if substr_rest.is_empty() { return memchr::memchr(substr_first, slice); } let mut offset = 0; let haystack = &slice[..slice.len() - substr_rest.len()]; while let Some(position) = memchr::memchr(substr_first, &haystack[offset..]) { offset += position; let next_offset = offset + 1; if &slice[next_offset..][..substr_rest.len()] == substr_rest { return Some(offset); } offset = next_offset; } None } #[cfg(not(feature = "simd"))] fn memmem(slice: &[u8], tag: &[u8]) -> Option { for i in 0..slice.len() { let subslice = &slice[i..]; if subslice.starts_with(tag) { return Some(i); } } None }