diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:31 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:31 +0000 |
commit | dc0db358abe19481e475e10c32149b53370f1a1c (patch) | |
tree | ab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/pest/src | |
parent | Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff) | |
download | rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip |
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/pest/src')
-rw-r--r-- | vendor/pest/src/error.rs | 37 | ||||
-rw-r--r-- | vendor/pest/src/iterators/flat_pairs.rs | 40 | ||||
-rw-r--r-- | vendor/pest/src/iterators/pair.rs | 32 | ||||
-rw-r--r-- | vendor/pest/src/iterators/pairs.rs | 215 | ||||
-rw-r--r-- | vendor/pest/src/iterators/queueable_token.rs | 5 | ||||
-rw-r--r-- | vendor/pest/src/iterators/tokens.rs | 38 | ||||
-rw-r--r-- | vendor/pest/src/parser_state.rs | 45 | ||||
-rw-r--r-- | vendor/pest/src/pratt_parser.rs | 4 | ||||
-rw-r--r-- | vendor/pest/src/unicode/mod.rs | 1 |
9 files changed, 386 insertions, 31 deletions
diff --git a/vendor/pest/src/error.rs b/vendor/pest/src/error.rs index eef004233..df3f5448e 100644 --- a/vendor/pest/src/error.rs +++ b/vendor/pest/src/error.rs @@ -74,6 +74,19 @@ pub enum LineColLocation { Span((usize, usize), (usize, usize)), } +impl From<Position<'_>> for LineColLocation { + fn from(value: Position<'_>) -> Self { + Self::Pos(value.line_col()) + } +} + +impl From<Span<'_>> for LineColLocation { + fn from(value: Span<'_>) -> Self { + let (start, end) = value.split(); + Self::Span(start.line_col(), end.line_col()) + } +} + impl<R: RuleType> Error<R> { /// Creates `Error` from `ErrorVariant` and `Position`. /// @@ -418,7 +431,7 @@ impl<R: RuleType> Error<R> { .unwrap_or_default(); let pair = (self.line_col.clone(), &self.continued_line); - if let (LineColLocation::Span(_, end), &Some(ref continued_line)) = pair { + if let (LineColLocation::Span(_, end), Some(ref continued_line)) = pair { let has_line_gap = end.0 - self.start().0 > 1; if has_line_gap { format!( @@ -892,4 +905,26 @@ mod tests { .join("\n") ); } + + #[test] + fn pos_to_lcl_conversion() { + let input = "input"; + + let pos = Position::new(input, 2).unwrap(); + + assert_eq!(LineColLocation::Pos(pos.line_col()), pos.into()); + } + + #[test] + fn span_to_lcl_conversion() { + let input = "input"; + + let span = Span::new(input, 2, 4).unwrap(); + let (start, end) = span.split(); + + assert_eq!( + LineColLocation::Span(start.line_col(), end.line_col()), + span.into() + ); + } } diff --git a/vendor/pest/src/iterators/flat_pairs.rs b/vendor/pest/src/iterators/flat_pairs.rs index 52a207406..9b92f557a 100644 --- a/vendor/pest/src/iterators/flat_pairs.rs +++ b/vendor/pest/src/iterators/flat_pairs.rs @@ -25,7 +25,7 @@ pub struct FlatPairs<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. - queue: Rc<Vec<QueueableToken<R>>>, + queue: Rc<Vec<QueueableToken<'i, R>>>, input: &'i str, start: usize, end: usize, @@ -35,12 +35,12 @@ pub struct FlatPairs<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. -pub unsafe fn new<R: RuleType>( - queue: Rc<Vec<QueueableToken<R>>>, - input: &str, +pub unsafe fn new<'i, R: RuleType>( + queue: Rc<Vec<QueueableToken<'i, R>>>, + input: &'i str, start: usize, end: usize, -) -> FlatPairs<'_, R> { +) -> FlatPairs<'i, R> { FlatPairs { queue, input, @@ -102,6 +102,13 @@ impl<'i, R: RuleType> FlatPairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> { + fn len(&self) -> usize { + // Tokens len is exactly twice as flatten pairs len + (self.end - self.start) >> 1 + } +} + impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { type Item = Pair<'i, R>; @@ -122,6 +129,11 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { Some(pair) } + + fn size_hint(&self) -> (usize, Option<usize>) { + let len = <Self as ExactSizeIterator>::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { @@ -214,4 +226,22 @@ mod tests { assert_eq!(pair.line_col(), (1, 5)); assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "我很漂亮efgh").unwrap().flatten(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs = pairs.rev(); + assert_eq!(pairs.len(), pairs.count()); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.count() + 1, pairs_len); + } } diff --git a/vendor/pest/src/iterators/pair.rs b/vendor/pest/src/iterators/pair.rs index 891b90595..f1b311a40 100644 --- a/vendor/pest/src/iterators/pair.rs +++ b/vendor/pest/src/iterators/pair.rs @@ -12,6 +12,7 @@ use alloc::rc::Rc; #[cfg(feature = "pretty-print")] use alloc::string::String; use alloc::vec::Vec; +use core::borrow::Borrow; use core::fmt; use core::hash::{Hash, Hasher}; use core::ptr; @@ -40,7 +41,7 @@ pub struct Pair<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. - queue: Rc<Vec<QueueableToken<R>>>, + queue: Rc<Vec<QueueableToken<'i, R>>>, input: &'i str, /// Token index into `queue`. start: usize, @@ -50,12 +51,12 @@ pub struct Pair<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. -pub unsafe fn new<R: RuleType>( - queue: Rc<Vec<QueueableToken<R>>>, - input: &str, +pub unsafe fn new<'i, R: RuleType>( + queue: Rc<Vec<QueueableToken<'i, R>>>, + input: &'i str, line_index: Rc<LineIndex>, start: usize, -) -> Pair<'_, R> { +) -> Pair<'i, R> { Pair { queue, input, @@ -181,6 +182,15 @@ impl<'i, R: RuleType> Pair<'i, R> { unsafe { span::Span::new_unchecked(self.input, start, end) } } + /// Get current node tag + #[inline] + pub fn as_node_tag(&self) -> Option<&str> { + match &self.queue[self.pair()] { + QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()), + _ => None, + } + } + /// Returns the inner `Pairs` between the `Pair`, consuming it. /// /// # Examples @@ -291,9 +301,13 @@ impl<'i, R: RuleType> Pairs<'i, R> { impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Pair") - .field("rule", &self.as_rule()) - .field("span", &self.as_span()) + let pair = &mut f.debug_struct("Pair"); + pair.field("rule", &self.as_rule()); + // In order not to break compatibility + if let Some(s) = self.as_node_tag() { + pair.field("node_tag", &s); + } + pair.field("span", &self.as_span()) .field("inner", &self.clone().into_inner().collect::<Vec<_>>()) .finish() } @@ -336,7 +350,7 @@ impl<'i, R: Eq> Eq for Pair<'i, R> {} impl<'i, R: Hash> Hash for Pair<'i, R> { fn hash<H: Hasher>(&self, state: &mut H) { - (&*self.queue as *const Vec<QueueableToken<R>>).hash(state); + (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state); (self.input as *const str).hash(state); self.start.hash(state); } diff --git a/vendor/pest/src/iterators/pairs.rs b/vendor/pest/src/iterators/pairs.rs index c21a7fae1..ab7df75ee 100644 --- a/vendor/pest/src/iterators/pairs.rs +++ b/vendor/pest/src/iterators/pairs.rs @@ -13,6 +13,7 @@ use alloc::string::String; use alloc::vec::Vec; use core::fmt; use core::hash::{Hash, Hasher}; +use core::iter::Filter; use core::ptr; use core::str; @@ -33,30 +34,44 @@ use crate::RuleType; /// [`Pair::into_inner`]: struct.Pair.html#method.into_inner #[derive(Clone)] pub struct Pairs<'i, R> { - queue: Rc<Vec<QueueableToken<R>>>, + queue: Rc<Vec<QueueableToken<'i, R>>>, input: &'i str, start: usize, end: usize, + pairs_count: usize, line_index: Rc<LineIndex>, } -pub fn new<R: RuleType>( - queue: Rc<Vec<QueueableToken<R>>>, - input: &str, +pub fn new<'i, R: RuleType>( + queue: Rc<Vec<QueueableToken<'i, R>>>, + input: &'i str, line_index: Option<Rc<LineIndex>>, start: usize, end: usize, -) -> Pairs<'_, R> { +) -> Pairs<'i, R> { let line_index = match line_index { Some(line_index) => line_index, None => Rc::new(LineIndex::new(input)), }; + let mut pairs_count = 0; + let mut cursor = start; + while cursor < end { + cursor = match queue[cursor] { + QueueableToken::Start { + end_token_index, .. + } => end_token_index, + _ => unreachable!(), + } + 1; + pairs_count += 1; + } + Pairs { queue, input, start, end, + pairs_count, line_index, } } @@ -159,6 +174,114 @@ impl<'i, R: RuleType> Pairs<'i, R> { unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) } } + /// Finds the first pair that has its node or branch tagged with the provided + /// label. + /// + /// # Examples + /// + /// Try to recognize the branch between add and mul + /// ``` + /// use pest::{state, ParseResult, ParserState}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// number, // 0..9 + /// add, // num + num + /// mul, // num * num + /// } + /// fn mark_branch( + /// state: Box<ParserState<'_, Rule>>, + /// ) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// expr(state, Rule::mul, "*") + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul"))) + /// .or_else(|state| expr(state, Rule::add, "+")) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add"))) + /// } + /// fn expr<'a>( + /// state: Box<ParserState<'a, Rule>>, + /// r: Rule, + /// o: &'static str, + /// ) -> ParseResult<Box<ParserState<'a, Rule>>> { + /// state.rule(r, |state| { + /// state.sequence(|state| { + /// number(state) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs"))) + /// .and_then(|state| state.match_string(o)) + /// .and_then(number) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs"))) + /// }) + /// }) + /// } + /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// state.rule(Rule::number, |state| state.match_range('0'..'9')) + /// } + /// let input = "1+2"; + /// let pairs = state(input, mark_branch).unwrap(); + /// assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add); + /// assert_eq!(pairs.find_first_tagged("mul"), None); + /// ``` + #[inline] + pub fn find_first_tagged(&self, tag: &'i str) -> Option<Pair<'i, R>> { + self.clone().find_tagged(tag).next() + } + + /// Returns the iterator over pairs that have their node or branch tagged + /// with the provided label. + /// + /// # Examples + /// + /// Try to recognize the node between left and right hand side + /// ``` + /// use pest::{state, ParseResult, ParserState}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// number, // 0..9 + /// add, // num + num + /// mul, // num * num + /// } + /// fn mark_branch( + /// state: Box<ParserState<'_, Rule>>, + /// ) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// expr(state, Rule::mul, "*") + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul"))) + /// .or_else(|state| expr(state, Rule::add, "+")) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add"))) + /// } + /// fn expr<'a>( + /// state: Box<ParserState<'a, Rule>>, + /// r: Rule, + /// o: &'static str, + /// ) -> ParseResult<Box<ParserState<'a, Rule>>> { + /// state.rule(r, |state| { + /// state.sequence(|state| { + /// number(state) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs"))) + /// .and_then(|state| state.match_string(o)) + /// .and_then(number) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs"))) + /// }) + /// }) + /// } + /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// state.rule(Rule::number, |state| state.match_range('0'..'9')) + /// } + /// + /// let input = "1+2"; + /// let pairs = state(input, mark_branch).unwrap(); + /// let mut left_numbers = pairs.find_tagged("lhs"); + /// assert_eq!(left_numbers.next().unwrap().as_str(), "1"); + /// assert_eq!(left_numbers.next(), None); + /// ``` + #[inline] + pub fn find_tagged( + self, + tag: &'i str, + ) -> Filter<FlatPairs<'i, R>, impl FnMut(&Pair<'i, R>) -> bool + '_> { + self.flatten() + .filter(move |pair: &Pair<'i, R>| matches!(pair.as_node_tag(), Some(nt) if nt == tag)) + } + /// Returns the `Tokens` for the `Pairs`. /// /// # Examples @@ -237,6 +360,13 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> { + #[inline] + fn len(&self) -> usize { + self.pairs_count + } +} + impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; @@ -244,8 +374,14 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { let pair = self.peek()?; self.start = self.pair() + 1; + self.pairs_count -= 1; Some(pair) } + + fn size_hint(&self) -> (usize, Option<usize>) { + let len = <Self as ExactSizeIterator>::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { @@ -255,6 +391,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { } self.end = self.pair_from_end(); + self.pairs_count -= 1; let pair = unsafe { pair::new( @@ -301,7 +438,7 @@ impl<'i, R: Eq> Eq for Pairs<'i, R> {} impl<'i, R: Hash> Hash for Pairs<'i, R> { fn hash<H: Hasher>(&self, state: &mut H) { - (&*self.queue as *const Vec<QueueableToken<R>>).hash(state); + (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state); (self.input as *const str).hash(state); self.start.hash(state); self.end.hash(state); @@ -330,6 +467,7 @@ mod tests { use super::super::super::macros::tests::*; use super::super::super::Parser; use alloc::borrow::ToOwned; + use alloc::boxed::Box; use alloc::format; use alloc::vec; use alloc::vec::Vec; @@ -479,4 +617,69 @@ mod tests { assert_eq!(pair.as_str(), "abc"); assert_eq!(pair.line_col(), (1, 1)); } + + #[test] + fn test_tag_node_branch() { + use crate::{state, ParseResult, ParserState}; + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + enum Rule { + number, // 0..9 + add, // num + num + mul, // num * num + } + fn mark_branch( + state: Box<ParserState<'_, Rule>>, + ) -> ParseResult<Box<ParserState<'_, Rule>>> { + expr(state, Rule::mul, "*") + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("mul"))) + .or_else(|state| expr(state, Rule::add, "+")) + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("add"))) + } + fn expr<'a>( + state: Box<ParserState<'a, Rule>>, + r: Rule, + o: &'static str, + ) -> ParseResult<Box<ParserState<'a, Rule>>> { + state.rule(r, |state| { + state.sequence(|state| { + number(state) + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("lhs"))) + .and_then(|state| state.match_string(o)) + .and_then(number) + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("rhs"))) + }) + }) + } + fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> { + state.rule(Rule::number, |state| state.match_range('0'..'9')) + } + let input = "1+2"; + let pairs = state(input, mark_branch).unwrap(); + assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add); + assert_eq!(pairs.find_first_tagged("mul"), None); + + let mut left_numbers = pairs.clone().find_tagged("lhs"); + + assert_eq!(left_numbers.next().unwrap().as_str(), "1"); + assert_eq!(left_numbers.next(), None); + let mut right_numbers = pairs.find_tagged("rhs"); + + assert_eq!(right_numbers.next().unwrap().as_str(), "2"); + assert_eq!(right_numbers.next(), None); + } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + assert_eq!(pairs.len(), pairs.count()); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.count() + 1, pairs_len); + } } diff --git a/vendor/pest/src/iterators/queueable_token.rs b/vendor/pest/src/iterators/queueable_token.rs index 7d56749bb..67426092b 100644 --- a/vendor/pest/src/iterators/queueable_token.rs +++ b/vendor/pest/src/iterators/queueable_token.rs @@ -7,6 +7,8 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +use alloc::borrow::Cow; + // This structure serves to improve performance over Token objects in two ways: // // * it is smaller than a Token, leading to both less memory use when stored in the queue but also @@ -14,7 +16,7 @@ // * it finds its pair in O(1) time instead of O(N), since pair positions are known at parse time // and can easily be stored instead of recomputed #[derive(Debug)] -pub enum QueueableToken<R> { +pub enum QueueableToken<'i, R> { Start { end_token_index: usize, input_pos: usize, @@ -22,6 +24,7 @@ pub enum QueueableToken<R> { End { start_token_index: usize, rule: R, + tag: Option<Cow<'i, str>>, input_pos: usize, }, } diff --git a/vendor/pest/src/iterators/tokens.rs b/vendor/pest/src/iterators/tokens.rs index 0d462711e..41cbc472d 100644 --- a/vendor/pest/src/iterators/tokens.rs +++ b/vendor/pest/src/iterators/tokens.rs @@ -27,19 +27,19 @@ pub struct Tokens<'i, R> { /// # Safety: /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. - queue: Rc<Vec<QueueableToken<R>>>, + queue: Rc<Vec<QueueableToken<'i, R>>>, input: &'i str, start: usize, end: usize, } // TODO(safety): QueueableTokens must be valid indices into input. -pub fn new<R: RuleType>( - queue: Rc<Vec<QueueableToken<R>>>, - input: &str, +pub fn new<'i, R: RuleType>( + queue: Rc<Vec<QueueableToken<'i, R>>>, + input: &'i str, start: usize, end: usize, -) -> Tokens<'_, R> { +) -> Tokens<'i, R> { if cfg!(debug_assertions) { for tok in queue.iter() { match *tok { @@ -92,6 +92,12 @@ impl<'i, R: RuleType> Tokens<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> { + fn len(&self) -> usize { + self.end - self.start + } +} + impl<'i, R: RuleType> Iterator for Tokens<'i, R> { type Item = Token<'i, R>; @@ -106,6 +112,11 @@ impl<'i, R: RuleType> Iterator for Tokens<'i, R> { Some(token) } + + fn size_hint(&self) -> (usize, Option<usize>) { + let len = <Self as ExactSizeIterator>::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> { @@ -143,4 +154,21 @@ mod tests { let reverse_tokens = pairs.tokens().rev().collect::<Vec<Token<'_, Rule>>>(); assert_eq!(tokens, reverse_tokens); } + + #[test] + fn exact_size_iter_for_tokens() { + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); + assert_eq!(tokens.len(), tokens.count()); + + let tokens = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens(); + assert_eq!(tokens.len(), tokens.count()); + + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev(); + assert_eq!(tokens.len(), tokens.count()); + + let mut tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); + let tokens_len = tokens.len(); + let _ = tokens.next().unwrap(); + assert_eq!(tokens.count() + 1, tokens_len); + } } diff --git a/vendor/pest/src/parser_state.rs b/vendor/pest/src/parser_state.rs index f58de00c8..5a10b420b 100644 --- a/vendor/pest/src/parser_state.rs +++ b/vendor/pest/src/parser_state.rs @@ -7,7 +7,7 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. -use alloc::borrow::ToOwned; +use alloc::borrow::{Cow, ToOwned}; use alloc::boxed::Box; use alloc::rc::Rc; use alloc::vec; @@ -128,7 +128,7 @@ impl CallLimitTracker { #[derive(Debug)] pub struct ParserState<'i, R: RuleType> { position: Position<'i>, - queue: Vec<QueueableToken<R>>, + queue: Vec<QueueableToken<'i, R>>, lookahead: Lookahead, pos_attempts: Vec<R>, neg_attempts: Vec<R>, @@ -345,6 +345,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { new_state.queue.push(QueueableToken::End { start_token_index: index, rule, + tag: None, input_pos: new_pos, }); } @@ -373,6 +374,46 @@ impl<'i, R: RuleType> ParserState<'i, R> { } } + /// Tag current node + /// + /// # Examples + /// + /// Try to recognize the one specified in a set of characters + /// + /// ``` + /// use pest::{state, ParseResult, ParserState, iterators::Pair}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// character, + /// } + /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> { + /// state.sequence(|state| { + /// character(state) + /// .and_then(|state| character(state)) + /// .and_then(|state| character(state)) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("c"))) + /// .and_then(|state| character(state)) + /// }) + /// } + /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> { + /// state.rule(Rule::character, |state| state.match_range('a'..'z')) + /// } + /// + /// let input = "abcd"; + /// let pairs = state(input, mark_c).unwrap(); + /// // find all node tag as `c` + /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect(); + /// assert_eq!(find[0].as_str(), "c") + /// ``` + #[inline] + pub fn tag_node(mut self: Box<Self>, tag: Cow<'i, str>) -> ParseResult<Box<Self>> { + if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() { + *old = Some(tag) + } + Ok(self) + } + fn attempts_at(&self, pos: usize) -> usize { if self.attempt_pos == pos { self.pos_attempts.len() + self.neg_attempts.len() diff --git a/vendor/pest/src/pratt_parser.rs b/vendor/pest/src/pratt_parser.rs index 76ffdf755..f042f8252 100644 --- a/vendor/pest/src/pratt_parser.rs +++ b/vendor/pest/src/pratt_parser.rs @@ -142,8 +142,8 @@ impl<R: RuleType> BitOr for Op<R> { /// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left)) /// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left)) /// .op(Op::infix(Rule::pow, Assoc::Right)) -/// .op(Op::postfix(Rule::fac)) -/// .op(Op::prefix(Rule::neg)); +/// .op(Op::prefix(Rule::neg)) +/// .op(Op::postfix(Rule::fac)); /// ``` /// /// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`], diff --git a/vendor/pest/src/unicode/mod.rs b/vendor/pest/src/unicode/mod.rs index 8a56bd6e8..6d7cb1d69 100644 --- a/vendor/pest/src/unicode/mod.rs +++ b/vendor/pest/src/unicode/mod.rs @@ -56,6 +56,7 @@ char_property_functions! { ALPHABETIC, BIDI_CONTROL, CASE_IGNORABLE, CASED, CHANGES_WHEN_CASEFOLDED, CHANGES_WHEN_CASEMAPPED, CHANGES_WHEN_LOWERCASED, CHANGES_WHEN_TITLECASED, CHANGES_WHEN_UPPERCASED, DASH, DEFAULT_IGNORABLE_CODE_POINT, DEPRECATED, DIACRITIC, + EMOJI, EMOJI_COMPONENT, EMOJI_MODIFIER, EMOJI_MODIFIER_BASE, EMOJI_PRESENTATION, EXTENDED_PICTOGRAPHIC, EXTENDER, GRAPHEME_BASE, GRAPHEME_EXTEND, GRAPHEME_LINK, HEX_DIGIT, HYPHEN, IDS_BINARY_OPERATOR, IDS_TRINARY_OPERATOR, ID_CONTINUE, ID_START, IDEOGRAPHIC, JOIN_CONTROL, LOGICAL_ORDER_EXCEPTION, LOWERCASE, MATH, NONCHARACTER_CODE_POINT, OTHER_ALPHABETIC, |