summary refs log tree commit diff stats
path: root/vendor/pest/src
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
commit dc0db358abe19481e475e10c32149b53370f1a1c (patch)
tree ab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/pest/src
parent Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff)
download rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz
rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/pest/src')
-rw-r--r-- vendor/pest/src/error.rs 37
-rw-r--r-- vendor/pest/src/iterators/flat_pairs.rs 40
-rw-r--r-- vendor/pest/src/iterators/pair.rs 32
-rw-r--r-- vendor/pest/src/iterators/pairs.rs 215
-rw-r--r-- vendor/pest/src/iterators/queueable_token.rs 5
-rw-r--r-- vendor/pest/src/iterators/tokens.rs 38
-rw-r--r-- vendor/pest/src/parser_state.rs 45
-rw-r--r-- vendor/pest/src/pratt_parser.rs 4
-rw-r--r-- vendor/pest/src/unicode/mod.rs 1
9 files changed, 386 insertions, 31 deletions
diff --git a/vendor/pest/src/error.rs b/vendor/pest/src/error.rs
index eef004233..df3f5448e 100644
--- a/vendor/pest/src/error.rs
+++ b/vendor/pest/src/error.rs
@@ -74,6 +74,19 @@ pub enum LineColLocation {
Span((usize, usize), (usize, usize)),
}
+impl From<Position<'_>> for LineColLocation {
+ fn from(value: Position<'_>) -> Self {
+ Self::Pos(value.line_col())
+ }
+}
+
+impl From<Span<'_>> for LineColLocation {
+ fn from(value: Span<'_>) -> Self {
+ let (start, end) = value.split();
+ Self::Span(start.line_col(), end.line_col())
+ }
+}
+
impl<R: RuleType> Error<R> {
/// Creates `Error` from `ErrorVariant` and `Position`.
///
@@ -418,7 +431,7 @@ impl<R: RuleType> Error<R> {
.unwrap_or_default();
let pair = (self.line_col.clone(), &self.continued_line);
- if let (LineColLocation::Span(_, end), &Some(ref continued_line)) = pair {
+ if let (LineColLocation::Span(_, end), Some(ref continued_line)) = pair {
let has_line_gap = end.0 - self.start().0 > 1;
if has_line_gap {
format!(
@@ -892,4 +905,26 @@ mod tests {
.join("\n")
);
}
+
+ #[test]
+ fn pos_to_lcl_conversion() {
+ let input = "input";
+
+ let pos = Position::new(input, 2).unwrap();
+
+ assert_eq!(LineColLocation::Pos(pos.line_col()), pos.into());
+ }
+
+ #[test]
+ fn span_to_lcl_conversion() {
+ let input = "input";
+
+ let span = Span::new(input, 2, 4).unwrap();
+ let (start, end) = span.split();
+
+ assert_eq!(
+ LineColLocation::Span(start.line_col(), end.line_col()),
+ span.into()
+ );
+ }
}
diff --git a/vendor/pest/src/iterators/flat_pairs.rs b/vendor/pest/src/iterators/flat_pairs.rs
index 52a207406..9b92f557a 100644
--- a/vendor/pest/src/iterators/flat_pairs.rs
+++ b/vendor/pest/src/iterators/flat_pairs.rs
@@ -25,7 +25,7 @@ pub struct FlatPairs<'i, R> {
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
- queue: Rc<Vec<QueueableToken<R>>>,
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
end: usize,
@@ -35,12 +35,12 @@ pub struct FlatPairs<'i, R> {
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
-pub unsafe fn new<R: RuleType>(
- queue: Rc<Vec<QueueableToken<R>>>,
- input: &str,
+pub unsafe fn new<'i, R: RuleType>(
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
+ input: &'i str,
start: usize,
end: usize,
-) -> FlatPairs<'_, R> {
+) -> FlatPairs<'i, R> {
FlatPairs {
queue,
input,
@@ -102,6 +102,13 @@ impl<'i, R: RuleType> FlatPairs<'i, R> {
}
}
+impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> {
+ fn len(&self) -> usize {
+ // Tokens len is exactly twice as flatten pairs len
+ (self.end - self.start) >> 1
+ }
+}
+
impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
type Item = Pair<'i, R>;
@@ -122,6 +129,11 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
Some(pair)
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = <Self as ExactSizeIterator>::len(self);
+ (len, Some(len))
+ }
}
impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {
@@ -214,4 +226,22 @@ mod tests {
assert_eq!(pair.line_col(), (1, 5));
assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col());
}
+
+ #[test]
+ fn exact_size_iter_for_pairs() {
+ let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
+ assert_eq!(pairs.len(), pairs.count());
+
+ let pairs = AbcParser::parse(Rule::a, "我很漂亮efgh").unwrap().flatten();
+ assert_eq!(pairs.len(), pairs.count());
+
+ let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
+ let pairs = pairs.rev();
+ assert_eq!(pairs.len(), pairs.count());
+
+ let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
+ let pairs_len = pairs.len();
+ let _ = pairs.next().unwrap();
+ assert_eq!(pairs.count() + 1, pairs_len);
+ }
}
diff --git a/vendor/pest/src/iterators/pair.rs b/vendor/pest/src/iterators/pair.rs
index 891b90595..f1b311a40 100644
--- a/vendor/pest/src/iterators/pair.rs
+++ b/vendor/pest/src/iterators/pair.rs
@@ -12,6 +12,7 @@ use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
+use core::borrow::Borrow;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
@@ -40,7 +41,7 @@ pub struct Pair<'i, R> {
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
- queue: Rc<Vec<QueueableToken<R>>>,
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
/// Token index into `queue`.
start: usize,
@@ -50,12 +51,12 @@ pub struct Pair<'i, R> {
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
-pub unsafe fn new<R: RuleType>(
- queue: Rc<Vec<QueueableToken<R>>>,
- input: &str,
+pub unsafe fn new<'i, R: RuleType>(
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
+ input: &'i str,
line_index: Rc<LineIndex>,
start: usize,
-) -> Pair<'_, R> {
+) -> Pair<'i, R> {
Pair {
queue,
input,
@@ -181,6 +182,15 @@ impl<'i, R: RuleType> Pair<'i, R> {
unsafe { span::Span::new_unchecked(self.input, start, end) }
}
+ /// Get current node tag
+ #[inline]
+ pub fn as_node_tag(&self) -> Option<&str> {
+ match &self.queue[self.pair()] {
+ QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
+ _ => None,
+ }
+ }
+
/// Returns the inner `Pairs` between the `Pair`, consuming it.
///
/// # Examples
@@ -291,9 +301,13 @@ impl<'i, R: RuleType> Pairs<'i, R> {
impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("Pair")
- .field("rule", &self.as_rule())
- .field("span", &self.as_span())
+ let pair = &mut f.debug_struct("Pair");
+ pair.field("rule", &self.as_rule());
+ // In order not to break compatibility
+ if let Some(s) = self.as_node_tag() {
+ pair.field("node_tag", &s);
+ }
+ pair.field("span", &self.as_span())
.field("inner", &self.clone().into_inner().collect::<Vec<_>>())
.finish()
}
@@ -336,7 +350,7 @@ impl<'i, R: Eq> Eq for Pair<'i, R> {}
impl<'i, R: Hash> Hash for Pair<'i, R> {
fn hash<H: Hasher>(&self, state: &mut H) {
- (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
+ (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
(self.input as *const str).hash(state);
self.start.hash(state);
}
diff --git a/vendor/pest/src/iterators/pairs.rs b/vendor/pest/src/iterators/pairs.rs
index c21a7fae1..ab7df75ee 100644
--- a/vendor/pest/src/iterators/pairs.rs
+++ b/vendor/pest/src/iterators/pairs.rs
@@ -13,6 +13,7 @@ use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
use core::hash::{Hash, Hasher};
+use core::iter::Filter;
use core::ptr;
use core::str;
@@ -33,30 +34,44 @@ use crate::RuleType;
/// [`Pair::into_inner`]: struct.Pair.html#method.into_inner
#[derive(Clone)]
pub struct Pairs<'i, R> {
- queue: Rc<Vec<QueueableToken<R>>>,
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
end: usize,
+ pairs_count: usize,
line_index: Rc<LineIndex>,
}
-pub fn new<R: RuleType>(
- queue: Rc<Vec<QueueableToken<R>>>,
- input: &str,
+pub fn new<'i, R: RuleType>(
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
+ input: &'i str,
line_index: Option<Rc<LineIndex>>,
start: usize,
end: usize,
-) -> Pairs<'_, R> {
+) -> Pairs<'i, R> {
let line_index = match line_index {
Some(line_index) => line_index,
None => Rc::new(LineIndex::new(input)),
};
+ let mut pairs_count = 0;
+ let mut cursor = start;
+ while cursor < end {
+ cursor = match queue[cursor] {
+ QueueableToken::Start {
+ end_token_index, ..
+ } => end_token_index,
+ _ => unreachable!(),
+ } + 1;
+ pairs_count += 1;
+ }
+
Pairs {
queue,
input,
start,
end,
+ pairs_count,
line_index,
}
}
@@ -159,6 +174,114 @@ impl<'i, R: RuleType> Pairs<'i, R> {
unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
}
+ /// Finds the first pair that has its node or branch tagged with the provided
+ /// label.
+ ///
+ /// # Examples
+ ///
+ /// Try to recognize the branch between add and mul
+ /// ```
+ /// use pest::{state, ParseResult, ParserState};
+ /// #[allow(non_camel_case_types)]
+ /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ /// enum Rule {
+ /// number, // 0..9
+ /// add, // num + num
+ /// mul, // num * num
+ /// }
+ /// fn mark_branch(
+ /// state: Box<ParserState<'_, Rule>>,
+ /// ) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// expr(state, Rule::mul, "*")
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul")))
+ /// .or_else(|state| expr(state, Rule::add, "+"))
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add")))
+ /// }
+ /// fn expr<'a>(
+ /// state: Box<ParserState<'a, Rule>>,
+ /// r: Rule,
+ /// o: &'static str,
+ /// ) -> ParseResult<Box<ParserState<'a, Rule>>> {
+ /// state.rule(r, |state| {
+ /// state.sequence(|state| {
+ /// number(state)
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs")))
+ /// .and_then(|state| state.match_string(o))
+ /// .and_then(number)
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs")))
+ /// })
+ /// })
+ /// }
+ /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// state.rule(Rule::number, |state| state.match_range('0'..'9'))
+ /// }
+ /// let input = "1+2";
+ /// let pairs = state(input, mark_branch).unwrap();
+ /// assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add);
+ /// assert_eq!(pairs.find_first_tagged("mul"), None);
+ /// ```
+ #[inline]
+ pub fn find_first_tagged(&self, tag: &'i str) -> Option<Pair<'i, R>> {
+ self.clone().find_tagged(tag).next()
+ }
+
+ /// Returns the iterator over pairs that have their node or branch tagged
+ /// with the provided label.
+ ///
+ /// # Examples
+ ///
+ /// Try to recognize the node between left and right hand side
+ /// ```
+ /// use pest::{state, ParseResult, ParserState};
+ /// #[allow(non_camel_case_types)]
+ /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ /// enum Rule {
+ /// number, // 0..9
+ /// add, // num + num
+ /// mul, // num * num
+ /// }
+ /// fn mark_branch(
+ /// state: Box<ParserState<'_, Rule>>,
+ /// ) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// expr(state, Rule::mul, "*")
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul")))
+ /// .or_else(|state| expr(state, Rule::add, "+"))
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add")))
+ /// }
+ /// fn expr<'a>(
+ /// state: Box<ParserState<'a, Rule>>,
+ /// r: Rule,
+ /// o: &'static str,
+ /// ) -> ParseResult<Box<ParserState<'a, Rule>>> {
+ /// state.rule(r, |state| {
+ /// state.sequence(|state| {
+ /// number(state)
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs")))
+ /// .and_then(|state| state.match_string(o))
+ /// .and_then(number)
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs")))
+ /// })
+ /// })
+ /// }
+ /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// state.rule(Rule::number, |state| state.match_range('0'..'9'))
+ /// }
+ ///
+ /// let input = "1+2";
+ /// let pairs = state(input, mark_branch).unwrap();
+ /// let mut left_numbers = pairs.find_tagged("lhs");
+ /// assert_eq!(left_numbers.next().unwrap().as_str(), "1");
+ /// assert_eq!(left_numbers.next(), None);
+ /// ```
+ #[inline]
+ pub fn find_tagged(
+ self,
+ tag: &'i str,
+ ) -> Filter<FlatPairs<'i, R>, impl FnMut(&Pair<'i, R>) -> bool + '_> {
+ self.flatten()
+ .filter(move |pair: &Pair<'i, R>| matches!(pair.as_node_tag(), Some(nt) if nt == tag))
+ }
+
/// Returns the `Tokens` for the `Pairs`.
///
/// # Examples
@@ -237,6 +360,13 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
+impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> {
+ #[inline]
+ fn len(&self) -> usize {
+ self.pairs_count
+ }
+}
+
impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;
@@ -244,8 +374,14 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
let pair = self.peek()?;
self.start = self.pair() + 1;
+ self.pairs_count -= 1;
Some(pair)
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = <Self as ExactSizeIterator>::len(self);
+ (len, Some(len))
+ }
}
impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
@@ -255,6 +391,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
}
self.end = self.pair_from_end();
+ self.pairs_count -= 1;
let pair = unsafe {
pair::new(
@@ -301,7 +438,7 @@ impl<'i, R: Eq> Eq for Pairs<'i, R> {}
impl<'i, R: Hash> Hash for Pairs<'i, R> {
fn hash<H: Hasher>(&self, state: &mut H) {
- (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
+ (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
(self.input as *const str).hash(state);
self.start.hash(state);
self.end.hash(state);
@@ -330,6 +467,7 @@ mod tests {
use super::super::super::macros::tests::*;
use super::super::super::Parser;
use alloc::borrow::ToOwned;
+ use alloc::boxed::Box;
use alloc::format;
use alloc::vec;
use alloc::vec::Vec;
@@ -479,4 +617,69 @@ mod tests {
assert_eq!(pair.as_str(), "abc");
assert_eq!(pair.line_col(), (1, 1));
}
+
+ #[test]
+ fn test_tag_node_branch() {
+ use crate::{state, ParseResult, ParserState};
+ #[allow(non_camel_case_types)]
+ #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ enum Rule {
+ number, // 0..9
+ add, // num + num
+ mul, // num * num
+ }
+ fn mark_branch(
+ state: Box<ParserState<'_, Rule>>,
+ ) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ expr(state, Rule::mul, "*")
+ .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("mul")))
+ .or_else(|state| expr(state, Rule::add, "+"))
+ .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("add")))
+ }
+ fn expr<'a>(
+ state: Box<ParserState<'a, Rule>>,
+ r: Rule,
+ o: &'static str,
+ ) -> ParseResult<Box<ParserState<'a, Rule>>> {
+ state.rule(r, |state| {
+ state.sequence(|state| {
+ number(state)
+ .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("lhs")))
+ .and_then(|state| state.match_string(o))
+ .and_then(number)
+ .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("rhs")))
+ })
+ })
+ }
+ fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ state.rule(Rule::number, |state| state.match_range('0'..'9'))
+ }
+ let input = "1+2";
+ let pairs = state(input, mark_branch).unwrap();
+ assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add);
+ assert_eq!(pairs.find_first_tagged("mul"), None);
+
+ let mut left_numbers = pairs.clone().find_tagged("lhs");
+
+ assert_eq!(left_numbers.next().unwrap().as_str(), "1");
+ assert_eq!(left_numbers.next(), None);
+ let mut right_numbers = pairs.find_tagged("rhs");
+
+ assert_eq!(right_numbers.next().unwrap().as_str(), "2");
+ assert_eq!(right_numbers.next(), None);
+ }
+
+ #[test]
+ fn exact_size_iter_for_pairs() {
+ let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
+ assert_eq!(pairs.len(), pairs.count());
+
+ let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();
+ assert_eq!(pairs.len(), pairs.count());
+
+ let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
+ let pairs_len = pairs.len();
+ let _ = pairs.next().unwrap();
+ assert_eq!(pairs.count() + 1, pairs_len);
+ }
}
diff --git a/vendor/pest/src/iterators/queueable_token.rs b/vendor/pest/src/iterators/queueable_token.rs
index 7d56749bb..67426092b 100644
--- a/vendor/pest/src/iterators/queueable_token.rs
+++ b/vendor/pest/src/iterators/queueable_token.rs
@@ -7,6 +7,8 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
+use alloc::borrow::Cow;
+
// This structure serves to improve performance over Token objects in two ways:
//
// * it is smaller than a Token, leading to both less memory use when stored in the queue but also
@@ -14,7 +16,7 @@
// * it finds its pair in O(1) time instead of O(N), since pair positions are known at parse time
// and can easily be stored instead of recomputed
#[derive(Debug)]
-pub enum QueueableToken<R> {
+pub enum QueueableToken<'i, R> {
Start {
end_token_index: usize,
input_pos: usize,
@@ -22,6 +24,7 @@ pub enum QueueableToken<R> {
End {
start_token_index: usize,
rule: R,
+ tag: Option<Cow<'i, str>>,
input_pos: usize,
},
}
diff --git a/vendor/pest/src/iterators/tokens.rs b/vendor/pest/src/iterators/tokens.rs
index 0d462711e..41cbc472d 100644
--- a/vendor/pest/src/iterators/tokens.rs
+++ b/vendor/pest/src/iterators/tokens.rs
@@ -27,19 +27,19 @@ pub struct Tokens<'i, R> {
/// # Safety:
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
- queue: Rc<Vec<QueueableToken<R>>>,
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
end: usize,
}
// TODO(safety): QueueableTokens must be valid indices into input.
-pub fn new<R: RuleType>(
- queue: Rc<Vec<QueueableToken<R>>>,
- input: &str,
+pub fn new<'i, R: RuleType>(
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
+ input: &'i str,
start: usize,
end: usize,
-) -> Tokens<'_, R> {
+) -> Tokens<'i, R> {
if cfg!(debug_assertions) {
for tok in queue.iter() {
match *tok {
@@ -92,6 +92,12 @@ impl<'i, R: RuleType> Tokens<'i, R> {
}
}
+impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> {
+ fn len(&self) -> usize {
+ self.end - self.start
+ }
+}
+
impl<'i, R: RuleType> Iterator for Tokens<'i, R> {
type Item = Token<'i, R>;
@@ -106,6 +112,11 @@ impl<'i, R: RuleType> Iterator for Tokens<'i, R> {
Some(token)
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = <Self as ExactSizeIterator>::len(self);
+ (len, Some(len))
+ }
}
impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> {
@@ -143,4 +154,21 @@ mod tests {
let reverse_tokens = pairs.tokens().rev().collect::<Vec<Token<'_, Rule>>>();
assert_eq!(tokens, reverse_tokens);
}
+
+ #[test]
+ fn exact_size_iter_for_tokens() {
+ let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
+ assert_eq!(tokens.len(), tokens.count());
+
+ let tokens = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens();
+ assert_eq!(tokens.len(), tokens.count());
+
+ let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev();
+ assert_eq!(tokens.len(), tokens.count());
+
+ let mut tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
+ let tokens_len = tokens.len();
+ let _ = tokens.next().unwrap();
+ assert_eq!(tokens.count() + 1, tokens_len);
+ }
}
diff --git a/vendor/pest/src/parser_state.rs b/vendor/pest/src/parser_state.rs
index f58de00c8..5a10b420b 100644
--- a/vendor/pest/src/parser_state.rs
+++ b/vendor/pest/src/parser_state.rs
@@ -7,7 +7,7 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
-use alloc::borrow::ToOwned;
+use alloc::borrow::{Cow, ToOwned};
use alloc::boxed::Box;
use alloc::rc::Rc;
use alloc::vec;
@@ -128,7 +128,7 @@ impl CallLimitTracker {
#[derive(Debug)]
pub struct ParserState<'i, R: RuleType> {
position: Position<'i>,
- queue: Vec<QueueableToken<R>>,
+ queue: Vec<QueueableToken<'i, R>>,
lookahead: Lookahead,
pos_attempts: Vec<R>,
neg_attempts: Vec<R>,
@@ -345,6 +345,7 @@ impl<'i, R: RuleType> ParserState<'i, R> {
new_state.queue.push(QueueableToken::End {
start_token_index: index,
rule,
+ tag: None,
input_pos: new_pos,
});
}
@@ -373,6 +374,46 @@ impl<'i, R: RuleType> ParserState<'i, R> {
}
}
+ /// Tag current node
+ ///
+ /// # Examples
+ ///
+ /// Try to recognize the one specified in a set of characters
+ ///
+ /// ```
+ /// use pest::{state, ParseResult, ParserState, iterators::Pair};
+ /// #[allow(non_camel_case_types)]
+ /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ /// enum Rule {
+ /// character,
+ /// }
+ /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
+ /// state.sequence(|state| {
+ /// character(state)
+ /// .and_then(|state| character(state))
+ /// .and_then(|state| character(state))
+ /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("c")))
+ /// .and_then(|state| character(state))
+ /// })
+ /// }
+ /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
+ /// state.rule(Rule::character, |state| state.match_range('a'..'z'))
+ /// }
+ ///
+ /// let input = "abcd";
+ /// let pairs = state(input, mark_c).unwrap();
+ /// // find all node tag as `c`
+ /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
+ /// assert_eq!(find[0].as_str(), "c")
+ /// ```
+ #[inline]
+ pub fn tag_node(mut self: Box<Self>, tag: Cow<'i, str>) -> ParseResult<Box<Self>> {
+ if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
+ *old = Some(tag)
+ }
+ Ok(self)
+ }
+
fn attempts_at(&self, pos: usize) -> usize {
if self.attempt_pos == pos {
self.pos_attempts.len() + self.neg_attempts.len()
diff --git a/vendor/pest/src/pratt_parser.rs b/vendor/pest/src/pratt_parser.rs
index 76ffdf755..f042f8252 100644
--- a/vendor/pest/src/pratt_parser.rs
+++ b/vendor/pest/src/pratt_parser.rs
@@ -142,8 +142,8 @@ impl<R: RuleType> BitOr for Op<R> {
/// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left))
/// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left))
/// .op(Op::infix(Rule::pow, Assoc::Right))
-/// .op(Op::postfix(Rule::fac))
-/// .op(Op::prefix(Rule::neg));
+/// .op(Op::prefix(Rule::neg))
+/// .op(Op::postfix(Rule::fac));
/// ```
///
/// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`],
diff --git a/vendor/pest/src/unicode/mod.rs b/vendor/pest/src/unicode/mod.rs
index 8a56bd6e8..6d7cb1d69 100644
--- a/vendor/pest/src/unicode/mod.rs
+++ b/vendor/pest/src/unicode/mod.rs
@@ -56,6 +56,7 @@ char_property_functions! {
ALPHABETIC, BIDI_CONTROL, CASE_IGNORABLE, CASED, CHANGES_WHEN_CASEFOLDED,
CHANGES_WHEN_CASEMAPPED, CHANGES_WHEN_LOWERCASED, CHANGES_WHEN_TITLECASED,
CHANGES_WHEN_UPPERCASED, DASH, DEFAULT_IGNORABLE_CODE_POINT, DEPRECATED, DIACRITIC,
+ EMOJI, EMOJI_COMPONENT, EMOJI_MODIFIER, EMOJI_MODIFIER_BASE, EMOJI_PRESENTATION, EXTENDED_PICTOGRAPHIC,
EXTENDER, GRAPHEME_BASE, GRAPHEME_EXTEND, GRAPHEME_LINK, HEX_DIGIT, HYPHEN,
IDS_BINARY_OPERATOR, IDS_TRINARY_OPERATOR, ID_CONTINUE, ID_START, IDEOGRAPHIC, JOIN_CONTROL,
LOGICAL_ORDER_EXCEPTION, LOWERCASE, MATH, NONCHARACTER_CODE_POINT, OTHER_ALPHABETIC,