From dc0db358abe19481e475e10c32149b53370f1a1c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 05:57:31 +0200 Subject: Merging upstream version 1.72.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/pest/.cargo-checksum.json | 2 +- vendor/pest/Cargo.lock | 26 ++-- vendor/pest/Cargo.toml | 10 +- vendor/pest/_README.md | 5 +- vendor/pest/src/error.rs | 37 ++++- vendor/pest/src/iterators/flat_pairs.rs | 40 ++++- vendor/pest/src/iterators/pair.rs | 32 ++-- vendor/pest/src/iterators/pairs.rs | 215 ++++++++++++++++++++++++++- vendor/pest/src/iterators/queueable_token.rs | 5 +- vendor/pest/src/iterators/tokens.rs | 38 ++++- vendor/pest/src/parser_state.rs | 45 +++++- vendor/pest/src/pratt_parser.rs | 4 +- vendor/pest/src/unicode/mod.rs | 1 + 13 files changed, 408 insertions(+), 52 deletions(-) (limited to 'vendor/pest') diff --git a/vendor/pest/.cargo-checksum.json b/vendor/pest/.cargo-checksum.json index d757c410a..ef140a3d2 100644 --- a/vendor/pest/.cargo-checksum.json +++ b/vendor/pest/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.lock":"0136178e8376039afab5a8cb6987cb1f67f3b0dcd65baa0466fc5171c17695a8","Cargo.toml":"fdfa43fbe47568456de975a22602558fb1b7c31402c79198e25577400721ceb4","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","_README.md":"647dbcab0b7ed9837cc066cb3731aa2db7fbecb0e9aa187995a0a7a03af3938d","examples/parens.rs":"2b022b5ed1fbc9b41a4359e3b79648317415217768f88234c3e3efc0db34bcce","src/error.rs":"7db24da89a3811cc4372a04cbc2dcfb7642a0ccf2702cfe5c027cfa45cb18ebd","src/iterators/flat_pairs.rs":"2ff507a2db033fed38711cbe5fa9a72f115ff24c427a94aa3dd2725c736fcea0","src/iterators/line_index.rs":"19729b5da43527a52910908d67f406f137bced5ab535e879a26032f5b97f9f48","src/iterators/mod.rs":"b8ac4e586c7b49403c694ceded242a9ed9c499d995e3db2df19b27aaff3d4e4d","src/iterators/pair.rs":"dbe805e6aba154c9275ae7beb70c7508acb426daad688dd5f26ab39a0b27ca97","src/iterators/pairs.rs":"5245b5f7c2997e34874114e57c008db9f51519d7cd32a3a0c9729bc9a37bf795","src/iterators/queueable_token.rs":"b6c4fd1d43437ea7c6cab45717c1fb66f71fc91aaf3ed9cf9f5686fd8ccfa8f9","src/iterators/tokens.rs":"487eb495f3713df56ae33fb664624f3d396d7f6d4aa872ede33413d561c5d21c","src/lib.rs":"ceecf1e44c6e4cb44383d9e41d93d6744d6924d41d6fd2826757415ca9dea2d4","src/macros.rs":"95637b87ecad9db34e3b23985d32ef56a0ea1f99f729281f9c2afe8057d0ea61","src/parser.rs":"3c453a737eac42b113c91dba459c49d8417fcc46aa2d8d3d3297d29de079cb42","src/parser_state.rs":"91cc987cf6fdbf449969a4ee3818990e3c26d9423c384b5c1cac5849784b8d2e","src/position.rs":"b0418c3db0836d44429d2d4659bf1335150ab22251f0205cb23e38449d2ed8b5","src/pratt_parser.rs":"d2df2a965e1dab123a1a2d3065c4c2c37c5b96d607464be442534bde86face31","src/prec_climber.rs":"e068f7abe105d39f56ba057e32c32450793b4180d243640cb1008c61f588ce48","src/span.rs":"ec467df8a54982bf747c55c43f825c349787353409bac1e6cddde461f8dfae26","src/stack.rs":"97b6fe59e60941c1388fa5fecc2a4fbf78e1cc7f1a3a7c4848cb68ae9d187628","src/token.rs":"8c3a09c7f9a5c059f94900cd8c16b3eb17f893154f395e2f1cfa1a472cfd2ba6","src/unicode/binary.rs":"16dbd15ca792f80ce470f58480dd3f69a71785f781e6bd83eb49a6763ab13e09","src/unicode/category.rs":"6d9ed866aceb905f30b79248d46820762674ffc67661df3cbf4a2d1d7d3aa374","src/unicode/mod.rs":"e45485c756fa9205571e4c5ba7c3a86793dc8868c2761ba3991abcc059ff5907","src/unicode/script.rs":"50fa653d154d00925119f836796d18249997b57655188f3c87dc3ddadd74df43","tests/calculator.rs":"b6045cf150ed62c14d848c8f439065e943f68efd088f62f5ff8fedceff435d7a","tests/json.rs":"74d222d9255d211d835e8215a74a002ba1ce8259d0d1106dc63c2bfd4c087ee5"},"package":"7b1403e8401ad5dedea73c626b99758535b342502f8d1e361f4a2dd952749122"} \ No newline at end of file +{"files":{"Cargo.lock":"e66c2b707181d1aa46a41b24b8e0ba499f740739d21bc135969ebfd36d32bf31","Cargo.toml":"5a344ae3873819879cdb2fcc53c53bb4136bb938aaa691c89a9f4d9170e0edc4","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","_README.md":"b49ff086f7c9e37c8bb432f445168061d74bdaddf713d968b15f5316f940f285","examples/parens.rs":"2b022b5ed1fbc9b41a4359e3b79648317415217768f88234c3e3efc0db34bcce","src/error.rs":"951470a3a78a9297d73f9f27c8232a7d067dfc50f9fc8c08794742c5bceb7de9","src/iterators/flat_pairs.rs":"9c9c284d06bdd55ff8461cb62b93b22d9f4f67f22808ac7b40041bf9fd28d9ab","src/iterators/line_index.rs":"19729b5da43527a52910908d67f406f137bced5ab535e879a26032f5b97f9f48","src/iterators/mod.rs":"b8ac4e586c7b49403c694ceded242a9ed9c499d995e3db2df19b27aaff3d4e4d","src/iterators/pair.rs":"5f5996a95cf98151e0458a58cc713fc695454d695744902c26ce2452acfc5ddb","src/iterators/pairs.rs":"5db30a2dcac2897fa37b8bad05f0913a2ef7979a0f474951fd638d91158bc26e","src/iterators/queueable_token.rs":"50b126c8d5cc68008c938420f8e3d1adcf401a85a64f3deed15f18cab5df9329","src/iterators/tokens.rs":"19060114c6b4f61da67a46e4e9cdb7b4ab8e91d55c0d11e26f1a5d54986f634b","src/lib.rs":"ceecf1e44c6e4cb44383d9e41d93d6744d6924d41d6fd2826757415ca9dea2d4","src/macros.rs":"95637b87ecad9db34e3b23985d32ef56a0ea1f99f729281f9c2afe8057d0ea61","src/parser.rs":"3c453a737eac42b113c91dba459c49d8417fcc46aa2d8d3d3297d29de079cb42","src/parser_state.rs":"064f676428d306b5a351ca044c0ab70e581500fe54c6e9d14c6ec0654220ea8a","src/position.rs":"b0418c3db0836d44429d2d4659bf1335150ab22251f0205cb23e38449d2ed8b5","src/pratt_parser.rs":"81840653126031e4f069fab09a6d0773c59defc4c9686f4c50a7d08dcb52251f","src/prec_climber.rs":"e068f7abe105d39f56ba057e32c32450793b4180d243640cb1008c61f588ce48","src/span.rs":"ec467df8a54982bf747c55c43f825c349787353409bac1e6cddde461f8dfae26","src/stack.rs":"97b6fe59e60941c1388fa5fecc2a4fbf78e1cc7f1a3a7c4848cb68ae9d187628","src/token.rs":"8c3a09c7f9a5c059f94900cd8c16b3eb17f893154f395e2f1cfa1a472cfd2ba6","src/unicode/binary.rs":"16dbd15ca792f80ce470f58480dd3f69a71785f781e6bd83eb49a6763ab13e09","src/unicode/category.rs":"6d9ed866aceb905f30b79248d46820762674ffc67661df3cbf4a2d1d7d3aa374","src/unicode/mod.rs":"99ab177391d588f821e2da6fba9fdae9a3a01b882ed93893d50b36cd61f8708f","src/unicode/script.rs":"50fa653d154d00925119f836796d18249997b57655188f3c87dc3ddadd74df43","tests/calculator.rs":"b6045cf150ed62c14d848c8f439065e943f68efd088f62f5ff8fedceff435d7a","tests/json.rs":"74d222d9255d211d835e8215a74a002ba1ce8259d0d1106dc63c2bfd4c087ee5"},"package":"f73935e4d55e2abf7f130186537b19e7a4abc886a0252380b59248af473a3fc9"} \ No newline at end of file diff --git a/vendor/pest/Cargo.lock b/vendor/pest/Cargo.lock index 4014d8a18..7a7d3f194 100644 --- a/vendor/pest/Cargo.lock +++ b/vendor/pest/Cargo.lock @@ -16,7 +16,7 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "pest" -version = "2.5.7" +version = "2.7.0" dependencies = [ "memchr", "serde", @@ -27,18 +27,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.54" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -51,15 +51,15 @@ checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "serde" -version = "1.0.159" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "bdf3bf93142acad5821c99197022e170842cdbc1c30482b98750c688c640842a" dependencies = [ "itoa", "ryu", @@ -68,9 +68,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.12" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -105,6 +105,6 @@ checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" diff --git a/vendor/pest/Cargo.toml b/vendor/pest/Cargo.toml index 9b477863b..2358ffbff 100644 --- a/vendor/pest/Cargo.toml +++ b/vendor/pest/Cargo.toml @@ -11,9 +11,9 @@ [package] edition = "2021" -rust-version = "1.56" +rust-version = "1.60" name = "pest" -version = "2.5.7" +version = "2.7.0" authors = ["Dragoș Tiselice "] description = "The Elegant Parser" homepage = "https://pest.rs/" @@ -53,10 +53,10 @@ default-features = false const_prec_climber = [] default = ["std"] pretty-print = [ - "serde", - "serde_json", + "dep:serde", + "dep:serde_json", ] std = [ "ucd-trie/std", - "thiserror", + "dep:thiserror", ] diff --git a/vendor/pest/_README.md b/vendor/pest/_README.md index cb8055eb0..2bfc5664a 100644 --- a/vendor/pest/_README.md +++ b/vendor/pest/_README.md @@ -11,7 +11,7 @@ [![pest Continuous Integration](https://github.com/pest-parser/pest/actions/workflows/ci.yml/badge.svg)](https://github.com/pest-parser/pest/actions/workflows/ci.yml) [![codecov](https://codecov.io/gh/pest-parser/pest/branch/master/graph/badge.svg)](https://codecov.io/gh/pest-parser/pest) -Rustc Version 1.56.1+ +Rustc Version 1.60.0+ [![Crates.io](https://img.shields.io/crates/d/pest.svg)](https://crates.io/crates/pest) [![Crates.io](https://img.shields.io/crates/v/pest.svg)](https://crates.io/crates/pest) @@ -199,10 +199,11 @@ You can find more projects and ecosystem tools in the [awesome-pest](https://git * [qubit](https://github.com/abhimanyu003/qubit) * [caith](https://github.com/Geobert/caith) (a dice roller crate) * [Melody](https://github.com/yoav-lavi/melody) +* [json5-nodes](https://github.com/jlyonsmith/json5-nodes) ## Minimum Supported Rust Version (MSRV) -This library should always compile with default features on **Rust 1.56.1** +This library should always compile with default features on **Rust 1.60.0** or **Rust 1.61** with `const_prec_climber`. ## no_std support diff --git a/vendor/pest/src/error.rs b/vendor/pest/src/error.rs index eef004233..df3f5448e 100644 --- a/vendor/pest/src/error.rs +++ b/vendor/pest/src/error.rs @@ -74,6 +74,19 @@ pub enum LineColLocation { Span((usize, usize), (usize, usize)), } +impl From> for LineColLocation { + fn from(value: Position<'_>) -> Self { + Self::Pos(value.line_col()) + } +} + +impl From> for LineColLocation { + fn from(value: Span<'_>) -> Self { + let (start, end) = value.split(); + Self::Span(start.line_col(), end.line_col()) + } +} + impl Error { /// Creates `Error` from `ErrorVariant` and `Position`. /// @@ -418,7 +431,7 @@ impl Error { .unwrap_or_default(); let pair = (self.line_col.clone(), &self.continued_line); - if let (LineColLocation::Span(_, end), &Some(ref continued_line)) = pair { + if let (LineColLocation::Span(_, end), Some(ref continued_line)) = pair { let has_line_gap = end.0 - self.start().0 > 1; if has_line_gap { format!( @@ -892,4 +905,26 @@ mod tests { .join("\n") ); } + + #[test] + fn pos_to_lcl_conversion() { + let input = "input"; + + let pos = Position::new(input, 2).unwrap(); + + assert_eq!(LineColLocation::Pos(pos.line_col()), pos.into()); + } + + #[test] + fn span_to_lcl_conversion() { + let input = "input"; + + let span = Span::new(input, 2, 4).unwrap(); + let (start, end) = span.split(); + + assert_eq!( + LineColLocation::Span(start.line_col(), end.line_col()), + span.into() + ); + } } diff --git a/vendor/pest/src/iterators/flat_pairs.rs b/vendor/pest/src/iterators/flat_pairs.rs index 52a207406..9b92f557a 100644 --- a/vendor/pest/src/iterators/flat_pairs.rs +++ b/vendor/pest/src/iterators/flat_pairs.rs @@ -25,7 +25,7 @@ pub struct FlatPairs<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. - queue: Rc>>, + queue: Rc>>, input: &'i str, start: usize, end: usize, @@ -35,12 +35,12 @@ pub struct FlatPairs<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. -pub unsafe fn new( - queue: Rc>>, - input: &str, +pub unsafe fn new<'i, R: RuleType>( + queue: Rc>>, + input: &'i str, start: usize, end: usize, -) -> FlatPairs<'_, R> { +) -> FlatPairs<'i, R> { FlatPairs { queue, input, @@ -102,6 +102,13 @@ impl<'i, R: RuleType> FlatPairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> { + fn len(&self) -> usize { + // Tokens len is exactly twice as flatten pairs len + (self.end - self.start) >> 1 + } +} + impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { type Item = Pair<'i, R>; @@ -122,6 +129,11 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { Some(pair) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { @@ -214,4 +226,22 @@ mod tests { assert_eq!(pair.line_col(), (1, 5)); assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "我很漂亮efgh").unwrap().flatten(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs = pairs.rev(); + assert_eq!(pairs.len(), pairs.count()); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.count() + 1, pairs_len); + } } diff --git a/vendor/pest/src/iterators/pair.rs b/vendor/pest/src/iterators/pair.rs index 891b90595..f1b311a40 100644 --- a/vendor/pest/src/iterators/pair.rs +++ b/vendor/pest/src/iterators/pair.rs @@ -12,6 +12,7 @@ use alloc::rc::Rc; #[cfg(feature = "pretty-print")] use alloc::string::String; use alloc::vec::Vec; +use core::borrow::Borrow; use core::fmt; use core::hash::{Hash, Hasher}; use core::ptr; @@ -40,7 +41,7 @@ pub struct Pair<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. - queue: Rc>>, + queue: Rc>>, input: &'i str, /// Token index into `queue`. start: usize, @@ -50,12 +51,12 @@ pub struct Pair<'i, R> { /// # Safety /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. -pub unsafe fn new( - queue: Rc>>, - input: &str, +pub unsafe fn new<'i, R: RuleType>( + queue: Rc>>, + input: &'i str, line_index: Rc, start: usize, -) -> Pair<'_, R> { +) -> Pair<'i, R> { Pair { queue, input, @@ -181,6 +182,15 @@ impl<'i, R: RuleType> Pair<'i, R> { unsafe { span::Span::new_unchecked(self.input, start, end) } } + /// Get current node tag + #[inline] + pub fn as_node_tag(&self) -> Option<&str> { + match &self.queue[self.pair()] { + QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()), + _ => None, + } + } + /// Returns the inner `Pairs` between the `Pair`, consuming it. /// /// # Examples @@ -291,9 +301,13 @@ impl<'i, R: RuleType> Pairs<'i, R> { impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Pair") - .field("rule", &self.as_rule()) - .field("span", &self.as_span()) + let pair = &mut f.debug_struct("Pair"); + pair.field("rule", &self.as_rule()); + // In order not to break compatibility + if let Some(s) = self.as_node_tag() { + pair.field("node_tag", &s); + } + pair.field("span", &self.as_span()) .field("inner", &self.clone().into_inner().collect::>()) .finish() } @@ -336,7 +350,7 @@ impl<'i, R: Eq> Eq for Pair<'i, R> {} impl<'i, R: Hash> Hash for Pair<'i, R> { fn hash(&self, state: &mut H) { - (&*self.queue as *const Vec>).hash(state); + (&*self.queue as *const Vec>).hash(state); (self.input as *const str).hash(state); self.start.hash(state); } diff --git a/vendor/pest/src/iterators/pairs.rs b/vendor/pest/src/iterators/pairs.rs index c21a7fae1..ab7df75ee 100644 --- a/vendor/pest/src/iterators/pairs.rs +++ b/vendor/pest/src/iterators/pairs.rs @@ -13,6 +13,7 @@ use alloc::string::String; use alloc::vec::Vec; use core::fmt; use core::hash::{Hash, Hasher}; +use core::iter::Filter; use core::ptr; use core::str; @@ -33,30 +34,44 @@ use crate::RuleType; /// [`Pair::into_inner`]: struct.Pair.html#method.into_inner #[derive(Clone)] pub struct Pairs<'i, R> { - queue: Rc>>, + queue: Rc>>, input: &'i str, start: usize, end: usize, + pairs_count: usize, line_index: Rc, } -pub fn new( - queue: Rc>>, - input: &str, +pub fn new<'i, R: RuleType>( + queue: Rc>>, + input: &'i str, line_index: Option>, start: usize, end: usize, -) -> Pairs<'_, R> { +) -> Pairs<'i, R> { let line_index = match line_index { Some(line_index) => line_index, None => Rc::new(LineIndex::new(input)), }; + let mut pairs_count = 0; + let mut cursor = start; + while cursor < end { + cursor = match queue[cursor] { + QueueableToken::Start { + end_token_index, .. + } => end_token_index, + _ => unreachable!(), + } + 1; + pairs_count += 1; + } + Pairs { queue, input, start, end, + pairs_count, line_index, } } @@ -159,6 +174,114 @@ impl<'i, R: RuleType> Pairs<'i, R> { unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) } } + /// Finds the first pair that has its node or branch tagged with the provided + /// label. + /// + /// # Examples + /// + /// Try to recognize the branch between add and mul + /// ``` + /// use pest::{state, ParseResult, ParserState}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// number, // 0..9 + /// add, // num + num + /// mul, // num * num + /// } + /// fn mark_branch( + /// state: Box>, + /// ) -> ParseResult>> { + /// expr(state, Rule::mul, "*") + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul"))) + /// .or_else(|state| expr(state, Rule::add, "+")) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add"))) + /// } + /// fn expr<'a>( + /// state: Box>, + /// r: Rule, + /// o: &'static str, + /// ) -> ParseResult>> { + /// state.rule(r, |state| { + /// state.sequence(|state| { + /// number(state) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs"))) + /// .and_then(|state| state.match_string(o)) + /// .and_then(number) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs"))) + /// }) + /// }) + /// } + /// fn number(state: Box>) -> ParseResult>> { + /// state.rule(Rule::number, |state| state.match_range('0'..'9')) + /// } + /// let input = "1+2"; + /// let pairs = state(input, mark_branch).unwrap(); + /// assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add); + /// assert_eq!(pairs.find_first_tagged("mul"), None); + /// ``` + #[inline] + pub fn find_first_tagged(&self, tag: &'i str) -> Option> { + self.clone().find_tagged(tag).next() + } + + /// Returns the iterator over pairs that have their node or branch tagged + /// with the provided label. + /// + /// # Examples + /// + /// Try to recognize the node between left and right hand side + /// ``` + /// use pest::{state, ParseResult, ParserState}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// number, // 0..9 + /// add, // num + num + /// mul, // num * num + /// } + /// fn mark_branch( + /// state: Box>, + /// ) -> ParseResult>> { + /// expr(state, Rule::mul, "*") + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul"))) + /// .or_else(|state| expr(state, Rule::add, "+")) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add"))) + /// } + /// fn expr<'a>( + /// state: Box>, + /// r: Rule, + /// o: &'static str, + /// ) -> ParseResult>> { + /// state.rule(r, |state| { + /// state.sequence(|state| { + /// number(state) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs"))) + /// .and_then(|state| state.match_string(o)) + /// .and_then(number) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs"))) + /// }) + /// }) + /// } + /// fn number(state: Box>) -> ParseResult>> { + /// state.rule(Rule::number, |state| state.match_range('0'..'9')) + /// } + /// + /// let input = "1+2"; + /// let pairs = state(input, mark_branch).unwrap(); + /// let mut left_numbers = pairs.find_tagged("lhs"); + /// assert_eq!(left_numbers.next().unwrap().as_str(), "1"); + /// assert_eq!(left_numbers.next(), None); + /// ``` + #[inline] + pub fn find_tagged( + self, + tag: &'i str, + ) -> Filter, impl FnMut(&Pair<'i, R>) -> bool + '_> { + self.flatten() + .filter(move |pair: &Pair<'i, R>| matches!(pair.as_node_tag(), Some(nt) if nt == tag)) + } + /// Returns the `Tokens` for the `Pairs`. /// /// # Examples @@ -237,6 +360,13 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> { + #[inline] + fn len(&self) -> usize { + self.pairs_count + } +} + impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; @@ -244,8 +374,14 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { let pair = self.peek()?; self.start = self.pair() + 1; + self.pairs_count -= 1; Some(pair) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { @@ -255,6 +391,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { } self.end = self.pair_from_end(); + self.pairs_count -= 1; let pair = unsafe { pair::new( @@ -301,7 +438,7 @@ impl<'i, R: Eq> Eq for Pairs<'i, R> {} impl<'i, R: Hash> Hash for Pairs<'i, R> { fn hash(&self, state: &mut H) { - (&*self.queue as *const Vec>).hash(state); + (&*self.queue as *const Vec>).hash(state); (self.input as *const str).hash(state); self.start.hash(state); self.end.hash(state); @@ -330,6 +467,7 @@ mod tests { use super::super::super::macros::tests::*; use super::super::super::Parser; use alloc::borrow::ToOwned; + use alloc::boxed::Box; use alloc::format; use alloc::vec; use alloc::vec::Vec; @@ -479,4 +617,69 @@ mod tests { assert_eq!(pair.as_str(), "abc"); assert_eq!(pair.line_col(), (1, 1)); } + + #[test] + fn test_tag_node_branch() { + use crate::{state, ParseResult, ParserState}; + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + enum Rule { + number, // 0..9 + add, // num + num + mul, // num * num + } + fn mark_branch( + state: Box>, + ) -> ParseResult>> { + expr(state, Rule::mul, "*") + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("mul"))) + .or_else(|state| expr(state, Rule::add, "+")) + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("add"))) + } + fn expr<'a>( + state: Box>, + r: Rule, + o: &'static str, + ) -> ParseResult>> { + state.rule(r, |state| { + state.sequence(|state| { + number(state) + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("lhs"))) + .and_then(|state| state.match_string(o)) + .and_then(number) + .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("rhs"))) + }) + }) + } + fn number(state: Box>) -> ParseResult>> { + state.rule(Rule::number, |state| state.match_range('0'..'9')) + } + let input = "1+2"; + let pairs = state(input, mark_branch).unwrap(); + assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add); + assert_eq!(pairs.find_first_tagged("mul"), None); + + let mut left_numbers = pairs.clone().find_tagged("lhs"); + + assert_eq!(left_numbers.next().unwrap().as_str(), "1"); + assert_eq!(left_numbers.next(), None); + let mut right_numbers = pairs.find_tagged("rhs"); + + assert_eq!(right_numbers.next().unwrap().as_str(), "2"); + assert_eq!(right_numbers.next(), None); + } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + assert_eq!(pairs.len(), pairs.count()); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.count() + 1, pairs_len); + } } diff --git a/vendor/pest/src/iterators/queueable_token.rs b/vendor/pest/src/iterators/queueable_token.rs index 7d56749bb..67426092b 100644 --- a/vendor/pest/src/iterators/queueable_token.rs +++ b/vendor/pest/src/iterators/queueable_token.rs @@ -7,6 +7,8 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +use alloc::borrow::Cow; + // This structure serves to improve performance over Token objects in two ways: // // * it is smaller than a Token, leading to both less memory use when stored in the queue but also @@ -14,7 +16,7 @@ // * it finds its pair in O(1) time instead of O(N), since pair positions are known at parse time // and can easily be stored instead of recomputed #[derive(Debug)] -pub enum QueueableToken { +pub enum QueueableToken<'i, R> { Start { end_token_index: usize, input_pos: usize, @@ -22,6 +24,7 @@ pub enum QueueableToken { End { start_token_index: usize, rule: R, + tag: Option>, input_pos: usize, }, } diff --git a/vendor/pest/src/iterators/tokens.rs b/vendor/pest/src/iterators/tokens.rs index 0d462711e..41cbc472d 100644 --- a/vendor/pest/src/iterators/tokens.rs +++ b/vendor/pest/src/iterators/tokens.rs @@ -27,19 +27,19 @@ pub struct Tokens<'i, R> { /// # Safety: /// /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`. - queue: Rc>>, + queue: Rc>>, input: &'i str, start: usize, end: usize, } // TODO(safety): QueueableTokens must be valid indices into input. -pub fn new( - queue: Rc>>, - input: &str, +pub fn new<'i, R: RuleType>( + queue: Rc>>, + input: &'i str, start: usize, end: usize, -) -> Tokens<'_, R> { +) -> Tokens<'i, R> { if cfg!(debug_assertions) { for tok in queue.iter() { match *tok { @@ -92,6 +92,12 @@ impl<'i, R: RuleType> Tokens<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> { + fn len(&self) -> usize { + self.end - self.start + } +} + impl<'i, R: RuleType> Iterator for Tokens<'i, R> { type Item = Token<'i, R>; @@ -106,6 +112,11 @@ impl<'i, R: RuleType> Iterator for Tokens<'i, R> { Some(token) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> { @@ -143,4 +154,21 @@ mod tests { let reverse_tokens = pairs.tokens().rev().collect::>>(); assert_eq!(tokens, reverse_tokens); } + + #[test] + fn exact_size_iter_for_tokens() { + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); + assert_eq!(tokens.len(), tokens.count()); + + let tokens = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens(); + assert_eq!(tokens.len(), tokens.count()); + + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev(); + assert_eq!(tokens.len(), tokens.count()); + + let mut tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); + let tokens_len = tokens.len(); + let _ = tokens.next().unwrap(); + assert_eq!(tokens.count() + 1, tokens_len); + } } diff --git a/vendor/pest/src/parser_state.rs b/vendor/pest/src/parser_state.rs index f58de00c8..5a10b420b 100644 --- a/vendor/pest/src/parser_state.rs +++ b/vendor/pest/src/parser_state.rs @@ -7,7 +7,7 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. -use alloc::borrow::ToOwned; +use alloc::borrow::{Cow, ToOwned}; use alloc::boxed::Box; use alloc::rc::Rc; use alloc::vec; @@ -128,7 +128,7 @@ impl CallLimitTracker { #[derive(Debug)] pub struct ParserState<'i, R: RuleType> { position: Position<'i>, - queue: Vec>, + queue: Vec>, lookahead: Lookahead, pos_attempts: Vec, neg_attempts: Vec, @@ -345,6 +345,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { new_state.queue.push(QueueableToken::End { start_token_index: index, rule, + tag: None, input_pos: new_pos, }); } @@ -373,6 +374,46 @@ impl<'i, R: RuleType> ParserState<'i, R> { } } + /// Tag current node + /// + /// # Examples + /// + /// Try to recognize the one specified in a set of characters + /// + /// ``` + /// use pest::{state, ParseResult, ParserState, iterators::Pair}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// character, + /// } + /// fn mark_c(state: Box>) -> ParseResult>> { + /// state.sequence(|state| { + /// character(state) + /// .and_then(|state| character(state)) + /// .and_then(|state| character(state)) + /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("c"))) + /// .and_then(|state| character(state)) + /// }) + /// } + /// fn character(state: Box>) -> ParseResult>> { + /// state.rule(Rule::character, |state| state.match_range('a'..'z')) + /// } + /// + /// let input = "abcd"; + /// let pairs = state(input, mark_c).unwrap(); + /// // find all node tag as `c` + /// let find: Vec> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect(); + /// assert_eq!(find[0].as_str(), "c") + /// ``` + #[inline] + pub fn tag_node(mut self: Box, tag: Cow<'i, str>) -> ParseResult> { + if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() { + *old = Some(tag) + } + Ok(self) + } + fn attempts_at(&self, pos: usize) -> usize { if self.attempt_pos == pos { self.pos_attempts.len() + self.neg_attempts.len() diff --git a/vendor/pest/src/pratt_parser.rs b/vendor/pest/src/pratt_parser.rs index 76ffdf755..f042f8252 100644 --- a/vendor/pest/src/pratt_parser.rs +++ b/vendor/pest/src/pratt_parser.rs @@ -142,8 +142,8 @@ impl BitOr for Op { /// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left)) /// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left)) /// .op(Op::infix(Rule::pow, Assoc::Right)) -/// .op(Op::postfix(Rule::fac)) -/// .op(Op::prefix(Rule::neg)); +/// .op(Op::prefix(Rule::neg)) +/// .op(Op::postfix(Rule::fac)); /// ``` /// /// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`], diff --git a/vendor/pest/src/unicode/mod.rs b/vendor/pest/src/unicode/mod.rs index 8a56bd6e8..6d7cb1d69 100644 --- a/vendor/pest/src/unicode/mod.rs +++ b/vendor/pest/src/unicode/mod.rs @@ -56,6 +56,7 @@ char_property_functions! { ALPHABETIC, BIDI_CONTROL, CASE_IGNORABLE, CASED, CHANGES_WHEN_CASEFOLDED, CHANGES_WHEN_CASEMAPPED, CHANGES_WHEN_LOWERCASED, CHANGES_WHEN_TITLECASED, CHANGES_WHEN_UPPERCASED, DASH, DEFAULT_IGNORABLE_CODE_POINT, DEPRECATED, DIACRITIC, + EMOJI, EMOJI_COMPONENT, EMOJI_MODIFIER, EMOJI_MODIFIER_BASE, EMOJI_PRESENTATION, EXTENDED_PICTOGRAPHIC, EXTENDER, GRAPHEME_BASE, GRAPHEME_EXTEND, GRAPHEME_LINK, HEX_DIGIT, HYPHEN, IDS_BINARY_OPERATOR, IDS_TRINARY_OPERATOR, ID_CONTINUE, ID_START, IDEOGRAPHIC, JOIN_CONTROL, LOGICAL_ORDER_EXCEPTION, LOWERCASE, MATH, NONCHARACTER_CODE_POINT, OTHER_ALPHABETIC, -- cgit v1.2.3