diff options
Diffstat (limited to 'vendor/pest/src/iterators')
-rw-r--r-- | vendor/pest/src/iterators/flat_pairs.rs | 43 | ||||
-rw-r--r-- | vendor/pest/src/iterators/line_index.rs | 91 | ||||
-rw-r--r-- | vendor/pest/src/iterators/mod.rs | 1 | ||||
-rw-r--r-- | vendor/pest/src/iterators/pair.rs | 26 | ||||
-rw-r--r-- | vendor/pest/src/iterators/pairs.rs | 60 |
5 files changed, 214 insertions, 7 deletions
diff --git a/vendor/pest/src/iterators/flat_pairs.rs b/vendor/pest/src/iterators/flat_pairs.rs index 6d310272e..52a207406 100644 --- a/vendor/pest/src/iterators/flat_pairs.rs +++ b/vendor/pest/src/iterators/flat_pairs.rs @@ -11,6 +11,7 @@ use alloc::rc::Rc; use alloc::vec::Vec; use core::fmt; +use super::line_index::LineIndex; use super::pair::{self, Pair}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; @@ -28,6 +29,7 @@ pub struct FlatPairs<'i, R> { input: &'i str, start: usize, end: usize, + line_index: Rc<LineIndex>, } /// # Safety @@ -42,6 +44,7 @@ pub unsafe fn new<R: RuleType>( FlatPairs { queue, input, + line_index: Rc::new(LineIndex::new(input)), start, end, } @@ -107,8 +110,14 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { return None; } - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }; - + let pair = unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + Rc::clone(&self.line_index), + self.start, + ) + }; self.next_start(); Some(pair) @@ -123,7 +132,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { self.next_start_from_end(); - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) }; + let pair = unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + Rc::clone(&self.line_index), + self.end, + ) + }; Some(pair) } @@ -142,6 +158,7 @@ impl<'i, R: Clone> Clone for FlatPairs<'i, R> { FlatPairs { queue: Rc::clone(&self.queue), input: self.input, + line_index: Rc::clone(&self.line_index), start: self.start, end: self.end, } @@ -177,4 +194,24 @@ mod tests { vec![Rule::c, Rule::b, Rule::a] ); } + + #[test] + fn test_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abcNe\nabcde").unwrap().flatten(); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); + + let pair = pairs.next().unwrap(); + 
assert_eq!(pair.as_str(), "b"); + assert_eq!(pair.line_col(), (1, 2)); + assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (1, 5)); + assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); + } } diff --git a/vendor/pest/src/iterators/line_index.rs b/vendor/pest/src/iterators/line_index.rs new file mode 100644 index 000000000..54871e1bd --- /dev/null +++ b/vendor/pest/src/iterators/line_index.rs @@ -0,0 +1,91 @@ +//! `LineIndex` builds `line_offsets`, where each item is a byte offset (starting from 0) of the beginning of a line. +//! +//! For example, for the text `"hello 你好\nworld"`, the line_offsets will store `[0, 13]`. +//! +//! Then `line_col` with an offset just needs to find the line index by binary search. +//! +//! Inspired by rust-analyzer's `LineIndex`: +//! <https://github.com/rust-lang/rust/blob/1.67.0/src/tools/rust-analyzer/crates/ide-db/src/line_index.rs> +use alloc::vec::Vec; + +#[derive(Clone)] +pub struct LineIndex { + /// Offset (bytes) of the beginning of each line, zero-based + line_offsets: Vec<usize>, +} + +impl LineIndex { + pub fn new(text: &str) -> LineIndex { + let mut line_offsets: Vec<usize> = alloc::vec![0]; + + let mut offset = 0; + + for c in text.chars() { + offset += c.len_utf8(); + if c == '\n' { + line_offsets.push(offset); + } + } + + LineIndex { line_offsets } + } + + /// Returns (line, col) of pos. + /// + /// The pos is a byte offset, starting from 0, e.g. 
"ab" is 2, "你好" is 6 + pub fn line_col(&self, input: &str, pos: usize) -> (usize, usize) { + let line = self.line_offsets.partition_point(|&it| it <= pos) - 1; + let first_offset = self.line_offsets[line]; + + // Get line str from original input, then we can get column offset + let line_str = &input[first_offset..pos]; + let col = line_str.chars().count(); + + (line + 1, col + 1) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[allow(clippy::zero_prefixed_literal)] + #[test] + fn test_line_index() { + let text = "hello 你好 A🎈C\nworld"; + let table = [ + (00, 1, 1, 'h'), + (01, 1, 2, 'e'), + (02, 1, 3, 'l'), + (03, 1, 4, 'l'), + (04, 1, 5, 'o'), + (05, 1, 6, ' '), + (06, 1, 7, '你'), + (09, 1, 8, '好'), + (12, 1, 9, ' '), + (13, 1, 10, 'A'), + (14, 1, 11, '🎈'), + (18, 1, 12, 'C'), + (19, 1, 13, '\n'), + (20, 2, 1, 'w'), + (21, 2, 2, 'o'), + (22, 2, 3, 'r'), + (23, 2, 4, 'l'), + (24, 2, 5, 'd'), + ]; + + let index = LineIndex::new(text); + for &(offset, line, col, c) in table.iter() { + let res = index.line_col(text, offset); + assert_eq!( + (res.0, res.1), + (line, col), + "Expected: ({}, {}, {}, {:?})", + offset, + line, + col, + c + ); + } + } +} diff --git a/vendor/pest/src/iterators/mod.rs b/vendor/pest/src/iterators/mod.rs index 1a7896371..7f81019ac 100644 --- a/vendor/pest/src/iterators/mod.rs +++ b/vendor/pest/src/iterators/mod.rs @@ -10,6 +10,7 @@ //! Types and iterators for parser output. 
mod flat_pairs; +mod line_index; mod pair; pub(crate) mod pairs; mod queueable_token; diff --git a/vendor/pest/src/iterators/pair.rs b/vendor/pest/src/iterators/pair.rs index 62c95e037..891b90595 100644 --- a/vendor/pest/src/iterators/pair.rs +++ b/vendor/pest/src/iterators/pair.rs @@ -20,6 +20,7 @@ use core::str; #[cfg(feature = "pretty-print")] use serde::ser::SerializeStruct; +use super::line_index::LineIndex; use super::pairs::{self, Pairs}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; @@ -43,6 +44,7 @@ pub struct Pair<'i, R> { input: &'i str, /// Token index into `queue`. start: usize, + line_index: Rc<LineIndex>, } /// # Safety @@ -51,12 +53,14 @@ pub struct Pair<'i, R> { pub unsafe fn new<R: RuleType>( queue: Rc<Vec<QueueableToken<R>>>, input: &str, + line_index: Rc<LineIndex>, start: usize, ) -> Pair<'_, R> { Pair { queue, input, start, + line_index, } } @@ -202,7 +206,13 @@ impl<'i, R: RuleType> Pair<'i, R> { pub fn into_inner(self) -> Pairs<'i, R> { let pair = self.pair(); - pairs::new(self.queue, self.input, self.start + 1, pair) + pairs::new( + self.queue, + self.input, + Some(self.line_index), + self.start + 1, + pair, + ) } /// Returns the `Tokens` for the `Pair`. @@ -241,6 +251,12 @@ impl<'i, R: RuleType> Pair<'i, R> { ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.") } + /// Returns the `line`, `col` of this pair start. + pub fn line_col(&self) -> (usize, usize) { + let pos = self.pos(self.start); + self.line_index.line_col(self.input, pos) + } + fn pair(&self) -> usize { match self.queue[self.start] { QueueableToken::Start { @@ -263,7 +279,13 @@ impl<'i, R: RuleType> Pairs<'i, R> { /// Create a new `Pairs` iterator containing just the single `Pair`. 
pub fn single(pair: Pair<'i, R>) -> Self { let end = pair.pair(); - pairs::new(pair.queue, pair.input, pair.start, end) + pairs::new( + pair.queue, + pair.input, + Some(pair.line_index), + pair.start, + end, + ) } } diff --git a/vendor/pest/src/iterators/pairs.rs b/vendor/pest/src/iterators/pairs.rs index e478cebf2..c21a7fae1 100644 --- a/vendor/pest/src/iterators/pairs.rs +++ b/vendor/pest/src/iterators/pairs.rs @@ -20,6 +20,7 @@ use core::str; use serde::ser::SerializeStruct; use super::flat_pairs::{self, FlatPairs}; +use super::line_index::LineIndex; use super::pair::{self, Pair}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; @@ -36,19 +37,27 @@ pub struct Pairs<'i, R> { input: &'i str, start: usize, end: usize, + line_index: Rc<LineIndex>, } pub fn new<R: RuleType>( queue: Rc<Vec<QueueableToken<R>>>, input: &str, + line_index: Option<Rc<LineIndex>>, start: usize, end: usize, ) -> Pairs<'_, R> { + let line_index = match line_index { + Some(line_index) => line_index, + None => Rc::new(LineIndex::new(input)), + }; + Pairs { queue, input, start, end, + line_index, } } @@ -181,7 +190,14 @@ impl<'i, R: RuleType> Pairs<'i, R> { #[inline] pub fn peek(&self) -> Option<Pair<'i, R>> { if self.start < self.end { - Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }) + Some(unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + Rc::clone(&self.line_index), + self.start, + ) + }) } else { None } @@ -226,6 +242,7 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { fn next(&mut self) -> Option<Self::Item> { let pair = self.peek()?; + self.start = self.pair() + 1; Some(pair) } @@ -239,7 +256,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { self.end = self.pair_from_end(); - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) }; + let pair = unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + Rc::clone(&self.line_index), + self.end, + ) + }; Some(pair) } @@ 
-423,4 +447,36 @@ mod tests { vec![Rule::c, Rule::a] ); } + + #[test] + fn test_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (2, 1)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "fgh"); + assert_eq!(pair.line_col(), (2, 2)); + } + + #[test] + fn test_rev_iter_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "fgh"); + assert_eq!(pair.line_col(), (2, 2)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (2, 1)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + } } |