diff options
Diffstat (limited to 'vendor/pest/src/position.rs')
-rw-r--r-- | vendor/pest/src/position.rs | 176 |
1 files changed, 129 insertions, 47 deletions
diff --git a/vendor/pest/src/position.rs b/vendor/pest/src/position.rs index c76589856..f91f8291e 100644 --- a/vendor/pest/src/position.rs +++ b/vendor/pest/src/position.rs @@ -32,7 +32,7 @@ impl<'i> Position<'i> { /// # Safety: /// /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus). - pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position { + pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position<'_> { debug_assert!(input.get(pos..).is_some()); Position { input, pos } } @@ -49,7 +49,7 @@ impl<'i> Position<'i> { /// assert_eq!(Position::new(heart, 1), None); /// assert_ne!(Position::new(heart, cheart.len_utf8()), None); /// ``` - pub fn new(input: &str, pos: usize) -> Option<Position> { + pub fn new(input: &str, pos: usize) -> Option<Position<'_>> { input.get(pos..).map(|_| Position { input, pos }) } @@ -125,7 +125,7 @@ impl<'i> Position<'i> { /// enum Rule {} /// /// let input = "\na"; - /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input); + /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); /// let mut result = state.match_string("\na"); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().line_col(), (2, 2)); @@ -135,45 +135,14 @@ impl<'i> Position<'i> { if self.pos > self.input.len() { panic!("position out of bounds"); } - - let mut pos = self.pos; - // Position's pos is always a UTF-8 border. 
- let slice = &self.input[..pos]; - let mut chars = slice.chars().peekable(); - - let mut line_col = (1, 1); - - while pos != 0 { - match chars.next() { - Some('\r') => { - if let Some(&'\n') = chars.peek() { - chars.next(); - - if pos == 1 { - pos -= 1; - } else { - pos -= 2; - } - - line_col = (line_col.0 + 1, 1); - } else { - pos -= 1; - line_col = (line_col.0, line_col.1 + 1); - } - } - Some('\n') => { - pos -= 1; - line_col = (line_col.0 + 1, 1); - } - Some(c) => { - pos -= c.len_utf8(); - line_col = (line_col.0, line_col.1 + 1); - } - None => unreachable!(), - } + #[cfg(feature = "fast-line-col")] + { + fast_line_col(self.input, self.pos) + } + #[cfg(not(feature = "fast-line-col"))] + { + original_line_col(self.input, self.pos) } - - line_col } /// Returns the entire line of the input that contains this `Position`. @@ -187,7 +156,7 @@ impl<'i> Position<'i> { /// enum Rule {} /// /// let input = "\na"; - /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input); + /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); /// let mut result = state.match_string("\na"); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().line_of(), "a"); @@ -256,7 +225,7 @@ impl<'i> Position<'i> { let skipped = { let mut len = 0; // Position's pos is always a UTF-8 border. - let mut chars = (&self.input[self.pos..]).chars(); + let mut chars = self.input[self.pos..].chars(); for _ in 0..n { if let Some(c) = chars.next() { len += c.len_utf8(); @@ -278,7 +247,7 @@ impl<'i> Position<'i> { let skipped = { let mut len = 0; // Position's pos is always a UTF-8 border. - let mut chars = (&self.input[..self.pos]).chars().rev(); + let mut chars = self.input[..self.pos].chars().rev(); for _ in 0..n { if let Some(c) = chars.next() { len += c.len_utf8(); @@ -297,6 +266,60 @@ impl<'i> Position<'i> { /// this function will return `false` but its `pos` will *still* be updated. 
#[inline] pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool { + #[cfg(not(feature = "memchr"))] + { + self.skip_until_basic(strings) + } + #[cfg(feature = "memchr")] + { + match strings { + [] => (), + [s1] => { + if let Some(from) = + memchr::memmem::find(&self.input.as_bytes()[self.pos..], s1.as_bytes()) + { + self.pos += from; + return true; + } + } + [s1, s2] if !s1.is_empty() && !s2.is_empty() => { + let b1 = s1.as_bytes()[0]; + let b2 = s2.as_bytes()[0]; + let miter = memchr::memchr2_iter(b1, b2, &self.input.as_bytes()[self.pos..]); + for from in miter { + let start = &self.input[self.pos + from..]; + if start.starts_with(s1) || start.starts_with(s2) { + self.pos += from; + return true; + } + } + } + [s1, s2, s3] if !s1.is_empty() && !s2.is_empty() && !s3.is_empty() => { + let b1 = s1.as_bytes()[0]; + let b2 = s2.as_bytes()[0]; + let b3 = s3.as_bytes()[0]; + let miter = + memchr::memchr3_iter(b1, b2, b3, &self.input.as_bytes()[self.pos..]); + for from in miter { + let start = &self.input[self.pos + from..]; + if start.starts_with(s1) || start.starts_with(s2) || start.starts_with(s3) { + self.pos += from; + return true; + } + } + } + _ => { + return self.skip_until_basic(strings); + } + } + self.pos = self.input.len(); + false + } + } + + #[inline] + fn skip_until_basic(&mut self, strings: &[&str]) -> bool { + // TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse? for from in self.pos..self.input.len() { let bytes = if let Some(string) = self.input.get(from..) { string.as_bytes() @@ -332,7 +355,7 @@ impl<'i> Position<'i> { where F: FnOnce(char) -> bool, { - if let Some(c) = (&self.input[self.pos..]).chars().next() { + if let Some(c) = self.input[self.pos..].chars().next() { if f(c) { self.pos += c.len_utf8(); true @@ -383,7 +406,7 @@ impl<'i> Position<'i> { /// otherwise. If no match was made, `pos` will not be updated. 
#[inline] pub(crate) fn match_range(&mut self, range: Range<char>) -> bool { - if let Some(c) = (&self.input[self.pos..]).chars().next() { + if let Some(c) = self.input[self.pos..].chars().next() { if range.start <= c && c <= range.end { self.pos += c.len_utf8(); return true; @@ -395,7 +418,7 @@ impl<'i> Position<'i> { } impl<'i> fmt::Debug for Position<'i> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Position").field("pos", &self.pos).finish() } } @@ -432,6 +455,63 @@ impl<'i> Hash for Position<'i> { } } +#[inline] +#[cfg(not(feature = "fast-line-col"))] +fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) { + // Position's pos is always a UTF-8 border. + let slice = &input[..pos]; + let mut chars = slice.chars().peekable(); + + let mut line_col = (1, 1); + + while pos != 0 { + match chars.next() { + Some('\r') => { + if let Some(&'\n') = chars.peek() { + chars.next(); + + if pos == 1 { + pos -= 1; + } else { + pos -= 2; + } + + line_col = (line_col.0 + 1, 1); + } else { + pos -= 1; + line_col = (line_col.0, line_col.1 + 1); + } + } + Some('\n') => { + pos -= 1; + line_col = (line_col.0 + 1, 1); + } + Some(c) => { + pos -= c.len_utf8(); + line_col = (line_col.0, line_col.1 + 1); + } + None => unreachable!(), + } + } + + line_col +} + +#[inline] +#[cfg(feature = "fast-line-col")] +fn fast_line_col(input: &str, pos: usize) -> (usize, usize) { + // Position's pos is always a UTF-8 border. 
+ let slice = &input[..pos]; + + let prec_ln = memchr::memrchr(b'\n', slice.as_bytes()); + if let Some(prec_nl_pos) = prec_ln { + let lines = bytecount::count(slice[..=prec_nl_pos].as_bytes(), b'\n') + 1; + (lines, slice[prec_nl_pos..].chars().count()) + } else { + (1, slice.chars().count() + 1) + } +} + #[cfg(test)] mod tests { use super::*; @@ -465,6 +545,8 @@ mod tests { assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1)); assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2)); assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3)); + let input = "abcd嗨"; + assert_eq!(Position::new(input, 7).unwrap().line_col(), (1, 6)); } #[test] |