diff options
Diffstat (limited to 'vendor/pest/src/position.rs')
-rw-r--r-- | vendor/pest/src/position.rs | 619 |
1 files changed, 619 insertions, 0 deletions
diff --git a/vendor/pest/src/position.rs b/vendor/pest/src/position.rs new file mode 100644 index 000000000..76e3383ba --- /dev/null +++ b/vendor/pest/src/position.rs @@ -0,0 +1,619 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 Dragoș Tiselice +// +// Licensed under the Apache License, Version 2.0 +// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT +// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +use std::cmp::Ordering; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ops::Range; +use std::ptr; +use std::str; + +use span; + +/// A cursor position in a `&str` which provides useful methods to manually parse that string. +#[derive(Clone)] +pub struct Position<'i> { + input: &'i str, + /// # Safety: + /// + /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus). + pos: usize, +} + +impl<'i> Position<'i> { + /// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.) + /// + /// # Safety: + /// + /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus). + pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position { + debug_assert!(input.get(pos..).is_some()); + Position { input, pos } + } + + /// Attempts to create a new `Position` at the given position. If the specified position is + /// an invalid index, or the specified position is not a valid UTF8 boundary, then None is + /// returned. + /// + /// # Examples + /// ``` + /// # use pest::Position; + /// let cheart = '💖'; + /// let heart = "💖"; + /// assert_eq!(Position::new(heart, 1), None); + /// assert_ne!(Position::new(heart, cheart.len_utf8()), None); + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new(input: &str, pos: usize) -> Option<Position> { + input.get(pos..).map(|_| Position { input, pos }) + } + + /// Creates a `Position` at the start of a `&str`. + /// + /// # Examples + /// + /// ``` + /// # use pest::Position; + /// let start = Position::from_start(""); + /// assert_eq!(start.pos(), 0); + /// ``` + #[inline] + pub fn from_start(input: &'i str) -> Position<'i> { + // Position 0 is always safe because it's always a valid UTF-8 border. + Position { input, pos: 0 } + } + + /// Returns the byte position of this `Position` as a `usize`. + /// + /// # Examples + /// + /// ``` + /// # use pest::Position; + /// let input = "ab"; + /// let mut start = Position::from_start(input); + /// + /// assert_eq!(start.pos(), 0); + /// ``` + #[inline] + pub fn pos(&self) -> usize { + self.pos + } + + /// Creates a `Span` from two `Position`s. + /// + /// # Panics + /// + /// Panics if the positions come from different inputs. + /// + /// # Examples + /// + /// ``` + /// # use pest::Position; + /// let input = "ab"; + /// let start = Position::from_start(input); + /// let span = start.span(&start.clone()); + /// + /// assert_eq!(span.start(), 0); + /// assert_eq!(span.end(), 0); + /// ``` + #[inline] + pub fn span(&self, other: &Position<'i>) -> span::Span<'i> { + if ptr::eq(self.input, other.input) + /* && self.input.get(self.pos..other.pos).is_some() */ + { + // This is safe because the pos field of a Position should always be a valid str index. + unsafe { span::Span::new_unchecked(self.input, self.pos, other.pos) } + } else { + // TODO: maybe a panic if self.pos < other.pos + panic!("span created from positions from different inputs") + } + } + + /// Returns the line and column number of this `Position`. + /// + /// # Examples + /// + /// ``` + /// # use pest; + /// # #[allow(non_camel_case_types)] + /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule {} + /// + /// let input = "\na"; + /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input); + /// let mut result = state.match_string("\na"); + /// assert!(result.is_ok()); + /// assert_eq!(result.unwrap().position().line_col(), (2, 2)); + /// ``` + #[inline] + pub fn line_col(&self) -> (usize, usize) { + if self.pos > self.input.len() { + panic!("position out of bounds"); + } + + let mut pos = self.pos; + // Position's pos is always a UTF-8 border. + let slice = &self.input[..pos]; + let mut chars = slice.chars().peekable(); + + let mut line_col = (1, 1); + + while pos != 0 { + match chars.next() { + Some('\r') => { + if let Some(&'\n') = chars.peek() { + chars.next(); + + if pos == 1 { + pos -= 1; + } else { + pos -= 2; + } + + line_col = (line_col.0 + 1, 1); + } else { + pos -= 1; + line_col = (line_col.0, line_col.1 + 1); + } + } + Some('\n') => { + pos -= 1; + line_col = (line_col.0 + 1, 1); + } + Some(c) => { + pos -= c.len_utf8(); + line_col = (line_col.0, line_col.1 + 1); + } + None => unreachable!(), + } + } + + line_col + } + + /// Returns the entire line of the input that contains this `Position`. + /// + /// # Examples + /// + /// ``` + /// # use pest; + /// # #[allow(non_camel_case_types)] + /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule {} + /// + /// let input = "\na"; + /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input); + /// let mut result = state.match_string("\na"); + /// assert!(result.is_ok()); + /// assert_eq!(result.unwrap().position().line_of(), "a"); + /// ``` + #[inline] + pub fn line_of(&self) -> &'i str { + if self.pos > self.input.len() { + panic!("position out of bounds"); + }; + // Safe since start and end can only be valid UTF-8 borders. + &self.input[self.find_line_start()..self.find_line_end()] + } + + pub(crate) fn find_line_start(&self) -> usize { + if self.input.is_empty() { + return 0; + }; + // Position's pos is always a UTF-8 border. + let start = self + .input + .char_indices() + .rev() + .skip_while(|&(i, _)| i >= self.pos) + .find(|&(_, c)| c == '\n'); + match start { + Some((i, _)) => i + 1, + None => 0, + } + } + + pub(crate) fn find_line_end(&self) -> usize { + if self.input.is_empty() { + 0 + } else if self.pos == self.input.len() - 1 { + self.input.len() + } else { + // Position's pos is always a UTF-8 border. + let end = self + .input + .char_indices() + .skip_while(|&(i, _)| i < self.pos) + .find(|&(_, c)| c == '\n'); + match end { + Some((i, _)) => i + 1, + None => self.input.len(), + } + } + } + + /// Returns `true` when the `Position` points to the start of the input `&str`. + #[inline] + pub(crate) fn at_start(&self) -> bool { + self.pos == 0 + } + + /// Returns `true` when the `Position` points to the end of the input `&str`. + #[inline] + pub(crate) fn at_end(&self) -> bool { + self.pos == self.input.len() + } + + /// Skips `n` `char`s from the `Position` and returns `true` if the skip was possible or `false` + /// otherwise. If the return value is `false`, `pos` will not be updated. + #[inline] + pub(crate) fn skip(&mut self, n: usize) -> bool { + let skipped = { + let mut len = 0; + // Position's pos is always a UTF-8 border. + let mut chars = (&self.input[self.pos..]).chars(); + for _ in 0..n { + if let Some(c) = chars.next() { + len += c.len_utf8(); + } else { + return false; + } + } + len + }; + + self.pos += skipped; + true + } + + /// Goes back `n` `char`s from the `Position` and returns `true` if the skip was possible or `false` + /// otherwise. If the return value is `false`, `pos` will not be updated. + #[inline] + pub(crate) fn skip_back(&mut self, n: usize) -> bool { + let skipped = { + let mut len = 0; + // Position's pos is always a UTF-8 border. + let mut chars = (&self.input[..self.pos]).chars().rev(); + for _ in 0..n { + if let Some(c) = chars.next() { + len += c.len_utf8(); + } else { + return false; + } + } + len + }; + + self.pos -= skipped; + true + } + + /// Skips until one of the given `strings` is found. If none of the `strings` can be found, + /// this function will return `false` but its `pos` will *still* be updated. + #[inline] + pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool { + for from in self.pos..self.input.len() { + let bytes = if let Some(string) = self.input.get(from..) { + string.as_bytes() + } else { + continue; + }; + + for slice in strings.iter() { + let to = slice.len(); + if Some(slice.as_bytes()) == bytes.get(0..to) { + self.pos = from; + return true; + } + } + } + + self.pos = self.input.len(); + false + } + + /// Matches the char at the `Position` against a filter function and returns `true` if a match + /// was made. If no match was made, returns `false` and `pos` will not be updated. + #[inline] + pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool + where + F: FnOnce(char) -> bool, + { + if let Some(c) = (&self.input[self.pos..]).chars().next() { + if f(c) { + self.pos += c.len_utf8(); + true + } else { + false + } + } else { + false + } + } + + /// Matches `string` from the `Position` and returns `true` if a match was made or `false` + /// otherwise. If no match was made, `pos` will not be updated. + #[inline] + pub(crate) fn match_string(&mut self, string: &str) -> bool { + let to = self.pos + string.len(); + + if Some(string.as_bytes()) == self.input.as_bytes().get(self.pos..to) { + self.pos = to; + true + } else { + false + } + } + + /// Case-insensitively matches `string` from the `Position` and returns `true` if a match was + /// made or `false` otherwise. If no match was made, `pos` will not be updated. + #[inline] + pub(crate) fn match_insensitive(&mut self, string: &str) -> bool { + let matched = { + let slice = &self.input[self.pos..]; + if let Some(slice) = slice.get(0..string.len()) { + slice.eq_ignore_ascii_case(string) + } else { + false + } + }; + + if matched { + self.pos += string.len(); + true + } else { + false + } + } + + /// Matches `char` `range` from the `Position` and returns `true` if a match was made or `false` + /// otherwise. If no match was made, `pos` will not be updated. + #[inline] + pub(crate) fn match_range(&mut self, range: Range<char>) -> bool { + if let Some(c) = (&self.input[self.pos..]).chars().next() { + if range.start <= c && c <= range.end { + self.pos += c.len_utf8(); + return true; + } + } + + false + } +} + +impl<'i> fmt::Debug for Position<'i> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Position").field("pos", &self.pos).finish() + } +} + +impl<'i> PartialEq for Position<'i> { + fn eq(&self, other: &Position<'i>) -> bool { + ptr::eq(self.input, other.input) && self.pos == other.pos + } +} + +impl<'i> Eq for Position<'i> {} + +impl<'i> PartialOrd for Position<'i> { + fn partial_cmp(&self, other: &Position<'i>) -> Option<Ordering> { + if ptr::eq(self.input, other.input) { + self.pos.partial_cmp(&other.pos) + } else { + None + } + } +} + +impl<'i> Ord for Position<'i> { + fn cmp(&self, other: &Position<'i>) -> Ordering { + self.partial_cmp(other) + .expect("cannot compare positions from different strs") + } +} + +impl<'i> Hash for Position<'i> { + fn hash<H: Hasher>(&self, state: &mut H) { + (self.input as *const str).hash(state); + self.pos.hash(state); + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use super::*; + + #[test] + fn empty() { + let input = ""; + assert_eq!(Position::new(input, 0).unwrap().match_string(""), true); + assert_eq!(!Position::new(input, 0).unwrap().match_string("a"), true); + } + + #[test] + fn parts() { + let input = "asdasdf"; + + assert_eq!(Position::new(input, 0).unwrap().match_string("asd"), true); + assert_eq!(Position::new(input, 3).unwrap().match_string("asdf"), true); + } + + #[test] + fn line_col() { + let input = "a\rb\nc\r\nd嗨"; + + assert_eq!(Position::new(input, 0).unwrap().line_col(), (1, 1)); + assert_eq!(Position::new(input, 1).unwrap().line_col(), (1, 2)); + assert_eq!(Position::new(input, 2).unwrap().line_col(), (1, 3)); + assert_eq!(Position::new(input, 3).unwrap().line_col(), (1, 4)); + assert_eq!(Position::new(input, 4).unwrap().line_col(), (2, 1)); + assert_eq!(Position::new(input, 5).unwrap().line_col(), (2, 2)); + assert_eq!(Position::new(input, 6).unwrap().line_col(), (2, 3)); + assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1)); + assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2)); + assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3)); + } + + #[test] + fn line_of() { + let input = "a\rb\nc\r\nd嗨"; + + assert_eq!(Position::new(input, 0).unwrap().line_of(), "a\rb\n"); + assert_eq!(Position::new(input, 1).unwrap().line_of(), "a\rb\n"); + assert_eq!(Position::new(input, 2).unwrap().line_of(), "a\rb\n"); + assert_eq!(Position::new(input, 3).unwrap().line_of(), "a\rb\n"); + assert_eq!(Position::new(input, 4).unwrap().line_of(), "c\r\n"); + assert_eq!(Position::new(input, 5).unwrap().line_of(), "c\r\n"); + assert_eq!(Position::new(input, 6).unwrap().line_of(), "c\r\n"); + assert_eq!(Position::new(input, 7).unwrap().line_of(), "d嗨"); + assert_eq!(Position::new(input, 8).unwrap().line_of(), "d嗨"); + assert_eq!(Position::new(input, 11).unwrap().line_of(), "d嗨"); + } + + #[test] + fn line_of_empty() { + let input = ""; + + assert_eq!(Position::new(input, 0).unwrap().line_of(), ""); + } + + #[test] + fn line_of_new_line() { + let input = "\n"; + + assert_eq!(Position::new(input, 0).unwrap().line_of(), "\n"); + } + + #[test] + fn line_of_between_new_line() { + let input = "\n\n"; + + assert_eq!(Position::new(input, 1).unwrap().line_of(), "\n"); + } + + fn measure_skip(input: &str, pos: usize, n: usize) -> Option<usize> { + let mut p = Position::new(input, pos).unwrap(); + if p.skip(n) { + Some(p.pos - pos) + } else { + None + } + } + + #[test] + fn skip_empty() { + let input = ""; + + assert_eq!(measure_skip(input, 0, 0), Some(0)); + assert_eq!(measure_skip(input, 0, 1), None); + } + + #[test] + fn skip() { + let input = "d嗨"; + + assert_eq!(measure_skip(input, 0, 0), Some(0)); + assert_eq!(measure_skip(input, 0, 1), Some(1)); + assert_eq!(measure_skip(input, 1, 1), Some(3)); + } + + #[test] + fn skip_until() { + let input = "ab ac"; + let pos = Position::from_start(input); + + let mut test_pos = pos.clone(); + test_pos.skip_until(&["a", "b"]); + assert_eq!(test_pos.pos(), 0); + + test_pos = pos.clone(); + test_pos.skip_until(&["b"]); + assert_eq!(test_pos.pos(), 1); + + test_pos = pos.clone(); + test_pos.skip_until(&["ab"]); + assert_eq!(test_pos.pos(), 0); + + test_pos = pos.clone(); + test_pos.skip_until(&["ac", "z"]); + assert_eq!(test_pos.pos(), 3); + + test_pos = pos.clone(); + assert!(!test_pos.skip_until(&["z"])); + assert_eq!(test_pos.pos(), 5); + } + + #[test] + fn match_range() { + let input = "b"; + + assert_eq!(Position::new(input, 0).unwrap().match_range('a'..'c'), true); + assert_eq!(Position::new(input, 0).unwrap().match_range('b'..'b'), true); + assert_eq!( + !Position::new(input, 0).unwrap().match_range('a'..'a'), + true + ); + assert_eq!( + !Position::new(input, 0).unwrap().match_range('c'..'c'), + true + ); + assert_eq!( + Position::new(input, 0).unwrap().match_range('a'..'嗨'), + true + ); + } + + #[test] + fn match_insensitive() { + let input = "AsdASdF"; + + assert_eq!( + Position::new(input, 0).unwrap().match_insensitive("asd"), + true + ); + assert_eq!( + Position::new(input, 3).unwrap().match_insensitive("asdf"), + true + ); + } + + #[test] + fn cmp() { + let input = "a"; + let start = Position::from_start(input); + let mut end = start.clone(); + + assert!(end.skip(1)); + let result = start.cmp(&end); + + assert_eq!(result, Ordering::Less); + } + + #[test] + #[should_panic] + fn cmp_panic() { + let input1 = "a"; + let input2 = "b"; + let pos1 = Position::from_start(input1); + let pos2 = Position::from_start(input2); + + pos1.cmp(&pos2); + } + + #[test] + fn hash() { + let input = "a"; + let start = Position::from_start(input); + let mut positions = HashSet::new(); + + positions.insert(start); + } +} |