summaryrefslogtreecommitdiffstats
path: root/vendor/pest/src/position.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/pest/src/position.rs')
-rw-r--r--vendor/pest/src/position.rs176
1 files changed, 129 insertions, 47 deletions
diff --git a/vendor/pest/src/position.rs b/vendor/pest/src/position.rs
index c76589856..f91f8291e 100644
--- a/vendor/pest/src/position.rs
+++ b/vendor/pest/src/position.rs
@@ -32,7 +32,7 @@ impl<'i> Position<'i> {
/// # Safety:
///
/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
- pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position {
+ pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position<'_> {
debug_assert!(input.get(pos..).is_some());
Position { input, pos }
}
@@ -49,7 +49,7 @@ impl<'i> Position<'i> {
/// assert_eq!(Position::new(heart, 1), None);
/// assert_ne!(Position::new(heart, cheart.len_utf8()), None);
/// ```
- pub fn new(input: &str, pos: usize) -> Option<Position> {
+ pub fn new(input: &str, pos: usize) -> Option<Position<'_>> {
input.get(pos..).map(|_| Position { input, pos })
}
@@ -125,7 +125,7 @@ impl<'i> Position<'i> {
/// enum Rule {}
///
/// let input = "\na";
- /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
+ /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.match_string("\na");
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().line_col(), (2, 2));
@@ -135,45 +135,14 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}
-
- let mut pos = self.pos;
- // Position's pos is always a UTF-8 border.
- let slice = &self.input[..pos];
- let mut chars = slice.chars().peekable();
-
- let mut line_col = (1, 1);
-
- while pos != 0 {
- match chars.next() {
- Some('\r') => {
- if let Some(&'\n') = chars.peek() {
- chars.next();
-
- if pos == 1 {
- pos -= 1;
- } else {
- pos -= 2;
- }
-
- line_col = (line_col.0 + 1, 1);
- } else {
- pos -= 1;
- line_col = (line_col.0, line_col.1 + 1);
- }
- }
- Some('\n') => {
- pos -= 1;
- line_col = (line_col.0 + 1, 1);
- }
- Some(c) => {
- pos -= c.len_utf8();
- line_col = (line_col.0, line_col.1 + 1);
- }
- None => unreachable!(),
- }
+ #[cfg(feature = "fast-line-col")]
+ {
+ fast_line_col(self.input, self.pos)
+ }
+ #[cfg(not(feature = "fast-line-col"))]
+ {
+ original_line_col(self.input, self.pos)
}
-
- line_col
}
/// Returns the entire line of the input that contains this `Position`.
@@ -187,7 +156,7 @@ impl<'i> Position<'i> {
/// enum Rule {}
///
/// let input = "\na";
- /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
+ /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.match_string("\na");
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().line_of(), "a");
@@ -256,7 +225,7 @@ impl<'i> Position<'i> {
let skipped = {
let mut len = 0;
// Position's pos is always a UTF-8 border.
- let mut chars = (&self.input[self.pos..]).chars();
+ let mut chars = self.input[self.pos..].chars();
for _ in 0..n {
if let Some(c) = chars.next() {
len += c.len_utf8();
@@ -278,7 +247,7 @@ impl<'i> Position<'i> {
let skipped = {
let mut len = 0;
// Position's pos is always a UTF-8 border.
- let mut chars = (&self.input[..self.pos]).chars().rev();
+ let mut chars = self.input[..self.pos].chars().rev();
for _ in 0..n {
if let Some(c) = chars.next() {
len += c.len_utf8();
@@ -297,6 +266,60 @@ impl<'i> Position<'i> {
/// this function will return `false` but its `pos` will *still* be updated.
#[inline]
pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
+ #[cfg(not(feature = "memchr"))]
+ {
+ self.skip_until_basic(strings)
+ }
+ #[cfg(feature = "memchr")]
+ {
+ match strings {
+ [] => (), // nothing to search for: falls through to the "not found" tail below
+ [s1] => {
+ if let Some(from) =
+ memchr::memmem::find(&self.input.as_bytes()[self.pos..], s1.as_bytes())
+ {
+ self.pos += from; // `from` is relative to the current position
+ return true;
+ }
+ }
+ [s1, s2] if !s1.is_empty() && !s2.is_empty() => { // guard: byte 0 of each needle is indexed below
+ let b1 = s1.as_bytes()[0];
+ let b2 = s2.as_bytes()[0];
+ let miter = memchr::memchr2_iter(b1, b2, &self.input.as_bytes()[self.pos..]);
+ for from in miter { // candidates: every offset where either first byte occurs
+ let start = &self.input[self.pos + from..]; // a needle's first byte is never a UTF-8 continuation byte, so each hit is a char boundary
+ if start.starts_with(s1) || start.starts_with(s2) {
+ self.pos += from;
+ return true;
+ }
+ }
+ }
+ [s1, s2, s3] if !s1.is_empty() && !s2.is_empty() && !s3.is_empty() => { // all three must be non-empty; anything else uses the generic fallback
+ let b1 = s1.as_bytes()[0];
+ let b2 = s2.as_bytes()[0];
+ let b3 = s3.as_bytes()[0]; // first byte of the THIRD needle, so its occurrences are candidates too
+ let miter =
+ memchr::memchr3_iter(b1, b2, b3, &self.input.as_bytes()[self.pos..]);
+ for from in miter { // candidates: every offset where any of the three first bytes occurs
+ let start = &self.input[self.pos + from..]; // char boundary for the same reason as in the two-needle arm
+ if start.starts_with(s1) || start.starts_with(s2) || start.starts_with(s3) {
+ self.pos += from;
+ return true;
+ }
+ }
+ }
+ _ => { // more than three needles, or an empty needle among two/three
+ return self.skip_until_basic(strings);
+ }
+ }
+ self.pos = self.input.len(); // no needle found: park at end of input and report failure
+ false
+ }
+ }
+
+ #[inline]
+ fn skip_until_basic(&mut self, strings: &[&str]) -> bool {
+ // TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse?
for from in self.pos..self.input.len() {
let bytes = if let Some(string) = self.input.get(from..) {
string.as_bytes()
@@ -332,7 +355,7 @@ impl<'i> Position<'i> {
where
F: FnOnce(char) -> bool,
{
- if let Some(c) = (&self.input[self.pos..]).chars().next() {
+ if let Some(c) = self.input[self.pos..].chars().next() {
if f(c) {
self.pos += c.len_utf8();
true
@@ -383,7 +406,7 @@ impl<'i> Position<'i> {
/// otherwise. If no match was made, `pos` will not be updated.
#[inline]
pub(crate) fn match_range(&mut self, range: Range<char>) -> bool {
- if let Some(c) = (&self.input[self.pos..]).chars().next() {
+ if let Some(c) = self.input[self.pos..].chars().next() {
if range.start <= c && c <= range.end {
self.pos += c.len_utf8();
return true;
@@ -395,7 +418,7 @@ impl<'i> Position<'i> {
}
impl<'i> fmt::Debug for Position<'i> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Position").field("pos", &self.pos).finish()
}
}
@@ -432,6 +455,63 @@ impl<'i> Hash for Position<'i> {
}
}
+#[inline]
+#[cfg(not(feature = "fast-line-col"))]
+fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) {
+ // Position's pos is always a UTF-8 border, so slicing up to it cannot split a character.
+ let slice = &input[..pos];
+ let mut chars = slice.chars().peekable();
+
+ let mut line_col = (1, 1); // 1-based (line, column) at the very start of the input
+
+ while pos != 0 { // `pos` now counts the bytes of `slice` that are still unconsumed
+ match chars.next() {
+ Some('\r') => {
+ if let Some(&'\n') = chars.peek() { // "\r\n" is consumed as one line break
+ chars.next();
+
+ if pos == 1 { // avoids underflow should only one byte remain
+ pos -= 1;
+ } else {
+ pos -= 2;
+ }
+
+ line_col = (line_col.0 + 1, 1);
+ } else { // a lone '\r' advances the column like any other character
+ pos -= 1;
+ line_col = (line_col.0, line_col.1 + 1);
+ }
+ }
+ Some('\n') => {
+ pos -= 1;
+ line_col = (line_col.0 + 1, 1);
+ }
+ Some(c) => { // any other character: one column, however many bytes it occupies
+ pos -= c.len_utf8();
+ line_col = (line_col.0, line_col.1 + 1);
+ }
+ None => unreachable!(), // `chars` covers exactly the bytes counted by `pos`, so it cannot run dry first
+ }
+ }
+
+ line_col
+}
+
+#[inline]
+#[cfg(feature = "fast-line-col")]
+fn fast_line_col(input: &str, pos: usize) -> (usize, usize) {
+ // `pos` always sits on a UTF-8 boundary, so this slice never splits a character.
+ let head = &input[..pos];
+ match memchr::memrchr(b'\n', head.as_bytes()) {
+ // The tail below starts at the '\n' itself, so its char count is already the 1-based column.
+ Some(last_nl) => (
+ bytecount::count(head[..=last_nl].as_bytes(), b'\n') + 1,
+ head[last_nl..].chars().count(),
+ ),
+ None => (1, head.chars().count() + 1),
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -465,6 +545,8 @@ mod tests {
assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1));
assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2));
assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3));
+ let input = "abcd嗨";
+ assert_eq!(Position::new(input, 7).unwrap().line_col(), (1, 6));
}
#[test]