diff options
Diffstat (limited to 'vendor/regex/src/input.rs')
-rw-r--r-- | vendor/regex/src/input.rs | 432 |
1 files changed, 0 insertions, 432 deletions
diff --git a/vendor/regex/src/input.rs b/vendor/regex/src/input.rs deleted file mode 100644 index df6c3e0c9..000000000 --- a/vendor/regex/src/input.rs +++ /dev/null @@ -1,432 +0,0 @@ -use std::char; -use std::cmp::Ordering; -use std::fmt; -use std::ops; -use std::u32; - -use crate::literal::LiteralSearcher; -use crate::prog::InstEmptyLook; -use crate::utf8::{decode_last_utf8, decode_utf8}; - -/// Represents a location in the input. -#[derive(Clone, Copy, Debug)] -pub struct InputAt { - pos: usize, - c: Char, - byte: Option<u8>, - len: usize, -} - -impl InputAt { - /// Returns true iff this position is at the beginning of the input. - pub fn is_start(&self) -> bool { - self.pos == 0 - } - - /// Returns true iff this position is past the end of the input. - pub fn is_end(&self) -> bool { - self.c.is_none() && self.byte.is_none() - } - - /// Returns the character at this position. - /// - /// If this position is just before or after the input, then an absent - /// character is returned. - pub fn char(&self) -> Char { - self.c - } - - /// Returns the byte at this position. - pub fn byte(&self) -> Option<u8> { - self.byte - } - - /// Returns the UTF-8 width of the character at this position. - pub fn len(&self) -> usize { - self.len - } - - /// Returns whether the UTF-8 width of the character at this position - /// is zero. - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Returns the byte offset of this position. - pub fn pos(&self) -> usize { - self.pos - } - - /// Returns the byte offset of the next position in the input. - pub fn next_pos(&self) -> usize { - self.pos + self.len - } -} - -/// An abstraction over input used in the matching engines. -pub trait Input: fmt::Debug { - /// Return an encoding of the position at byte offset `i`. - fn at(&self, i: usize) -> InputAt; - - /// Return the Unicode character occurring next to `at`. - /// - /// If no such character could be decoded, then `Char` is absent. - fn next_char(&self, at: InputAt) -> Char; - - /// Return the Unicode character occurring previous to `at`. - /// - /// If no such character could be decoded, then `Char` is absent. - fn previous_char(&self, at: InputAt) -> Char; - - /// Return true if the given empty width instruction matches at the - /// input position given. - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool; - - /// Scan the input for a matching prefix. - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option<InputAt>; - - /// The number of bytes in the input. - fn len(&self) -> usize; - - /// Whether the input is empty. - fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Return the given input as a sequence of bytes. - fn as_bytes(&self) -> &[u8]; -} - -impl<'a, T: Input> Input for &'a T { - fn at(&self, i: usize) -> InputAt { - (**self).at(i) - } - - fn next_char(&self, at: InputAt) -> Char { - (**self).next_char(at) - } - - fn previous_char(&self, at: InputAt) -> Char { - (**self).previous_char(at) - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - (**self).is_empty_match(at, empty) - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option<InputAt> { - (**self).prefix_at(prefixes, at) - } - - fn len(&self) -> usize { - (**self).len() - } - - fn as_bytes(&self) -> &[u8] { - (**self).as_bytes() - } -} - -/// An input reader over characters. -#[derive(Clone, Copy, Debug)] -pub struct CharInput<'t>(&'t [u8]); - -impl<'t> CharInput<'t> { - /// Return a new character input reader for the given string. - pub fn new(s: &'t [u8]) -> CharInput<'t> { - CharInput(s) - } -} - -impl<'t> ops::Deref for CharInput<'t> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - self.0 - } -} - -impl<'t> Input for CharInput<'t> { - fn at(&self, i: usize) -> InputAt { - if i >= self.len() { - InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } - } else { - let c = decode_utf8(&self[i..]).map(|(c, _)| c).into(); - InputAt { pos: i, c, byte: None, len: c.len_utf8() } - } - } - - fn next_char(&self, at: InputAt) -> Char { - at.char() - } - - fn previous_char(&self, at: InputAt) -> Char { - decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use crate::prog::EmptyLook::*; - match empty.look { - StartLine => { - let c = self.previous_char(at); - at.pos() == 0 || c == '\n' - } - EndLine => { - let c = self.next_char(at); - at.pos() == self.len() || c == '\n' - } - StartText => at.pos() == 0, - EndText => at.pos() == self.len(), - WordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() != c2.is_word_char() - } - NotWordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() == c2.is_word_char() - } - WordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_byte() != c2.is_word_byte() - } - NotWordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_byte() == c2.is_word_byte() - } - } - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option<InputAt> { - prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) - } - - fn len(&self) -> usize { - self.0.len() - } - - fn as_bytes(&self) -> &[u8] { - self.0 - } -} - -/// An input reader over bytes. -#[derive(Clone, Copy, Debug)] -pub struct ByteInput<'t> { - text: &'t [u8], - only_utf8: bool, -} - -impl<'t> ByteInput<'t> { - /// Return a new byte-based input reader for the given string. - pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> { - ByteInput { text, only_utf8 } - } -} - -impl<'t> ops::Deref for ByteInput<'t> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - self.text - } -} - -impl<'t> Input for ByteInput<'t> { - fn at(&self, i: usize) -> InputAt { - if i >= self.len() { - InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } - } else { - InputAt { - pos: i, - c: None.into(), - byte: self.get(i).cloned(), - len: 1, - } - } - } - - fn next_char(&self, at: InputAt) -> Char { - decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into() - } - - fn previous_char(&self, at: InputAt) -> Char { - decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use crate::prog::EmptyLook::*; - match empty.look { - StartLine => { - let c = self.previous_char(at); - at.pos() == 0 || c == '\n' - } - EndLine => { - let c = self.next_char(at); - at.pos() == self.len() || c == '\n' - } - StartText => at.pos() == 0, - EndText => at.pos() == self.len(), - WordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() != c2.is_word_char() - } - NotWordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() == c2.is_word_char() - } - WordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - if self.only_utf8 { - // If we must match UTF-8, then we can't match word - // boundaries at invalid UTF-8. - if c1.is_none() && !at.is_start() { - return false; - } - if c2.is_none() && !at.is_end() { - return false; - } - } - c1.is_word_byte() != c2.is_word_byte() - } - NotWordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - if self.only_utf8 { - // If we must match UTF-8, then we can't match word - // boundaries at invalid UTF-8. - if c1.is_none() && !at.is_start() { - return false; - } - if c2.is_none() && !at.is_end() { - return false; - } - } - c1.is_word_byte() == c2.is_word_byte() - } - } - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option<InputAt> { - prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) - } - - fn len(&self) -> usize { - self.text.len() - } - - fn as_bytes(&self) -> &[u8] { - self.text - } -} - -/// An inline representation of `Option<char>`. -/// -/// This eliminates the need to do case analysis on `Option<char>` to determine -/// ordinality with other characters. -/// -/// (The `Option<char>` is not related to encoding. Instead, it is used in the -/// matching engines to represent the beginning and ending boundaries of the -/// search text.) -#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Char(u32); - -impl fmt::Debug for Char { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match char::from_u32(self.0) { - None => write!(f, "Empty"), - Some(c) => write!(f, "{:?}", c), - } - } -} - -impl Char { - /// Returns true iff the character is absent. - #[inline] - pub fn is_none(self) -> bool { - self.0 == u32::MAX - } - - /// Returns the length of the character's UTF-8 encoding. - /// - /// If the character is absent, then `1` is returned. - #[inline] - pub fn len_utf8(self) -> usize { - char::from_u32(self.0).map_or(1, |c| c.len_utf8()) - } - - /// Returns true iff the character is a word character. - /// - /// If the character is absent, then false is returned. - pub fn is_word_char(self) -> bool { - // is_word_character can panic if the Unicode data for \w isn't - // available. However, our compiler ensures that if a Unicode word - // boundary is used, then the data must also be available. If it isn't, - // then the compiler returns an error. - char::from_u32(self.0).map_or(false, regex_syntax::is_word_character) - } - - /// Returns true iff the byte is a word byte. - /// - /// If the byte is absent, then false is returned. - pub fn is_word_byte(self) -> bool { - match char::from_u32(self.0) { - Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8), - None | Some(_) => false, - } - } -} - -impl From<char> for Char { - fn from(c: char) -> Char { - Char(c as u32) - } -} - -impl From<Option<char>> for Char { - fn from(c: Option<char>) -> Char { - c.map_or(Char(u32::MAX), |c| c.into()) - } -} - -impl PartialEq<char> for Char { - #[inline] - fn eq(&self, other: &char) -> bool { - self.0 == *other as u32 - } -} - -impl PartialEq<Char> for char { - #[inline] - fn eq(&self, other: &Char) -> bool { - *self as u32 == other.0 - } -} - -impl PartialOrd<char> for Char { - #[inline] - fn partial_cmp(&self, other: &char) -> Option<Ordering> { - self.0.partial_cmp(&(*other as u32)) - } -} - -impl PartialOrd<Char> for char { - #[inline] - fn partial_cmp(&self, other: &Char) -> Option<Ordering> { - (*self as u32).partial_cmp(&other.0) - } -} |