summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata-0.2.0/src/dfa/search_unsafe.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata-0.2.0/src/dfa/search_unsafe.rs')
-rw-r--r--vendor/regex-automata-0.2.0/src/dfa/search_unsafe.rs321
1 files changed, 321 insertions, 0 deletions
diff --git a/vendor/regex-automata-0.2.0/src/dfa/search_unsafe.rs b/vendor/regex-automata-0.2.0/src/dfa/search_unsafe.rs
new file mode 100644
index 000000000..ea1c29ff7
--- /dev/null
+++ b/vendor/regex-automata-0.2.0/src/dfa/search_unsafe.rs
@@ -0,0 +1,321 @@
+use crate::dfa::automaton::{Automaton, State};
+use crate::MatchError;
+
+/// This is marked as `inline(always)` specifically because it supports
+/// multiple modes of searching. Namely, the 'earliest' boolean getting inlined
+/// permits eliminating a few crucial branches.
+#[inline(always)]
+pub fn find_fwd<A: Automaton + ?Sized>(
+ dfa: &A,
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ earliest: bool,
+) -> Result<Option<usize>, MatchError> {
+ assert!(start <= end);
+ assert!(start <= bytes.len());
+ assert!(end <= bytes.len());
+
+ let (mut state, mut last_match) = init_fwd(dfa, bytes, start, end)?;
+ if earliest && last_match.is_some() {
+ return Ok(last_match);
+ }
+
+ let mut at = start;
+ while at < end {
+ let byte = bytes[at];
+ state = dfa.next_state(state, byte);
+ at += 1;
+ if dfa.is_special_state(state) {
+ if dfa.is_dead_state(state) {
+ return Ok(last_match);
+ } else if dfa.is_quit_state(state) {
+ return Err(MatchError::Quit { byte, offset: at - 1 });
+ }
+ last_match = Some(at - dfa.match_offset());
+ if earliest {
+ return Ok(last_match);
+ }
+ }
+ }
+ /*
+ unsafe {
+ let mut p = bytes.as_ptr().add(start);
+ while p < bytes[end..].as_ptr() {
+ let byte = *p;
+ state = dfa.next_state_unchecked(state, byte);
+ p = p.add(1);
+ if dfa.is_special_state(state) {
+ if dfa.is_dead_state(state) {
+ return Ok(last_match);
+ } else if dfa.is_quit_state(state) {
+ return Err(MatchError::Quit {
+ byte,
+ offset: offset(bytes, p) - 1,
+ });
+ }
+ last_match = Some(offset(bytes, p) - dfa.match_offset());
+ if earliest {
+ return Ok(last_match);
+ }
+ }
+ }
+ }
+ */
+ Ok(eof_fwd(dfa, bytes, end, &mut state)?.or(last_match))
+}
+
+/// This is marked as `inline(always)` specifically because it supports
+/// multiple modes of searching. Namely, the 'earliest' boolean getting inlined
+/// permits eliminating a few crucial branches.
+#[inline(always)]
+pub fn find_rev<A: Automaton + ?Sized>(
+ dfa: &A,
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ earliest: bool,
+) -> Result<Option<usize>, MatchError> {
+ assert!(start <= end);
+ assert!(start <= bytes.len());
+ assert!(end <= bytes.len());
+
+ let (mut state, mut last_match) = init_rev(dfa, bytes, start, end)?;
+ if earliest && last_match.is_some() {
+ return Ok(last_match);
+ }
+
+ let mut at = end;
+ while at > start {
+ at -= 1;
+ let byte = bytes[at];
+ state = dfa.next_state(state, byte);
+ if dfa.is_special_state(state) {
+ if dfa.is_dead_state(state) {
+ return Ok(last_match);
+ } else if dfa.is_quit_state(state) {
+ return Err(MatchError::Quit { byte, offset: at });
+ }
+ last_match = Some(at + dfa.match_offset());
+ if earliest {
+ return Ok(last_match);
+ }
+ }
+ }
+ /*
+ unsafe {
+ let mut p = bytes.as_ptr().add(end);
+ while p > bytes[start..].as_ptr() {
+ p = p.sub(1);
+ let byte = *p;
+ state = dfa.next_state_unchecked(state, byte);
+ if dfa.is_special_state(state) {
+ if dfa.is_dead_state(state) {
+ return Ok(last_match);
+ } else if dfa.is_quit_state(state) {
+ return Err(MatchError::Quit {
+ byte,
+ offset: offset(bytes, p),
+ });
+ }
+ last_match = Some(offset(bytes, p) + dfa.match_offset());
+ if earliest {
+ return Ok(last_match);
+ }
+ }
+ }
+ }
+ */
+ Ok(eof_rev(dfa, state, bytes, start)?.or(last_match))
+}
+
+pub fn find_overlapping_fwd<A: Automaton + ?Sized>(
+ dfa: &A,
+ bytes: &[u8],
+ mut start: usize,
+ end: usize,
+ caller_state: &mut State<A::ID>,
+) -> Result<Option<usize>, MatchError> {
+ assert!(start <= end);
+ assert!(start <= bytes.len());
+ assert!(end <= bytes.len());
+
+ let (mut state, mut last_match) = match caller_state.as_option() {
+ None => init_fwd(dfa, bytes, start, end)?,
+ Some(id) => {
+ // This is a subtle but critical detail. If the caller provides a
+ // non-None state ID, then it must be the case that the state ID
+ // corresponds to one set by this function. The state ID therefore
+ // corresponds to a match state, a dead state or some other state.
+ // However, "some other" state _only_ occurs when the input has
+ // been exhausted because the only way to stop before then is to
+ // see a match or a dead/quit state.
+ //
+ // If the input is exhausted or if it's a dead state, then
+ // incrementing the starting position has no relevance on
+ // correctness, since the loop below will either not execute
+ // at all or will immediately stop due to being in a dead state.
+ // (Once in a dead state it is impossible to leave it.)
+ //
+ // Therefore, the only case we need to consider is when
+ // caller_state is a match state. In this case, since our machines
+ // support the ability to delay a match by a certain number of
+ // bytes (to support look-around), it follows that we actually
+ // consumed that many additional bytes on our previous search. When
+ // the caller resumes their search to find subsequent matches, they
+ // will use the ending location from the previous match as the next
+ // starting point, which is `match_offset` bytes PRIOR to where
+ // we scanned to on the previous search. Therefore, we need to
+ // compensate by bumping `start` up by `match_offset` bytes.
+ start += dfa.match_offset();
+ // Since match_offset could be any arbitrary value and we use
+ // `start` in pointer arithmetic below, we check that we are still
+ // in bounds. Otherwise, we could materialize a pointer that is
+ // more than one past the end point of `bytes`, which is UB.
+ if start > end {
+ return Ok(None);
+ }
+ (id, None)
+ }
+ };
+ if last_match.is_some() {
+ caller_state.set(state);
+ return Ok(last_match);
+ }
+
+ let mut at = start;
+ while at < end {
+ let byte = bytes[at];
+ state = dfa.next_state(state, byte);
+ at += 1;
+ if dfa.is_special_state(state) {
+ caller_state.set(state);
+ if dfa.is_dead_state(state) {
+ return Ok(None);
+ } else if dfa.is_quit_state(state) {
+ return Err(MatchError::Quit { byte, offset: at - 1 });
+ } else {
+ return Ok(Some(at - dfa.match_offset()));
+ }
+ }
+ }
+ /*
+ // SAFETY: Other than the normal pointer arithmetic happening here, a
+ // unique aspect of safety for this function is the fact that the caller
+ // can provide the state that the search routine will start with. If this
+ // state were invalid, it would be possible to incorrectly index the
+ // transition table. We however prevent this from happening by guaranteeing
+ // that State is valid. Namely, callers cannot mutate a State. All they can
+ // do is create a "start" state or otherwise reuse a previously set state.
+ // Since callers can't mutate a state, it follows that a previously set
+ // state can only be retrieved by crate internal functions. Therefore, our
+ // use of it is safe since this code will only ever set the provided state
+ // to a valid state.
+ unsafe {
+ let mut p = bytes.as_ptr().add(start);
+ while p < bytes[end..].as_ptr() {
+ let byte = *p;
+ state = dfa.next_state_unchecked(state, byte);
+ p = p.add(1);
+ if dfa.is_special_state(state) {
+ caller_state.set(state);
+ return if dfa.is_dead_state(state) {
+ Ok(None)
+ } else if dfa.is_quit_state(state) {
+ Err(MatchError::Quit { byte, offset: offset(bytes, p) - 1 })
+ } else {
+ Ok(Some(offset(bytes, p) - dfa.match_offset()))
+ };
+ }
+ }
+ }
+ */
+
+ let result = eof_fwd(dfa, bytes, end, &mut state);
+ caller_state.set(state);
+ result
+}
+
+fn init_fwd<A: Automaton + ?Sized>(
+ dfa: &A,
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+) -> Result<(A::ID, Option<usize>), MatchError> {
+ let state = dfa.start_state_forward(bytes, start, end);
+ if dfa.is_match_state(state) {
+ Ok((state, Some(start - dfa.match_offset())))
+ } else {
+ Ok((state, None))
+ }
+}
+
+fn init_rev<A: Automaton + ?Sized>(
+ dfa: &A,
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+) -> Result<(A::ID, Option<usize>), MatchError> {
+ let state = dfa.start_state_reverse(bytes, start, end);
+ if dfa.is_match_state(state) {
+ Ok((state, Some(end + dfa.match_offset())))
+ } else {
+ Ok((state, None))
+ }
+}
+
+fn eof_fwd<A: Automaton + ?Sized>(
+ dfa: &A,
+ bytes: &[u8],
+ end: usize,
+ state: &mut A::ID,
+) -> Result<Option<usize>, MatchError> {
+ match bytes.get(end) {
+ Some(&b) => {
+ *state = dfa.next_state(*state, b);
+ if dfa.is_match_state(*state) {
+ Ok(Some(end))
+ } else {
+ Ok(None)
+ }
+ }
+ None => {
+ *state = dfa.next_eof_state(*state);
+ if dfa.is_match_state(*state) {
+ Ok(Some(bytes.len()))
+ } else {
+ Ok(None)
+ }
+ }
+ }
+}
+
+fn eof_rev<A: Automaton + ?Sized>(
+ dfa: &A,
+ state: A::ID,
+ bytes: &[u8],
+ start: usize,
+) -> Result<Option<usize>, MatchError> {
+ if start > 0 {
+ if dfa.is_match_state(dfa.next_state(state, bytes[start - 1])) {
+ Ok(Some(start))
+ } else {
+ Ok(None)
+ }
+ } else {
+ if dfa.is_match_state(dfa.next_eof_state(state)) {
+ Ok(Some(0))
+ } else {
+ Ok(None)
+ }
+ }
+}
+
+/// Returns the distance between the given pointer and the start of `bytes`.
+/// This assumes that the given pointer points to somewhere in the `bytes`
+/// slice given.
+fn offset(bytes: &[u8], p: *const u8) -> usize {
+ debug_assert!(bytes.as_ptr() <= p);
+ debug_assert!(bytes[bytes.len()..].as_ptr() >= p);
+ ((p as isize) - (bytes.as_ptr() as isize)) as usize
+}