summary | refs | log | tree | commit | diff | stats
path: root/vendor/regex-automata/src/util/determinize/state.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/src/util/determinize/state.rs')
-rw-r--r-- vendor/regex-automata/src/util/determinize/state.rs 39
1 files changed, 20 insertions, 19 deletions
diff --git a/vendor/regex-automata/src/util/determinize/state.rs b/vendor/regex-automata/src/util/determinize/state.rs
index e64123587..effa6f44d 100644
--- a/vendor/regex-automata/src/util/determinize/state.rs
+++ b/vendor/regex-automata/src/util/determinize/state.rs
@@ -197,7 +197,7 @@ impl StateBuilderEmpty {
}
pub(crate) fn into_matches(mut self) -> StateBuilderMatches {
- self.0.extend_from_slice(&[0, 0, 0, 0, 0]);
+ self.0.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0]);
StateBuilderMatches(self.0)
}
@@ -348,16 +348,17 @@ impl StateBuilderNFA {
/// generated by a transition over a "word" byte. (Callers may not always set
/// this. For example, if the NFA has no word boundary assertion, then needing
/// to track whether a state came from a word byte or not is superfluous and
-/// wasteful.)
+/// wasteful.) Bit 3 is set to 1 if the state was generated by a transition
+/// from a `\r` (forward search) or a `\n` (reverse search) when CRLF mode is
+/// enabled.
///
-/// Byte 1 corresponds to the look-behind assertions that were satisfied by
-/// the transition that created this state. This generally only includes the
-/// StartLF and Start assertions. (Look-ahead assertions are not tracked as
-/// part of states. Instead, these are applied by re-computing the epsilon
-/// closure of a state when computing the transition function. See `next` in
-/// the parent module.)
+/// Bytes 1..5 correspond to the look-behind assertions that were satisfied
+/// by the transition that created this state. (Look-ahead assertions are not
+/// tracked as part of states. Instead, these are applied by re-computing the
+/// epsilon closure of a state when computing the transition function. See
+/// `next` in the parent module.)
///
-/// Byte 2 corresponds to the set of look-around assertions (including both
+/// Bytes 5..9 correspond to the set of look-around assertions (including both
/// look-behind and look-ahead) that appear somewhere in this state's set of
/// NFA state IDs. This is used to determine whether this state's epsilon
/// closure should be re-computed when computing the transition function.
@@ -366,7 +367,7 @@ impl StateBuilderNFA {
/// function, we should only re-compute the epsilon closure if those new
/// assertions are relevant to this particular state.
///
-/// Bytes 3..7 correspond to a 32-bit native-endian encoded integer
+/// Bytes 9..13 correspond to a 32-bit native-endian encoded integer
/// corresponding to the number of patterns encoded in this state. If the state
/// is not a match state (byte 0 bit 0 is 0) or if its only pattern ID is
/// PatternID::ZERO, then no integer is encoded at this position. Instead, byte
@@ -452,7 +453,7 @@ impl<'a> Repr<'a> {
/// state has no conditional epsilon transitions, then there is no need
/// to re-compute the epsilon closure.
fn look_need(&self) -> LookSet {
- LookSet::read_repr(&self.0[3..])
+ LookSet::read_repr(&self.0[5..])
}
/// Returns the total number of match pattern IDs in this state.
@@ -476,7 +477,7 @@ impl<'a> Repr<'a> {
if !self.has_pattern_ids() {
PatternID::ZERO
} else {
- let offset = 9 + index * PatternID::SIZE;
+ let offset = 13 + index * PatternID::SIZE;
// This is OK since we only ever serialize valid PatternIDs to
// states.
wire::read_pattern_id_unchecked(&self.0[offset..]).0
@@ -507,7 +508,7 @@ impl<'a> Repr<'a> {
f(PatternID::ZERO);
return;
}
- let mut pids = &self.0[9..self.pattern_offset_end()];
+ let mut pids = &self.0[13..self.pattern_offset_end()];
while !pids.is_empty() {
let pid = wire::read_u32(pids);
pids = &pids[PatternID::SIZE..];
@@ -539,11 +540,11 @@ impl<'a> Repr<'a> {
fn pattern_offset_end(&self) -> usize {
let encoded = self.encoded_pattern_len();
if encoded == 0 {
- return 5;
+ return 9;
}
// This arithmetic is OK since we were able to address this many bytes
// when writing to the state, thus, it must fit into a usize.
- encoded.checked_mul(4).unwrap().checked_add(9).unwrap()
+ encoded.checked_mul(4).unwrap().checked_add(13).unwrap()
}
/// Returns the total number of *encoded* pattern IDs in this state.
@@ -557,7 +558,7 @@ impl<'a> Repr<'a> {
}
// This unwrap is OK since the total number of patterns is always
// guaranteed to fit into a usize.
- usize::try_from(wire::read_u32(&self.0[5..9])).unwrap()
+ usize::try_from(wire::read_u32(&self.0[9..13])).unwrap()
}
}
@@ -643,7 +644,7 @@ impl<'a> ReprVec<'a> {
/// Mutate the set of look-around (both behind and ahead) assertions that
/// appear at least once in this state's set of NFA states.
fn set_look_need(&mut self, mut set: impl FnMut(LookSet) -> LookSet) {
- set(self.look_need()).write_repr(&mut self.0[3..]);
+ set(self.look_need()).write_repr(&mut self.0[5..]);
}
/// Add a pattern ID to this state. All match states must have at least
@@ -703,14 +704,14 @@ impl<'a> ReprVec<'a> {
return;
}
let patsize = PatternID::SIZE;
- let pattern_bytes = self.0.len() - 9;
+ let pattern_bytes = self.0.len() - 13;
// Every pattern ID uses 4 bytes, so number of bytes should be
// divisible by 4.
assert_eq!(pattern_bytes % patsize, 0);
// This unwrap is OK since we are guaranteed that the maximum number
// of possible patterns fits into a u32.
let count32 = u32::try_from(pattern_bytes / patsize).unwrap();
- wire::NE::write_u32(count32, &mut self.0[5..9]);
+ wire::NE::write_u32(count32, &mut self.0[9..13]);
}
/// Add an NFA state ID to this state. The order in which NFA states are