1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
/// Represents the four possible starting configurations of a DFA search.
///
/// The starting configuration is determined by inspecting the the beginning of
/// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if
/// specified) is what selects the start state to use in a DFA.
///
/// In a DFA that doesn't have starting states for each pattern, then it will
/// have a maximum of four DFA start states. If the DFA was compiled with start
/// states for each pattern, then it will have a maximum of four DFA start
/// states for searching for any pattern, and then another maximum of four DFA
/// start states for executing an anchored search for each pattern.
///
/// This ends up being represented as a table in the DFA (whether lazy or fully
/// built) where the stride of that table is 4, and each entry is an index into
/// the state transition table. Note though that multiple entries in the table
/// might point to the same state if the states would otherwise be equivalent.
/// (This is guaranteed by DFA minimization and may even be accomplished by
/// normal determinization, since it attempts to reuse equivalent states too.)
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum Start {
/// This occurs when the starting position is not any of the ones below.
NonWordByte = 0,
/// This occurs when the byte immediately preceding the start of the search
/// is an ASCII word byte.
WordByte = 1,
/// This occurs when the starting position of the search corresponds to the
/// beginning of the haystack.
Text = 2,
/// This occurs when the byte immediately preceding the start of the search
/// is a line terminator. Specifically, `\n`.
Line = 3,
}
impl Start {
/// Return the starting state corresponding to the given integer. If no
/// starting state exists for the given integer, then None is returned.
pub(crate) fn from_usize(n: usize) -> Option<Start> {
match n {
0 => Some(Start::NonWordByte),
1 => Some(Start::WordByte),
2 => Some(Start::Text),
3 => Some(Start::Line),
_ => None,
}
}
/// Returns the total number of starting state configurations.
pub(crate) fn count() -> usize {
4
}
/// Returns the starting state configuration for the given search
/// parameters. If the given offset range is not valid, then this panics.
#[inline(always)]
pub(crate) fn from_position_fwd(
bytes: &[u8],
start: usize,
end: usize,
) -> Start {
assert!(
bytes.get(start..end).is_some(),
"{}..{} is invalid",
start,
end
);
if start == 0 {
Start::Text
} else if bytes[start - 1] == b'\n' {
Start::Line
} else if crate::util::is_word_byte(bytes[start - 1]) {
Start::WordByte
} else {
Start::NonWordByte
}
}
/// Returns the starting state configuration for a reverse search with the
/// given search parameters. If the given offset range is not valid, then
/// this panics.
#[inline(always)]
pub(crate) fn from_position_rev(
bytes: &[u8],
start: usize,
end: usize,
) -> Start {
assert!(
bytes.get(start..end).is_some(),
"{}..{} is invalid",
start,
end
);
if end == bytes.len() {
Start::Text
} else if bytes[end] == b'\n' {
Start::Line
} else if crate::util::is_word_byte(bytes[end]) {
Start::WordByte
} else {
Start::NonWordByte
}
}
/// Return this starting configuration as an integer. It is guaranteed to
/// be less than `Start::count()`.
#[inline(always)]
pub(crate) fn as_usize(&self) -> usize {
*self as usize
}
}
|