summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/src/util/sparse_set.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/src/util/sparse_set.rs')
-rw-r--r--vendor/regex-automata/src/util/sparse_set.rs78
1 files changed, 44 insertions, 34 deletions
diff --git a/vendor/regex-automata/src/util/sparse_set.rs b/vendor/regex-automata/src/util/sparse_set.rs
index bf59e4469..cbaa0b6f4 100644
--- a/vendor/regex-automata/src/util/sparse_set.rs
+++ b/vendor/regex-automata/src/util/sparse_set.rs
@@ -1,6 +1,23 @@
-use alloc::{boxed::Box, vec, vec::Vec};
+/*!
+This module defines a sparse set data structure. Its most interesting
+properties are:
-use crate::util::id::StateID;
+* They preserve insertion order.
+* Set membership testing is done in constant time.
+* Set insertion is done in constant time.
+* Clearing the set is done in constant time.
+
+The cost for doing this is that the capacity of the set needs to be known up
+front, and the elements in the set are limited to state identifiers.
+
+These sets are principally used when traversing an NFA state graph. This
+happens at search time, for example, in the PikeVM. It also happens during DFA
+determinization.
+*/
+
+use alloc::{vec, vec::Vec};
+
+use crate::util::primitives::StateID;
/// A pairse of sparse sets.
///
@@ -79,7 +96,12 @@ pub(crate) struct SparseSet {
/// Sparse maps ids to their location in dense.
///
/// A state ID is in the set if and only if
- /// sparse[id] < dense.len() && id == dense[sparse[id]].
+ /// sparse[id] < len && id == dense[sparse[id]].
+ ///
+ /// Note that these are indices into 'dense'. It's a little weird to use
+ /// StateID here, but we know our length can never exceed the bounds of
+ /// StateID (enforced by 'resize') and StateID will be at most 4 bytes
+ /// where as a usize is likely double that in most cases.
sparse: Vec<StateID>,
}
@@ -146,9 +168,9 @@ impl SparseSet {
///
/// This is marked as inline(always) since the compiler won't inline it
/// otherwise, and it's a fairly hot piece of code in DFA determinization.
- #[inline(always)]
- pub(crate) fn insert(&mut self, value: StateID) -> bool {
- if self.contains(value) {
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn insert(&mut self, id: StateID) -> bool {
+ if self.contains(id) {
return false;
}
@@ -158,30 +180,22 @@ impl SparseSet {
"{:?} exceeds capacity of {:?} when inserting {:?}",
i,
self.capacity(),
- value,
+ id,
);
// OK since i < self.capacity() and self.capacity() is guaranteed to
// be <= StateID::LIMIT.
- let id = StateID::new_unchecked(i);
- self.dense[id] = value;
- self.sparse[value] = id;
+ let index = StateID::new_unchecked(i);
+ self.dense[index] = id;
+ self.sparse[id] = index;
self.len += 1;
true
}
/// Returns true if and only if this set contains the given value.
#[inline]
- pub(crate) fn contains(&self, value: StateID) -> bool {
- let i = self.sparse[value];
- i.as_usize() < self.len() && self.dense[i] == value
- }
-
- /// Returns the ith inserted element from this set.
- ///
- /// Panics when i >= self.len().
- #[inline]
- pub(crate) fn get(&self, i: usize) -> StateID {
- self.dense[i]
+ pub(crate) fn contains(&self, id: StateID) -> bool {
+ let index = self.sparse[id];
+ index.as_usize() < self.len() && self.dense[index] == id
}
/// Clear this set such that it has no members.
@@ -190,16 +204,21 @@ impl SparseSet {
self.len = 0;
}
+ #[inline]
+ pub(crate) fn iter(&self) -> SparseSetIter<'_> {
+ SparseSetIter(self.dense[..self.len()].iter())
+ }
+
/// Returns the heap memory usage, in bytes, used by this sparse set.
#[inline]
pub(crate) fn memory_usage(&self) -> usize {
- 2 * self.dense.len() * StateID::SIZE
+ self.dense.len() * StateID::SIZE + self.sparse.len() * StateID::SIZE
}
}
impl core::fmt::Debug for SparseSet {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- let elements: Vec<StateID> = self.into_iter().collect();
+ let elements: Vec<StateID> = self.iter().collect();
f.debug_tuple("SparseSet").field(&elements).finish()
}
}
@@ -210,20 +229,11 @@ impl core::fmt::Debug for SparseSet {
#[derive(Debug)]
pub(crate) struct SparseSetIter<'a>(core::slice::Iter<'a, StateID>);
-impl<'a> IntoIterator for &'a SparseSet {
- type Item = StateID;
- type IntoIter = SparseSetIter<'a>;
-
- fn into_iter(self) -> Self::IntoIter {
- SparseSetIter(self.dense[..self.len()].iter())
- }
-}
-
impl<'a> Iterator for SparseSetIter<'a> {
type Item = StateID;
- #[inline(always)]
+ #[cfg_attr(feature = "perf-inline", inline(always))]
fn next(&mut self) -> Option<StateID> {
- self.0.next().map(|value| *value)
+ self.0.next().map(|&id| id)
}
}