summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata-0.2.0/src/util/id.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata-0.2.0/src/util/id.rs')
-rw-r--r--vendor/regex-automata-0.2.0/src/util/id.rs608
1 files changed, 608 insertions, 0 deletions
diff --git a/vendor/regex-automata-0.2.0/src/util/id.rs b/vendor/regex-automata-0.2.0/src/util/id.rs
new file mode 100644
index 000000000..70bf0a93b
--- /dev/null
+++ b/vendor/regex-automata-0.2.0/src/util/id.rs
@@ -0,0 +1,608 @@
+/*!
+Type definitions for identifier types.
+
+A [`StateID`] represents the possible set of identifiers used in regex engine
+implementations in this crate. For example, they are used to identify both NFA
+and DFA states.
+
+A [`PatternID`] represents the possible set of identifiers for patterns. All
+regex engine implementations in this crate support searching for multiple
+patterns simultaneously. A `PatternID` is how each pattern is uniquely
+identified for a particular instance of a regex engine. Namely, a pattern is
+assigned an auto-incrementing integer, starting at `0`, based on the order of
+patterns supplied during the construction of the regex engine.
+
+These identifier types represent a way for this crate to make correctness
+guarantees around the possible set of values that a `StateID` or a `PatternID`
+might represent. Similarly, they also provide a way of constraining the size of
+these identifiers to reduce space usage while still guaranteeing that all such
+identifiers are repsentable by a `usize` for the current target.
+
+Moreover, the identifier types clamp the range of permissible values to a range
+that is typically smaller than its internal representation. (With the maximum
+value being, e.g., `StateID::MAX`.) Users of these types may not rely this
+clamping for the purpose of memory safety. Users may, however, rely on these
+invariants to avoid panics or other types of logic bugs.
+*/
+
+// Continuing from the above comment about correctness guarantees, an example
+// of a way in which we use the guarantees on these types is delta encoding.
+// Namely, we require that IDs can be at most 2^31 - 2, which means the
+// difference between any two IDs is always representable as an i32.
+
+use core::{
+ convert::{Infallible, TryFrom},
+ mem, ops,
+};
+
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+/// An identifier for a regex pattern.
+///
+/// The identifier for a pattern corresponds to its relative position among
+/// other patterns in a single finite state machine. Namely, when building
+/// a multi-pattern regex engine, one must supply a sequence of patterns to
+/// match. The position (starting at 0) of each pattern in that sequence
+/// represents its identifier. This identifier is in turn used to identify and
+/// report matches of that pattern in various APIs.
+///
+/// A pattern ID is guaranteed to be representable by a `usize`. Similarly,
+/// the number of patterns in any regex engine in this crate is guaranteed to
+/// be representable by a `usize`. This applies to regex engines that have
+/// been deserialized; a deserialization error will be returned if it contains
+/// pattern IDs that violate these requirements in your current environment.
+///
+/// For extra convenience in some cases, this type also guarantees that all
+/// IDs can fit into an `i32` and an `isize` without overflowing.
+///
+/// # Representation
+///
+/// This type is always represented internally by a `u32` and is marked as
+/// `repr(transparent)`. Thus, this type always has the same representation as
+/// a `u32`.
+///
+/// # Indexing
+///
+/// For convenience, callers may use a `PatternID` to index slices.
+///
+/// # Safety
+///
+/// While a `PatternID` is meant to guarantee that its value fits into `usize`
+/// (while using a possibly smaller representation than `usize` on some
+/// targets), callers must not rely on this property for safety. Callers may
+/// choose to rely on this property for correctness however.
+#[repr(transparent)]
+#[derive(
+ Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+pub struct PatternID(u32);
+
+impl PatternID {
+ /// The maximum pattern ID value, represented as a `usize`.
+ #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+ pub const MAX: PatternID =
+ PatternID::new_unchecked(core::i32::MAX as usize - 1);
+
+ /// The maximum pattern ID value, represented as a `usize`.
+ #[cfg(target_pointer_width = "16")]
+ pub const MAX: PatternID = PatternID::new_unchecked(core::isize::MAX - 1);
+
+ /// The total number of patterns that are allowed in any single regex
+ /// engine.
+ pub const LIMIT: usize = PatternID::MAX.as_usize() + 1;
+
+ /// The zero pattern ID value.
+ pub const ZERO: PatternID = PatternID::new_unchecked(0);
+
+ /// The number of bytes that a single `PatternID` uses in memory.
+ pub const SIZE: usize = core::mem::size_of::<PatternID>();
+
+ /// Create a new pattern ID.
+ ///
+ /// If the given identifier exceeds [`PatternID::MAX`], then this returns
+ /// an error.
+ #[inline]
+ pub fn new(id: usize) -> Result<PatternID, PatternIDError> {
+ PatternID::try_from(id)
+ }
+
+ /// Create a new pattern ID without checking whether the given value
+ /// exceeds [`PatternID::MAX`].
+ ///
+ /// While this is unchecked, providing an incorrect value must never
+ /// sacrifice memory safety, as documented above.
+ #[inline]
+ pub const fn new_unchecked(id: usize) -> PatternID {
+ PatternID(id as u32)
+ }
+
+ /// Like [`PatternID::new`], but panics if the given ID is not valid.
+ #[inline]
+ pub fn must(id: usize) -> PatternID {
+ PatternID::new(id).unwrap()
+ }
+
+ /// Return this pattern ID as a `usize`.
+ #[inline]
+ pub const fn as_usize(&self) -> usize {
+ self.0 as usize
+ }
+
+ /// Return the internal u32 of this pattern ID.
+ #[inline]
+ pub const fn as_u32(&self) -> u32 {
+ self.0
+ }
+
+ /// Return the internal u32 of this pattern ID represented as an i32.
+ ///
+ /// This is guaranteed to never overflow an `i32`.
+ #[inline]
+ pub const fn as_i32(&self) -> i32 {
+ self.0 as i32
+ }
+
+ /// Returns one more than this pattern ID as a usize.
+ ///
+ /// Since a pattern ID has constraints on its maximum value, adding `1` to
+ /// it will always fit in a `usize` (and a `u32`).
+ #[inline]
+ pub fn one_more(&self) -> usize {
+ self.as_usize().checked_add(1).unwrap()
+ }
+
+ /// Decode this pattern ID from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+ /// If the decoded integer is not representable as a pattern ID for the
+ /// current target, then this returns an error.
+ #[inline]
+ pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<PatternID, PatternIDError> {
+ let id = u32::from_ne_bytes(bytes);
+ if id > PatternID::MAX.as_u32() {
+ return Err(PatternIDError { attempted: id as u64 });
+ }
+ Ok(PatternID::new_unchecked(id as usize))
+ }
+
+ /// Decode this pattern ID from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+ /// This is analogous to [`PatternID::new_unchecked`] in that is does not
+ /// check whether the decoded integer is representable as a pattern ID.
+ #[inline]
+ pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> PatternID {
+ PatternID::new_unchecked(u32::from_ne_bytes(bytes) as usize)
+ }
+
+ /// Return the underlying pattern ID integer as raw bytes in native endian
+ /// format.
+ #[inline]
+ pub fn to_ne_bytes(&self) -> [u8; 4] {
+ self.0.to_ne_bytes()
+ }
+
+ /// Returns an iterator over all pattern IDs from 0 up to and not including
+ /// the given length.
+ ///
+ /// If the given length exceeds [`PatternID::LIMIT`], then this panics.
+ #[cfg(feature = "alloc")]
+ pub(crate) fn iter(len: usize) -> PatternIDIter {
+ PatternIDIter::new(len)
+ }
+}
+
+/// This error occurs when a pattern ID could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum pattern ID value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct PatternIDError {
+ attempted: u64,
+}
+
+impl PatternIDError {
+ /// Returns the value that failed to constructed a pattern ID.
+ pub fn attempted(&self) -> u64 {
+ self.attempted
+ }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for PatternIDError {}
+
+impl core::fmt::Display for PatternIDError {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "failed to create PatternID from {:?}, which exceeds {:?}",
+ self.attempted(),
+ PatternID::MAX,
+ )
+ }
+}
+
+/// An identifier for a state in a regex engine.
+///
+/// A state ID is guaranteed to be representable by a `usize`. Similarly, the
+/// number of states in any regex engine in this crate is guaranteed to be
+/// representable by a `usize`. This applies to regex engines that have been
+/// deserialized; a deserialization error will be returned if it contains state
+/// IDs that violate these requirements in your current environment.
+///
+/// For extra convenience in some cases, this type also guarantees that all
+/// IDs can fit into an `i32` and an `isize` without overflowing.
+///
+/// # Representation
+///
+/// This type is always represented internally by a `u32` and is marked as
+/// `repr(transparent)`. Thus, this type always has the same representation as
+/// a `u32`.
+///
+/// # Indexing
+///
+/// For convenience, callers may use a `StateID` to index slices.
+///
+/// # Safety
+///
+/// While a `StateID` is meant to guarantee that its value fits into `usize`
+/// (while using a possibly smaller representation than `usize` on some
+/// targets), callers must not rely on this property for safety. Callers may
+/// choose to rely on this property for correctness however.
+#[repr(transparent)]
+#[derive(
+ Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+pub struct StateID(u32);
+
+impl StateID {
+ /// The maximum state ID value.
+ #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+ pub const MAX: StateID =
+ StateID::new_unchecked(core::i32::MAX as usize - 1);
+
+ /// The maximum state ID value.
+ #[cfg(target_pointer_width = "16")]
+ pub const MAX: StateID = StateID::new_unchecked(core::isize::MAX - 1);
+
+ /// The total number of states that are allowed in any single regex
+ /// engine, represented as a `usize`.
+ pub const LIMIT: usize = StateID::MAX.as_usize() + 1;
+
+ /// The zero state ID value.
+ pub const ZERO: StateID = StateID::new_unchecked(0);
+
+ /// The number of bytes that a single `StateID` uses in memory.
+ pub const SIZE: usize = core::mem::size_of::<StateID>();
+
+ /// Create a new state ID.
+ ///
+ /// If the given identifier exceeds [`StateID::MAX`], then this returns
+ /// an error.
+ #[inline]
+ pub fn new(id: usize) -> Result<StateID, StateIDError> {
+ StateID::try_from(id)
+ }
+
+ /// Create a new state ID without checking whether the given value
+ /// exceeds [`StateID::MAX`].
+ ///
+ /// While this is unchecked, providing an incorrect value must never
+ /// sacrifice memory safety, as documented above.
+ #[inline]
+ pub const fn new_unchecked(id: usize) -> StateID {
+ StateID(id as u32)
+ }
+
+ /// Like [`StateID::new`], but panics if the given ID is not valid.
+ #[inline]
+ pub fn must(id: usize) -> StateID {
+ StateID::new(id).unwrap()
+ }
+
+ /// Return this state ID as a `usize`.
+ #[inline]
+ pub const fn as_usize(&self) -> usize {
+ self.0 as usize
+ }
+
+ /// Return the internal u32 of this state ID.
+ #[inline]
+ pub const fn as_u32(&self) -> u32 {
+ self.0
+ }
+
+ /// Return the internal u32 of this pattern ID represented as an i32.
+ ///
+ /// This is guaranteed to never overflow an `i32`.
+ #[inline]
+ pub const fn as_i32(&self) -> i32 {
+ self.0 as i32
+ }
+
+ /// Returns one more than this state ID as a usize.
+ ///
+ /// Since a state ID has constraints on its maximum value, adding `1` to
+ /// it will always fit in a `usize` (and a `u32`).
+ #[inline]
+ pub fn one_more(&self) -> usize {
+ self.as_usize().checked_add(1).unwrap()
+ }
+
+ /// Decode this state ID from the bytes given using the native endian byte
+ /// order for the current target.
+ ///
+ /// If the decoded integer is not representable as a state ID for the
+ /// current target, then this returns an error.
+ #[inline]
+ pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<StateID, StateIDError> {
+ let id = u32::from_ne_bytes(bytes);
+ if id > StateID::MAX.as_u32() {
+ return Err(StateIDError { attempted: id as u64 });
+ }
+ Ok(StateID::new_unchecked(id as usize))
+ }
+
+ /// Decode this state ID from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+ /// This is analogous to [`StateID::new_unchecked`] in that is does not
+ /// check whether the decoded integer is representable as a state ID.
+ #[inline]
+ pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> StateID {
+ StateID::new_unchecked(u32::from_ne_bytes(bytes) as usize)
+ }
+
+ /// Return the underlying state ID integer as raw bytes in native endian
+ /// format.
+ #[inline]
+ pub fn to_ne_bytes(&self) -> [u8; 4] {
+ self.0.to_ne_bytes()
+ }
+
+ /// Returns an iterator over all state IDs from 0 up to and not including
+ /// the given length.
+ ///
+ /// If the given length exceeds [`StateID::LIMIT`], then this panics.
+ #[cfg(feature = "alloc")]
+ pub(crate) fn iter(len: usize) -> StateIDIter {
+ StateIDIter::new(len)
+ }
+}
+
+/// This error occurs when a state ID could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum state ID value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct StateIDError {
+ attempted: u64,
+}
+
+impl StateIDError {
+ /// Returns the value that failed to constructed a state ID.
+ pub fn attempted(&self) -> u64 {
+ self.attempted
+ }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for StateIDError {}
+
+impl core::fmt::Display for StateIDError {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "failed to create StateID from {:?}, which exceeds {:?}",
+ self.attempted(),
+ StateID::MAX,
+ )
+ }
+}
+
+/// A macro for defining exactly identical (modulo names) impls for ID types.
+macro_rules! impls {
+ ($ty:ident, $tyerr:ident, $tyiter:ident) => {
+ #[derive(Clone, Debug)]
+ pub(crate) struct $tyiter {
+ rng: ops::Range<usize>,
+ }
+
+ impl $tyiter {
+ #[cfg(feature = "alloc")]
+ fn new(len: usize) -> $tyiter {
+ assert!(
+ len <= $ty::LIMIT,
+ "cannot create iterator with IDs when number of \
+ elements exceed {:?}",
+ $ty::LIMIT,
+ );
+ $tyiter { rng: 0..len }
+ }
+ }
+
+ impl Iterator for $tyiter {
+ type Item = $ty;
+
+ fn next(&mut self) -> Option<$ty> {
+ if self.rng.start >= self.rng.end {
+ return None;
+ }
+ let next_id = self.rng.start + 1;
+ let id = mem::replace(&mut self.rng.start, next_id);
+ // new_unchecked is OK since we asserted that the number of
+ // elements in this iterator will fit in an ID at construction.
+ Some($ty::new_unchecked(id))
+ }
+ }
+
+ impl<T> core::ops::Index<$ty> for [T] {
+ type Output = T;
+
+ #[inline]
+ fn index(&self, index: $ty) -> &T {
+ &self[index.as_usize()]
+ }
+ }
+
+ impl<T> core::ops::IndexMut<$ty> for [T] {
+ #[inline]
+ fn index_mut(&mut self, index: $ty) -> &mut T {
+ &mut self[index.as_usize()]
+ }
+ }
+
+ #[cfg(feature = "alloc")]
+ impl<T> core::ops::Index<$ty> for Vec<T> {
+ type Output = T;
+
+ #[inline]
+ fn index(&self, index: $ty) -> &T {
+ &self[index.as_usize()]
+ }
+ }
+
+ #[cfg(feature = "alloc")]
+ impl<T> core::ops::IndexMut<$ty> for Vec<T> {
+ #[inline]
+ fn index_mut(&mut self, index: $ty) -> &mut T {
+ &mut self[index.as_usize()]
+ }
+ }
+
+ impl TryFrom<usize> for $ty {
+ type Error = $tyerr;
+
+ fn try_from(id: usize) -> Result<$ty, $tyerr> {
+ if id > $ty::MAX.as_usize() {
+ return Err($tyerr { attempted: id as u64 });
+ }
+ Ok($ty::new_unchecked(id))
+ }
+ }
+
+ impl TryFrom<u8> for $ty {
+ type Error = Infallible;
+
+ fn try_from(id: u8) -> Result<$ty, Infallible> {
+ Ok($ty::new_unchecked(id as usize))
+ }
+ }
+
+ impl TryFrom<u16> for $ty {
+ type Error = $tyerr;
+
+ fn try_from(id: u16) -> Result<$ty, $tyerr> {
+ if id as u32 > $ty::MAX.as_u32() {
+ return Err($tyerr { attempted: id as u64 });
+ }
+ Ok($ty::new_unchecked(id as usize))
+ }
+ }
+
+ impl TryFrom<u32> for $ty {
+ type Error = $tyerr;
+
+ fn try_from(id: u32) -> Result<$ty, $tyerr> {
+ if id > $ty::MAX.as_u32() {
+ return Err($tyerr { attempted: id as u64 });
+ }
+ Ok($ty::new_unchecked(id as usize))
+ }
+ }
+
+ impl TryFrom<u64> for $ty {
+ type Error = $tyerr;
+
+ fn try_from(id: u64) -> Result<$ty, $tyerr> {
+ if id > $ty::MAX.as_u32() as u64 {
+ return Err($tyerr { attempted: id });
+ }
+ Ok($ty::new_unchecked(id as usize))
+ }
+ }
+
+ #[cfg(test)]
+ impl quickcheck::Arbitrary for $ty {
+ fn arbitrary(gen: &mut quickcheck::Gen) -> $ty {
+ use core::cmp::max;
+
+ let id = max(i32::MIN + 1, i32::arbitrary(gen)).abs();
+ if id > $ty::MAX.as_i32() {
+ $ty::MAX
+ } else {
+ $ty::new(usize::try_from(id).unwrap()).unwrap()
+ }
+ }
+ }
+ };
+}
+
+impls!(PatternID, PatternIDError, PatternIDIter);
+impls!(StateID, StateIDError, StateIDIter);
+
+/// A utility trait that defines a couple of adapters for making it convenient
+/// to access indices as ID types. We require ExactSizeIterator so that
+/// iterator construction can do a single check to make sure the index of each
+/// element is representable by its ID type.
+#[cfg(feature = "alloc")]
+pub(crate) trait IteratorIDExt: Iterator {
+ fn with_pattern_ids(self) -> WithPatternIDIter<Self>
+ where
+ Self: Sized + ExactSizeIterator,
+ {
+ WithPatternIDIter::new(self)
+ }
+
+ fn with_state_ids(self) -> WithStateIDIter<Self>
+ where
+ Self: Sized + ExactSizeIterator,
+ {
+ WithStateIDIter::new(self)
+ }
+}
+
+#[cfg(feature = "alloc")]
+impl<I: Iterator> IteratorIDExt for I {}
+
+#[cfg(feature = "alloc")]
+macro_rules! iditer {
+ ($ty:ident, $iterty:ident, $withiterty:ident) => {
+ /// An iterator adapter that is like std::iter::Enumerate, but attaches
+ /// IDs. It requires ExactSizeIterator. At construction, it ensures
+ /// that the index of each element in the iterator is representable in
+ /// the corresponding ID type.
+ #[derive(Clone, Debug)]
+ pub(crate) struct $withiterty<I> {
+ it: I,
+ ids: $iterty,
+ }
+
+ impl<I: Iterator + ExactSizeIterator> $withiterty<I> {
+ fn new(it: I) -> $withiterty<I> {
+ let ids = $ty::iter(it.len());
+ $withiterty { it, ids }
+ }
+ }
+
+ impl<I: Iterator + ExactSizeIterator> Iterator for $withiterty<I> {
+ type Item = ($ty, I::Item);
+
+ fn next(&mut self) -> Option<($ty, I::Item)> {
+ let item = self.it.next()?;
+ // Number of elements in this iterator must match, according
+ // to contract of ExactSizeIterator.
+ let id = self.ids.next().unwrap();
+ Some((id, item))
+ }
+ }
+ };
+}
+
+#[cfg(feature = "alloc")]
+iditer!(PatternID, PatternIDIter, WithPatternIDIter);
+#[cfg(feature = "alloc")]
+iditer!(StateID, StateIDIter, WithStateIDIter);