summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/src/util/id.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/src/util/id.rs')
-rw-r--r--vendor/regex-automata/src/util/id.rs608
1 files changed, 0 insertions, 608 deletions
diff --git a/vendor/regex-automata/src/util/id.rs b/vendor/regex-automata/src/util/id.rs
deleted file mode 100644
index 70bf0a93b..000000000
--- a/vendor/regex-automata/src/util/id.rs
+++ /dev/null
@@ -1,608 +0,0 @@
-/*!
-Type definitions for identifier types.
-
-A [`StateID`] represents the possible set of identifiers used in regex engine
-implementations in this crate. For example, they are used to identify both NFA
-and DFA states.
-
-A [`PatternID`] represents the possible set of identifiers for patterns. All
-regex engine implementations in this crate support searching for multiple
-patterns simultaneously. A `PatternID` is how each pattern is uniquely
-identified for a particular instance of a regex engine. Namely, a pattern is
-assigned an auto-incrementing integer, starting at `0`, based on the order of
-patterns supplied during the construction of the regex engine.
-
-These identifier types represent a way for this crate to make correctness
-guarantees around the possible set of values that a `StateID` or a `PatternID`
-might represent. Similarly, they also provide a way of constraining the size of
-these identifiers to reduce space usage while still guaranteeing that all such
-identifiers are repsentable by a `usize` for the current target.
-
-Moreover, the identifier types clamp the range of permissible values to a range
-that is typically smaller than its internal representation. (With the maximum
-value being, e.g., `StateID::MAX`.) Users of these types may not rely this
-clamping for the purpose of memory safety. Users may, however, rely on these
-invariants to avoid panics or other types of logic bugs.
-*/
-
-// Continuing from the above comment about correctness guarantees, an example
-// of a way in which we use the guarantees on these types is delta encoding.
-// Namely, we require that IDs can be at most 2^31 - 2, which means the
-// difference between any two IDs is always representable as an i32.
-
-use core::{
- convert::{Infallible, TryFrom},
- mem, ops,
-};
-
-#[cfg(feature = "alloc")]
-use alloc::vec::Vec;
-
-/// An identifier for a regex pattern.
-///
-/// The identifier for a pattern corresponds to its relative position among
-/// other patterns in a single finite state machine. Namely, when building
-/// a multi-pattern regex engine, one must supply a sequence of patterns to
-/// match. The position (starting at 0) of each pattern in that sequence
-/// represents its identifier. This identifier is in turn used to identify and
-/// report matches of that pattern in various APIs.
-///
-/// A pattern ID is guaranteed to be representable by a `usize`. Similarly,
-/// the number of patterns in any regex engine in this crate is guaranteed to
-/// be representable by a `usize`. This applies to regex engines that have
-/// been deserialized; a deserialization error will be returned if it contains
-/// pattern IDs that violate these requirements in your current environment.
-///
-/// For extra convenience in some cases, this type also guarantees that all
-/// IDs can fit into an `i32` and an `isize` without overflowing.
-///
-/// # Representation
-///
-/// This type is always represented internally by a `u32` and is marked as
-/// `repr(transparent)`. Thus, this type always has the same representation as
-/// a `u32`.
-///
-/// # Indexing
-///
-/// For convenience, callers may use a `PatternID` to index slices.
-///
-/// # Safety
-///
-/// While a `PatternID` is meant to guarantee that its value fits into `usize`
-/// (while using a possibly smaller representation than `usize` on some
-/// targets), callers must not rely on this property for safety. Callers may
-/// choose to rely on this property for correctness however.
-#[repr(transparent)]
-#[derive(
- Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
-)]
-pub struct PatternID(u32);
-
-impl PatternID {
- /// The maximum pattern ID value, represented as a `usize`.
- #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
- pub const MAX: PatternID =
- PatternID::new_unchecked(core::i32::MAX as usize - 1);
-
- /// The maximum pattern ID value, represented as a `usize`.
- #[cfg(target_pointer_width = "16")]
- pub const MAX: PatternID = PatternID::new_unchecked(core::isize::MAX - 1);
-
- /// The total number of patterns that are allowed in any single regex
- /// engine.
- pub const LIMIT: usize = PatternID::MAX.as_usize() + 1;
-
- /// The zero pattern ID value.
- pub const ZERO: PatternID = PatternID::new_unchecked(0);
-
- /// The number of bytes that a single `PatternID` uses in memory.
- pub const SIZE: usize = core::mem::size_of::<PatternID>();
-
- /// Create a new pattern ID.
- ///
- /// If the given identifier exceeds [`PatternID::MAX`], then this returns
- /// an error.
- #[inline]
- pub fn new(id: usize) -> Result<PatternID, PatternIDError> {
- PatternID::try_from(id)
- }
-
- /// Create a new pattern ID without checking whether the given value
- /// exceeds [`PatternID::MAX`].
- ///
- /// While this is unchecked, providing an incorrect value must never
- /// sacrifice memory safety, as documented above.
- #[inline]
- pub const fn new_unchecked(id: usize) -> PatternID {
- PatternID(id as u32)
- }
-
- /// Like [`PatternID::new`], but panics if the given ID is not valid.
- #[inline]
- pub fn must(id: usize) -> PatternID {
- PatternID::new(id).unwrap()
- }
-
- /// Return this pattern ID as a `usize`.
- #[inline]
- pub const fn as_usize(&self) -> usize {
- self.0 as usize
- }
-
- /// Return the internal u32 of this pattern ID.
- #[inline]
- pub const fn as_u32(&self) -> u32 {
- self.0
- }
-
- /// Return the internal u32 of this pattern ID represented as an i32.
- ///
- /// This is guaranteed to never overflow an `i32`.
- #[inline]
- pub const fn as_i32(&self) -> i32 {
- self.0 as i32
- }
-
- /// Returns one more than this pattern ID as a usize.
- ///
- /// Since a pattern ID has constraints on its maximum value, adding `1` to
- /// it will always fit in a `usize` (and a `u32`).
- #[inline]
- pub fn one_more(&self) -> usize {
- self.as_usize().checked_add(1).unwrap()
- }
-
- /// Decode this pattern ID from the bytes given using the native endian
- /// byte order for the current target.
- ///
- /// If the decoded integer is not representable as a pattern ID for the
- /// current target, then this returns an error.
- #[inline]
- pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<PatternID, PatternIDError> {
- let id = u32::from_ne_bytes(bytes);
- if id > PatternID::MAX.as_u32() {
- return Err(PatternIDError { attempted: id as u64 });
- }
- Ok(PatternID::new_unchecked(id as usize))
- }
-
- /// Decode this pattern ID from the bytes given using the native endian
- /// byte order for the current target.
- ///
- /// This is analogous to [`PatternID::new_unchecked`] in that is does not
- /// check whether the decoded integer is representable as a pattern ID.
- #[inline]
- pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> PatternID {
- PatternID::new_unchecked(u32::from_ne_bytes(bytes) as usize)
- }
-
- /// Return the underlying pattern ID integer as raw bytes in native endian
- /// format.
- #[inline]
- pub fn to_ne_bytes(&self) -> [u8; 4] {
- self.0.to_ne_bytes()
- }
-
- /// Returns an iterator over all pattern IDs from 0 up to and not including
- /// the given length.
- ///
- /// If the given length exceeds [`PatternID::LIMIT`], then this panics.
- #[cfg(feature = "alloc")]
- pub(crate) fn iter(len: usize) -> PatternIDIter {
- PatternIDIter::new(len)
- }
-}
-
-/// This error occurs when a pattern ID could not be constructed.
-///
-/// This occurs when given an integer exceeding the maximum pattern ID value.
-///
-/// When the `std` feature is enabled, this implements the `Error` trait.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct PatternIDError {
- attempted: u64,
-}
-
-impl PatternIDError {
- /// Returns the value that failed to constructed a pattern ID.
- pub fn attempted(&self) -> u64 {
- self.attempted
- }
-}
-
-#[cfg(feature = "std")]
-impl std::error::Error for PatternIDError {}
-
-impl core::fmt::Display for PatternIDError {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- write!(
- f,
- "failed to create PatternID from {:?}, which exceeds {:?}",
- self.attempted(),
- PatternID::MAX,
- )
- }
-}
-
-/// An identifier for a state in a regex engine.
-///
-/// A state ID is guaranteed to be representable by a `usize`. Similarly, the
-/// number of states in any regex engine in this crate is guaranteed to be
-/// representable by a `usize`. This applies to regex engines that have been
-/// deserialized; a deserialization error will be returned if it contains state
-/// IDs that violate these requirements in your current environment.
-///
-/// For extra convenience in some cases, this type also guarantees that all
-/// IDs can fit into an `i32` and an `isize` without overflowing.
-///
-/// # Representation
-///
-/// This type is always represented internally by a `u32` and is marked as
-/// `repr(transparent)`. Thus, this type always has the same representation as
-/// a `u32`.
-///
-/// # Indexing
-///
-/// For convenience, callers may use a `StateID` to index slices.
-///
-/// # Safety
-///
-/// While a `StateID` is meant to guarantee that its value fits into `usize`
-/// (while using a possibly smaller representation than `usize` on some
-/// targets), callers must not rely on this property for safety. Callers may
-/// choose to rely on this property for correctness however.
-#[repr(transparent)]
-#[derive(
- Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
-)]
-pub struct StateID(u32);
-
-impl StateID {
- /// The maximum state ID value.
- #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
- pub const MAX: StateID =
- StateID::new_unchecked(core::i32::MAX as usize - 1);
-
- /// The maximum state ID value.
- #[cfg(target_pointer_width = "16")]
- pub const MAX: StateID = StateID::new_unchecked(core::isize::MAX - 1);
-
- /// The total number of states that are allowed in any single regex
- /// engine, represented as a `usize`.
- pub const LIMIT: usize = StateID::MAX.as_usize() + 1;
-
- /// The zero state ID value.
- pub const ZERO: StateID = StateID::new_unchecked(0);
-
- /// The number of bytes that a single `StateID` uses in memory.
- pub const SIZE: usize = core::mem::size_of::<StateID>();
-
- /// Create a new state ID.
- ///
- /// If the given identifier exceeds [`StateID::MAX`], then this returns
- /// an error.
- #[inline]
- pub fn new(id: usize) -> Result<StateID, StateIDError> {
- StateID::try_from(id)
- }
-
- /// Create a new state ID without checking whether the given value
- /// exceeds [`StateID::MAX`].
- ///
- /// While this is unchecked, providing an incorrect value must never
- /// sacrifice memory safety, as documented above.
- #[inline]
- pub const fn new_unchecked(id: usize) -> StateID {
- StateID(id as u32)
- }
-
- /// Like [`StateID::new`], but panics if the given ID is not valid.
- #[inline]
- pub fn must(id: usize) -> StateID {
- StateID::new(id).unwrap()
- }
-
- /// Return this state ID as a `usize`.
- #[inline]
- pub const fn as_usize(&self) -> usize {
- self.0 as usize
- }
-
- /// Return the internal u32 of this state ID.
- #[inline]
- pub const fn as_u32(&self) -> u32 {
- self.0
- }
-
- /// Return the internal u32 of this pattern ID represented as an i32.
- ///
- /// This is guaranteed to never overflow an `i32`.
- #[inline]
- pub const fn as_i32(&self) -> i32 {
- self.0 as i32
- }
-
- /// Returns one more than this state ID as a usize.
- ///
- /// Since a state ID has constraints on its maximum value, adding `1` to
- /// it will always fit in a `usize` (and a `u32`).
- #[inline]
- pub fn one_more(&self) -> usize {
- self.as_usize().checked_add(1).unwrap()
- }
-
- /// Decode this state ID from the bytes given using the native endian byte
- /// order for the current target.
- ///
- /// If the decoded integer is not representable as a state ID for the
- /// current target, then this returns an error.
- #[inline]
- pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<StateID, StateIDError> {
- let id = u32::from_ne_bytes(bytes);
- if id > StateID::MAX.as_u32() {
- return Err(StateIDError { attempted: id as u64 });
- }
- Ok(StateID::new_unchecked(id as usize))
- }
-
- /// Decode this state ID from the bytes given using the native endian
- /// byte order for the current target.
- ///
- /// This is analogous to [`StateID::new_unchecked`] in that is does not
- /// check whether the decoded integer is representable as a state ID.
- #[inline]
- pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> StateID {
- StateID::new_unchecked(u32::from_ne_bytes(bytes) as usize)
- }
-
- /// Return the underlying state ID integer as raw bytes in native endian
- /// format.
- #[inline]
- pub fn to_ne_bytes(&self) -> [u8; 4] {
- self.0.to_ne_bytes()
- }
-
- /// Returns an iterator over all state IDs from 0 up to and not including
- /// the given length.
- ///
- /// If the given length exceeds [`StateID::LIMIT`], then this panics.
- #[cfg(feature = "alloc")]
- pub(crate) fn iter(len: usize) -> StateIDIter {
- StateIDIter::new(len)
- }
-}
-
-/// This error occurs when a state ID could not be constructed.
-///
-/// This occurs when given an integer exceeding the maximum state ID value.
-///
-/// When the `std` feature is enabled, this implements the `Error` trait.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct StateIDError {
- attempted: u64,
-}
-
-impl StateIDError {
- /// Returns the value that failed to constructed a state ID.
- pub fn attempted(&self) -> u64 {
- self.attempted
- }
-}
-
-#[cfg(feature = "std")]
-impl std::error::Error for StateIDError {}
-
-impl core::fmt::Display for StateIDError {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- write!(
- f,
- "failed to create StateID from {:?}, which exceeds {:?}",
- self.attempted(),
- StateID::MAX,
- )
- }
-}
-
-/// A macro for defining exactly identical (modulo names) impls for ID types.
-macro_rules! impls {
- ($ty:ident, $tyerr:ident, $tyiter:ident) => {
- #[derive(Clone, Debug)]
- pub(crate) struct $tyiter {
- rng: ops::Range<usize>,
- }
-
- impl $tyiter {
- #[cfg(feature = "alloc")]
- fn new(len: usize) -> $tyiter {
- assert!(
- len <= $ty::LIMIT,
- "cannot create iterator with IDs when number of \
- elements exceed {:?}",
- $ty::LIMIT,
- );
- $tyiter { rng: 0..len }
- }
- }
-
- impl Iterator for $tyiter {
- type Item = $ty;
-
- fn next(&mut self) -> Option<$ty> {
- if self.rng.start >= self.rng.end {
- return None;
- }
- let next_id = self.rng.start + 1;
- let id = mem::replace(&mut self.rng.start, next_id);
- // new_unchecked is OK since we asserted that the number of
- // elements in this iterator will fit in an ID at construction.
- Some($ty::new_unchecked(id))
- }
- }
-
- impl<T> core::ops::Index<$ty> for [T] {
- type Output = T;
-
- #[inline]
- fn index(&self, index: $ty) -> &T {
- &self[index.as_usize()]
- }
- }
-
- impl<T> core::ops::IndexMut<$ty> for [T] {
- #[inline]
- fn index_mut(&mut self, index: $ty) -> &mut T {
- &mut self[index.as_usize()]
- }
- }
-
- #[cfg(feature = "alloc")]
- impl<T> core::ops::Index<$ty> for Vec<T> {
- type Output = T;
-
- #[inline]
- fn index(&self, index: $ty) -> &T {
- &self[index.as_usize()]
- }
- }
-
- #[cfg(feature = "alloc")]
- impl<T> core::ops::IndexMut<$ty> for Vec<T> {
- #[inline]
- fn index_mut(&mut self, index: $ty) -> &mut T {
- &mut self[index.as_usize()]
- }
- }
-
- impl TryFrom<usize> for $ty {
- type Error = $tyerr;
-
- fn try_from(id: usize) -> Result<$ty, $tyerr> {
- if id > $ty::MAX.as_usize() {
- return Err($tyerr { attempted: id as u64 });
- }
- Ok($ty::new_unchecked(id))
- }
- }
-
- impl TryFrom<u8> for $ty {
- type Error = Infallible;
-
- fn try_from(id: u8) -> Result<$ty, Infallible> {
- Ok($ty::new_unchecked(id as usize))
- }
- }
-
- impl TryFrom<u16> for $ty {
- type Error = $tyerr;
-
- fn try_from(id: u16) -> Result<$ty, $tyerr> {
- if id as u32 > $ty::MAX.as_u32() {
- return Err($tyerr { attempted: id as u64 });
- }
- Ok($ty::new_unchecked(id as usize))
- }
- }
-
- impl TryFrom<u32> for $ty {
- type Error = $tyerr;
-
- fn try_from(id: u32) -> Result<$ty, $tyerr> {
- if id > $ty::MAX.as_u32() {
- return Err($tyerr { attempted: id as u64 });
- }
- Ok($ty::new_unchecked(id as usize))
- }
- }
-
- impl TryFrom<u64> for $ty {
- type Error = $tyerr;
-
- fn try_from(id: u64) -> Result<$ty, $tyerr> {
- if id > $ty::MAX.as_u32() as u64 {
- return Err($tyerr { attempted: id });
- }
- Ok($ty::new_unchecked(id as usize))
- }
- }
-
- #[cfg(test)]
- impl quickcheck::Arbitrary for $ty {
- fn arbitrary(gen: &mut quickcheck::Gen) -> $ty {
- use core::cmp::max;
-
- let id = max(i32::MIN + 1, i32::arbitrary(gen)).abs();
- if id > $ty::MAX.as_i32() {
- $ty::MAX
- } else {
- $ty::new(usize::try_from(id).unwrap()).unwrap()
- }
- }
- }
- };
-}
-
-impls!(PatternID, PatternIDError, PatternIDIter);
-impls!(StateID, StateIDError, StateIDIter);
-
-/// A utility trait that defines a couple of adapters for making it convenient
-/// to access indices as ID types. We require ExactSizeIterator so that
-/// iterator construction can do a single check to make sure the index of each
-/// element is representable by its ID type.
-#[cfg(feature = "alloc")]
-pub(crate) trait IteratorIDExt: Iterator {
- fn with_pattern_ids(self) -> WithPatternIDIter<Self>
- where
- Self: Sized + ExactSizeIterator,
- {
- WithPatternIDIter::new(self)
- }
-
- fn with_state_ids(self) -> WithStateIDIter<Self>
- where
- Self: Sized + ExactSizeIterator,
- {
- WithStateIDIter::new(self)
- }
-}
-
-#[cfg(feature = "alloc")]
-impl<I: Iterator> IteratorIDExt for I {}
-
-#[cfg(feature = "alloc")]
-macro_rules! iditer {
- ($ty:ident, $iterty:ident, $withiterty:ident) => {
- /// An iterator adapter that is like std::iter::Enumerate, but attaches
- /// IDs. It requires ExactSizeIterator. At construction, it ensures
- /// that the index of each element in the iterator is representable in
- /// the corresponding ID type.
- #[derive(Clone, Debug)]
- pub(crate) struct $withiterty<I> {
- it: I,
- ids: $iterty,
- }
-
- impl<I: Iterator + ExactSizeIterator> $withiterty<I> {
- fn new(it: I) -> $withiterty<I> {
- let ids = $ty::iter(it.len());
- $withiterty { it, ids }
- }
- }
-
- impl<I: Iterator + ExactSizeIterator> Iterator for $withiterty<I> {
- type Item = ($ty, I::Item);
-
- fn next(&mut self) -> Option<($ty, I::Item)> {
- let item = self.it.next()?;
- // Number of elements in this iterator must match, according
- // to contract of ExactSizeIterator.
- let id = self.ids.next().unwrap();
- Some((id, item))
- }
- }
- };
-}
-
-#[cfg(feature = "alloc")]
-iditer!(PatternID, PatternIDIter, WithPatternIDIter);
-#[cfg(feature = "alloc")]
-iditer!(StateID, StateIDIter, WithStateIDIter);