From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/regex-automata/src/state_id.rs | 291 ++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 vendor/regex-automata/src/state_id.rs (limited to 'vendor/regex-automata/src/state_id.rs') diff --git a/vendor/regex-automata/src/state_id.rs b/vendor/regex-automata/src/state_id.rs new file mode 100644 index 000000000..c9bac1941 --- /dev/null +++ b/vendor/regex-automata/src/state_id.rs @@ -0,0 +1,291 @@ +use core::fmt::Debug; +use core::hash::Hash; +use core::mem::size_of; + +use byteorder::{ByteOrder, NativeEndian}; + +#[cfg(feature = "std")] +pub use self::std::*; + +#[cfg(feature = "std")] +mod std { + use byteorder::ByteOrder; + use core::mem::size_of; + use error::{Error, Result}; + + use super::StateID; + + /// Check that the premultiplication of the given state identifier can + /// fit into the representation indicated by `S`. If it cannot, or if it + /// overflows `usize` itself, then an error is returned. + pub fn premultiply_overflow_error( + last_state: S, + alphabet_len: usize, + ) -> Result<()> { + let requested = match last_state.to_usize().checked_mul(alphabet_len) { + Some(requested) => requested, + None => return Err(Error::premultiply_overflow(0, 0)), + }; + if requested > S::max_id() { + return Err(Error::premultiply_overflow(S::max_id(), requested)); + } + Ok(()) + } + + /// Allocate the next sequential identifier for a fresh state given + /// the previously constructed state identified by `current`. If the + /// next sequential identifier would overflow `usize` or the chosen + /// representation indicated by `S`, then an error is returned. + pub fn next_state_id(current: S) -> Result { + let next = match current.to_usize().checked_add(1) { + Some(next) => next, + None => return Err(Error::state_id_overflow(::std::usize::MAX)), + }; + if next > S::max_id() { + return Err(Error::state_id_overflow(S::max_id())); + } + Ok(S::from_usize(next)) + } + + /// Convert the given `usize` to the chosen state identifier + /// representation. If the given value cannot fit in the chosen + /// representation, then an error is returned. + pub fn usize_to_state_id(value: usize) -> Result { + if value > S::max_id() { + Err(Error::state_id_overflow(S::max_id())) + } else { + Ok(S::from_usize(value)) + } + } + + /// Write the given identifier to the given slice of bytes using the + /// specified endianness. The given slice must have length at least + /// `size_of::()`. + /// + /// The given state identifier representation must have size 1, 2, 4 or 8. + pub fn write_state_id_bytes( + slice: &mut [u8], + id: S, + ) { + assert!( + 1 == size_of::() + || 2 == size_of::() + || 4 == size_of::() + || 8 == size_of::() + ); + + match size_of::() { + 1 => slice[0] = id.to_usize() as u8, + 2 => E::write_u16(slice, id.to_usize() as u16), + 4 => E::write_u32(slice, id.to_usize() as u32), + 8 => E::write_u64(slice, id.to_usize() as u64), + _ => unreachable!(), + } + } +} + +/// Return the unique identifier for a DFA's dead state in the chosen +/// representation indicated by `S`. +pub fn dead_id() -> S { + S::from_usize(0) +} + +/// A trait describing the representation of a DFA's state identifier. +/// +/// The purpose of this trait is to safely express both the possible state +/// identifier representations that can be used in a DFA and to convert between +/// state identifier representations and types that can be used to efficiently +/// index memory (such as `usize`). +/// +/// In general, one should not need to implement this trait explicitly. In +/// particular, this crate provides implementations for `u8`, `u16`, `u32`, +/// `u64` and `usize`. (`u32` and `u64` are only provided for targets that can +/// represent all corresponding values in a `usize`.) +/// +/// # Safety +/// +/// This trait is unsafe because the correctness of its implementations may be +/// relied upon by other unsafe code. For example, one possible way to +/// implement this trait incorrectly would be to return a maximum identifier +/// in `max_id` that is greater than the real maximum identifier. This will +/// likely result in wrap-on-overflow semantics in release mode, which can in +/// turn produce incorrect state identifiers. Those state identifiers may then +/// in turn access out-of-bounds memory in a DFA's search routine, where bounds +/// checks are explicitly elided for performance reasons. +pub unsafe trait StateID: + Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord +{ + /// Convert from a `usize` to this implementation's representation. + /// + /// Implementors may assume that `n <= Self::max_id`. That is, implementors + /// do not need to check whether `n` can fit inside this implementation's + /// representation. + fn from_usize(n: usize) -> Self; + + /// Convert this implementation's representation to a `usize`. + /// + /// Implementors must not return a `usize` value greater than + /// `Self::max_id` and must not permit overflow when converting between the + /// implementor's representation and `usize`. In general, the preferred + /// way for implementors to achieve this is to simply not provide + /// implementations of `StateID` that cannot fit into the target platform's + /// `usize`. + fn to_usize(self) -> usize; + + /// Return the maximum state identifier supported by this representation. + /// + /// Implementors must return a correct bound. Doing otherwise may result + /// in memory unsafety. + fn max_id() -> usize; + + /// Read a single state identifier from the given slice of bytes in native + /// endian format. + /// + /// Implementors may assume that the given slice has length at least + /// `size_of::()`. + fn read_bytes(slice: &[u8]) -> Self; + + /// Write this state identifier to the given slice of bytes in native + /// endian format. + /// + /// Implementors may assume that the given slice has length at least + /// `size_of::()`. + fn write_bytes(self, slice: &mut [u8]); +} + +unsafe impl StateID for usize { + #[inline] + fn from_usize(n: usize) -> usize { + n + } + + #[inline] + fn to_usize(self) -> usize { + self + } + + #[inline] + fn max_id() -> usize { + ::core::usize::MAX + } + + #[inline] + fn read_bytes(slice: &[u8]) -> Self { + NativeEndian::read_uint(slice, size_of::()) as usize + } + + #[inline] + fn write_bytes(self, slice: &mut [u8]) { + NativeEndian::write_uint(slice, self as u64, size_of::()) + } +} + +unsafe impl StateID for u8 { + #[inline] + fn from_usize(n: usize) -> u8 { + n as u8 + } + + #[inline] + fn to_usize(self) -> usize { + self as usize + } + + #[inline] + fn max_id() -> usize { + ::core::u8::MAX as usize + } + + #[inline] + fn read_bytes(slice: &[u8]) -> Self { + slice[0] + } + + #[inline] + fn write_bytes(self, slice: &mut [u8]) { + slice[0] = self; + } +} + +unsafe impl StateID for u16 { + #[inline] + fn from_usize(n: usize) -> u16 { + n as u16 + } + + #[inline] + fn to_usize(self) -> usize { + self as usize + } + + #[inline] + fn max_id() -> usize { + ::core::u16::MAX as usize + } + + #[inline] + fn read_bytes(slice: &[u8]) -> Self { + NativeEndian::read_u16(slice) + } + + #[inline] + fn write_bytes(self, slice: &mut [u8]) { + NativeEndian::write_u16(slice, self) + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +unsafe impl StateID for u32 { + #[inline] + fn from_usize(n: usize) -> u32 { + n as u32 + } + + #[inline] + fn to_usize(self) -> usize { + self as usize + } + + #[inline] + fn max_id() -> usize { + ::core::u32::MAX as usize + } + + #[inline] + fn read_bytes(slice: &[u8]) -> Self { + NativeEndian::read_u32(slice) + } + + #[inline] + fn write_bytes(self, slice: &mut [u8]) { + NativeEndian::write_u32(slice, self) + } +} + +#[cfg(target_pointer_width = "64")] +unsafe impl StateID for u64 { + #[inline] + fn from_usize(n: usize) -> u64 { + n as u64 + } + + #[inline] + fn to_usize(self) -> usize { + self as usize + } + + #[inline] + fn max_id() -> usize { + ::core::u64::MAX as usize + } + + #[inline] + fn read_bytes(slice: &[u8]) -> Self { + NativeEndian::read_u64(slice) + } + + #[inline] + fn write_bytes(self, slice: &mut [u8]) { + NativeEndian::write_u64(slice, self) + } +} -- cgit v1.2.3