summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex-automata/src/util/int.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex-automata/src/util/int.rs')
-rw-r--r--third_party/rust/regex-automata/src/util/int.rs252
1 files changed, 252 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/src/util/int.rs b/third_party/rust/regex-automata/src/util/int.rs
new file mode 100644
index 0000000000..e6b13bff96
--- /dev/null
+++ b/third_party/rust/regex-automata/src/util/int.rs
@@ -0,0 +1,252 @@
+/*!
+This module provides several integer oriented traits for converting between
+both fixed size integers and integers whose size varies based on the target
+(like `usize`).
+
+The driving design principle of this module is to attempt to centralize as many
+`as` casts as possible here. And in particular, we separate casts into two
+buckets:
+
+* Casts that we use for their truncating behavior. In this case, we use more
+descriptive names, like `low_u32` and `high_u32`.
+* Casts that we use for converting back-and-forth between `usize`. These
+conversions are generally necessary because we often store indices in different
+formats to save on memory, which requires converting to and from `usize`. In
+this case, we very specifically do not want to overflow, and so the methods
+defined here will panic if the `as` cast would be lossy in debug mode. (A
+normal `as` cast will never panic!)
+
+For `as` casts between raw pointers, we use `cast`, so `as` isn't needed there.
+
+For regex engines, floating point is just never used, so we don't have to worry
+about `as` casts for those.
+
+Otherwise, this module pretty much covers all of our `as` needs except for one
+thing: const contexts. There are a select few places in this crate where we
+still need to use `as` because const functions on traits aren't stable yet.
+If we wind up significantly expanding our const footprint in this crate, it
+might be worth defining free functions to handle those cases. But at the time
+of writing, that just seemed like too much ceremony. Instead, I comment each
+such use of `as` in a const context with a "fixme" notice.
+
+NOTE: for simplicity, we don't take target pointer width into account here for
+`usize` conversions. Since we currently only panic in debug mode, skipping the
+check when it can be proven it isn't needed at compile time doesn't really
+matter. Now, if we wind up wanting to do as many checks as possible in release
+mode, then we would want to skip those when we know the conversions are always
+non-lossy.
+
+NOTE: this module isn't an exhaustive API. For example, we still use things
+like `u64::from` where possible, or even `usize::try_from()` for when we do
+explicitly want to panic or when we want to return an error for overflow.
+*/
+
+pub(crate) trait U8 {
+ fn as_usize(self) -> usize;
+}
+
+impl U8 for u8 {
+ fn as_usize(self) -> usize {
+ usize::from(self)
+ }
+}
+
+pub(crate) trait U16 {
+ fn as_usize(self) -> usize;
+ fn low_u8(self) -> u8;
+ fn high_u8(self) -> u8;
+}
+
+impl U16 for u16 {
+ fn as_usize(self) -> usize {
+ usize::from(self)
+ }
+
+ fn low_u8(self) -> u8 {
+ self as u8
+ }
+
+ fn high_u8(self) -> u8 {
+ (self >> 8) as u8
+ }
+}
+
+pub(crate) trait U32 {
+ fn as_usize(self) -> usize;
+ fn low_u8(self) -> u8;
+ fn low_u16(self) -> u16;
+ fn high_u16(self) -> u16;
+}
+
+impl U32 for u32 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("u32 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn low_u8(self) -> u8 {
+ self as u8
+ }
+
+ fn low_u16(self) -> u16 {
+ self as u16
+ }
+
+ fn high_u16(self) -> u16 {
+ (self >> 16) as u16
+ }
+}
+
+pub(crate) trait U64 {
+ fn as_usize(self) -> usize;
+ fn low_u8(self) -> u8;
+ fn low_u16(self) -> u16;
+ fn low_u32(self) -> u32;
+ fn high_u32(self) -> u32;
+}
+
+impl U64 for u64 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("u64 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn low_u8(self) -> u8 {
+ self as u8
+ }
+
+ fn low_u16(self) -> u16 {
+ self as u16
+ }
+
+ fn low_u32(self) -> u32 {
+ self as u32
+ }
+
+ fn high_u32(self) -> u32 {
+ (self >> 32) as u32
+ }
+}
+
+pub(crate) trait I32 {
+ fn as_usize(self) -> usize;
+ fn to_bits(self) -> u32;
+ fn from_bits(n: u32) -> i32;
+}
+
+impl I32 for i32 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("i32 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn to_bits(self) -> u32 {
+ self as u32
+ }
+
+ fn from_bits(n: u32) -> i32 {
+ n as i32
+ }
+}
+
+pub(crate) trait Usize {
+ fn as_u8(self) -> u8;
+ fn as_u16(self) -> u16;
+ fn as_u32(self) -> u32;
+ fn as_u64(self) -> u64;
+}
+
+impl Usize for usize {
+ fn as_u8(self) -> u8 {
+ #[cfg(debug_assertions)]
+ {
+ u8::try_from(self).expect("usize overflowed u8")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u8
+ }
+ }
+
+ fn as_u16(self) -> u16 {
+ #[cfg(debug_assertions)]
+ {
+ u16::try_from(self).expect("usize overflowed u16")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u16
+ }
+ }
+
+ fn as_u32(self) -> u32 {
+ #[cfg(debug_assertions)]
+ {
+ u32::try_from(self).expect("usize overflowed u32")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u32
+ }
+ }
+
+ fn as_u64(self) -> u64 {
+ #[cfg(debug_assertions)]
+ {
+ u64::try_from(self).expect("usize overflowed u64")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u64
+ }
+ }
+}
+
+// Pointers aren't integers, but we convert pointers to integers to perform
+// offset arithmetic in some places. (And no, we don't convert the integers
+// back to pointers.) So add 'as_usize' conversions here too for completeness.
+//
+// These 'as' casts are actually okay because they're always non-lossy. But the
+// idea here is to just try and remove as much 'as' as possible, particularly
+// in this crate where we are being really paranoid about offsets and making
+// sure we don't panic on inputs that might be untrusted. This way, the 'as'
+// casts become easier to audit if they're all in one place, even when some of
+// them are actually okay 100% of the time.
+
+pub(crate) trait Pointer {
+ fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+pub(crate) trait PointerMut {
+ fn as_usize(self) -> usize;
+}
+
+impl<T> PointerMut for *mut T {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}