diff options
Diffstat (limited to 'third_party/rust/litrs/src/integer/mod.rs')
-rw-r--r-- | third_party/rust/litrs/src/integer/mod.rs | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/integer/mod.rs b/third_party/rust/litrs/src/integer/mod.rs new file mode 100644 index 0000000000..cecd79d3fb --- /dev/null +++ b/third_party/rust/litrs/src/integer/mod.rs @@ -0,0 +1,356 @@ +use std::{fmt, str::FromStr}; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + parse::{first_byte_or_empty, hex_digit_value, check_suffix}, +}; + + +/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`. +/// +/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`), +/// the main part (digits and underscores), and an optional type suffix +/// (e.g. `u64` or `i8`). See [the reference][ref] for more information. +/// +/// Note that integer literals are always positive: the grammar does not contain +/// the minus sign at all. The minus sign is just the unary negate operator, +/// not part of the literal. Which is interesting for cases like `- 128i8`: +/// here, the literal itself would overflow the specified type (`i8` cannot +/// represent 128). That's why in rustc, the literal overflow check is +/// performed as a lint after parsing, not during the lexing stage. Similarly, +/// [`IntegerLit::parse`] does not perform an overflow check. +/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub struct IntegerLit<B: Buffer> { + /// The raw literal. Grammar: `<prefix?><main part><suffix?>`. + raw: B, + /// First index of the main number part (after the base prefix). + start_main_part: usize, + /// First index not part of the main number part. + end_main_part: usize, + /// Parsed `raw[..start_main_part]`. + base: IntegerBase, +} + +impl<B: Buffer> IntegerLit<B> { + /// Parses the input as an integer literal. Returns an error if the input is + /// invalid or represents a different kind of literal. + pub fn parse(input: B) -> Result<Self, ParseError> { + match first_byte_or_empty(&input)? { + digit @ b'0'..=b'9' => { + // TODO: simplify once RFC 2528 is stabilized + let IntegerLit { + start_main_part, + end_main_part, + base, + .. + } = parse_impl(&input, digit)?; + + Ok(Self { raw: input, start_main_part, end_main_part, base }) + }, + _ => Err(perr(0, DoesNotStartWithDigit)), + } + } + + /// Performs the actual string to int conversion to obtain the integer + /// value. The optional type suffix of the literal **is ignored by this + /// method**. This means `N` does not need to match the type suffix! + /// + /// Returns `None` if the literal overflows `N`. + /// + /// Hint: `u128` can represent all possible values integer literal values, + /// as there are no negative literals (see type docs). Thus you can, for + /// example, safely use `lit.value::<u128>().to_string()` to get a decimal + /// string. (Technically, Rust integer literals can represent arbitrarily + /// large numbers, but those would be rejected at a later stage by the Rust + /// compiler). + pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> { + let base = N::from_small_number(self.base.value()); + + let mut acc = N::from_small_number(0); + for digit in self.raw_main_part().bytes() { + if digit == b'_' { + continue; + } + + // We don't actually need the base here: we already know this main + // part only contains digits valid for the specified base. + let digit = hex_digit_value(digit) + .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit")); + + acc = acc.checked_mul(base)?; + acc = acc.checked_add(N::from_small_number(digit))?; + } + + Some(acc) + } + + /// The base of this integer literal. + pub fn base(&self) -> IntegerBase { + self.base + } + + /// The main part containing the digits and potentially `_`. Do not try to + /// parse this directly as that would ignore the base! + pub fn raw_main_part(&self) -> &str { + &(*self.raw)[self.start_main_part..self.end_main_part] + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + /// + /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.end_main_part..] + } + + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } +} + +impl IntegerLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn to_owned(&self) -> IntegerLit<String> { + IntegerLit { + raw: self.raw.to_owned(), + start_main_part: self.start_main_part, + end_main_part: self.end_main_part, + base: self.base, + } + } +} + +impl<B: Buffer> fmt::Display for IntegerLit<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", &*self.raw) + } +} + +/// Integer literal types. *Implementation detail*. +/// +/// Implemented for all integer literal types. This trait is sealed and cannot +/// be implemented outside of this crate. The trait's methods are implementation +/// detail of this library and are not subject to semver. +pub trait FromIntegerLiteral: self::sealed::Sealed + Copy { + /// Creates itself from the given number. `n` is guaranteed to be `<= 16`. + #[doc(hidden)] + fn from_small_number(n: u8) -> Self; + + #[doc(hidden)] + fn checked_add(self, rhs: Self) -> Option<Self>; + + #[doc(hidden)] + fn checked_mul(self, rhs: Self) -> Option<Self>; + + #[doc(hidden)] + fn ty() -> IntegerType; +} + +macro_rules! impl_from_int_literal { + ($( $ty:ty => $variant:ident ,)* ) => { + $( + impl self::sealed::Sealed for $ty {} + impl FromIntegerLiteral for $ty { + fn from_small_number(n: u8) -> Self { + n as Self + } + fn checked_add(self, rhs: Self) -> Option<Self> { + self.checked_add(rhs) + } + fn checked_mul(self, rhs: Self) -> Option<Self> { + self.checked_mul(rhs) + } + fn ty() -> IntegerType { + IntegerType::$variant + } + } + )* + }; +} + +impl_from_int_literal!( + u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize, + i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize, +); + +mod sealed { + pub trait Sealed {} +} + +/// Precondition: first byte of string has to be in `b'0'..=b'9'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> { + // Figure out base and strip prefix base, if it exists. + let (end_prefix, base) = match (first, input.as_bytes().get(1)) { + (b'0', Some(b'b')) => (2, IntegerBase::Binary), + (b'0', Some(b'o')) => (2, IntegerBase::Octal), + (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal), + + // Everything else is treated as decimal. Several cases are caught + // by this: + // - "123" + // - "0" + // - "0u8" + // - "0r" -> this will error later + _ => (0, IntegerBase::Decimal), + }; + let without_prefix = &input[end_prefix..]; + + + // Scan input to find the first character that's not a valid digit. + let is_valid_digit = match base { + IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'), + IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'), + IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'), + IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'), + }; + let end_main = without_prefix.bytes() + .position(|b| !is_valid_digit(b)) + .unwrap_or(without_prefix.len()); + let (main_part, suffix) = without_prefix.split_at(end_main); + + check_suffix(suffix).map_err(|kind| { + // This is just to have a nicer error kind for this special case. If the + // suffix is invalid, it is non-empty -> unwrap ok. + let first = suffix.as_bytes()[0]; + if !is_valid_digit(first) && first.is_ascii_digit() { + perr(end_main + end_prefix, InvalidDigit) + } else { + perr(end_main + end_prefix..input.len(), kind) + } + })?; + if suffix.starts_with('e') || suffix.starts_with('E') { + return Err(perr(end_main, IntegerSuffixStartingWithE)); + } + + // Make sure main number part is not empty. + if main_part.bytes().filter(|&b| b != b'_').count() == 0 { + return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); + } + + Ok(IntegerLit { + raw: input, + start_main_part: end_prefix, + end_main_part: end_main + end_prefix, + base, + }) +} + + +/// The bases in which an integer can be specified. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IntegerBase { + Binary, + Octal, + Decimal, + Hexadecimal, +} + +impl IntegerBase { + /// Returns the literal prefix that indicates this base, i.e. `"0b"`, + /// `"0o"`, `""` and `"0x"`. + pub fn prefix(self) -> &'static str { + match self { + Self::Binary => "0b", + Self::Octal => "0o", + Self::Decimal => "", + Self::Hexadecimal => "0x", + } + } + + /// Returns the base value, i.e. 2, 8, 10 or 16. + pub fn value(self) -> u8 { + match self { + Self::Binary => 2, + Self::Octal => 8, + Self::Decimal => 10, + Self::Hexadecimal => 16, + } + } +} + +/// All possible integer type suffixes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum IntegerType { + U8, + U16, + U32, + U64, + U128, + Usize, + I8, + I16, + I32, + I64, + I128, + Isize, +} + +impl IntegerType { + /// Returns the type corresponding to the given suffix (e.g. `"u8"` is + /// mapped to `Self::U8`). If the suffix is not a valid integer type, + /// `None` is returned. + pub fn from_suffix(suffix: &str) -> Option<Self> { + match suffix { + "u8" => Some(Self::U8), + "u16" => Some(Self::U16), + "u32" => Some(Self::U32), + "u64" => Some(Self::U64), + "u128" => Some(Self::U128), + "usize" => Some(Self::Usize), + "i8" => Some(Self::I8), + "i16" => Some(Self::I16), + "i32" => Some(Self::I32), + "i64" => Some(Self::I64), + "i128" => Some(Self::I128), + "isize" => Some(Self::Isize), + _ => None, + } + } + + /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`. + pub fn suffix(self) -> &'static str { + match self { + Self::U8 => "u8", + Self::U16 => "u16", + Self::U32 => "u32", + Self::U64 => "u64", + Self::U128 => "u128", + Self::Usize => "usize", + Self::I8 => "i8", + Self::I16 => "i16", + Self::I32 => "i32", + Self::I64 => "i64", + Self::I128 => "i128", + Self::Isize => "isize", + } + } +} + +impl FromStr for IntegerType { + type Err = (); + fn from_str(s: &str) -> Result<Self, Self::Err> { + Self::from_suffix(s).ok_or(()) + } +} + +impl fmt::Display for IntegerType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.suffix().fmt(f) + } +} + + +#[cfg(test)] +mod tests; |