From fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:14:29 +0200 Subject: Merging upstream version 125.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/litrs/src/float/mod.rs | 257 ++++++++++++++++++++++++++++++ third_party/rust/litrs/src/float/tests.rs | 253 +++++++++++++++++++++++++++++ 2 files changed, 510 insertions(+) create mode 100644 third_party/rust/litrs/src/float/mod.rs create mode 100644 third_party/rust/litrs/src/float/tests.rs (limited to 'third_party/rust/litrs/src/float') diff --git a/third_party/rust/litrs/src/float/mod.rs b/third_party/rust/litrs/src/float/mod.rs new file mode 100644 index 0000000000..0518633a6b --- /dev/null +++ b/third_party/rust/litrs/src/float/mod.rs @@ -0,0 +1,257 @@ +use std::{fmt, str::FromStr}; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + parse::{end_dec_digits, first_byte_or_empty, check_suffix}, +}; + + + +/// A floating point literal, e.g. `3.14`, `8.`, `135e12`, or `1.956e2f64`. +/// +/// This kind of literal has several forms, but generally consists of a main +/// number part, an optional exponent and an optional type suffix. See +/// [the reference][ref] for more information. +/// +/// A leading minus sign `-` is not part of the literal grammar! `-3.14` are two +/// tokens in the Rust grammar. Further, `27` and `27f32` are both not float, +/// but integer literals! Consequently `FloatLit::parse` will reject them. +/// +/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FloatLit { + /// The whole raw input. The `usize` fields in this struct partition this + /// string. Always true: `end_integer_part <= end_fractional_part`. + /// + /// ```text + /// 12_3.4_56e789f32 + /// ╷ ╷ ╷ + /// | | └ end_number_part = 13 + /// | └ end_fractional_part = 9 + /// └ end_integer_part = 4 + /// + /// 246. + /// ╷╷ + /// |└ end_fractional_part = end_number_part = 4 + /// └ end_integer_part = 3 + /// + /// 1234e89 + /// ╷ ╷ + /// | └ end_number_part = 7 + /// └ end_integer_part = end_fractional_part = 4 + /// ``` + raw: B, + + /// The first index not part of the integer part anymore. Since the integer + /// part is at the start, this is also the length of that part. + end_integer_part: usize, + + /// The first index after the fractional part. + end_fractional_part: usize, + + /// The first index after the whole number part (everything except type suffix). + end_number_part: usize, +} + +impl FloatLit { + /// Parses the input as a floating point literal. Returns an error if the + /// input is invalid or represents a different kind of literal. Will also + /// reject decimal integer literals like `23` or `17f32`, in accordance + /// with the spec. + pub fn parse(s: B) -> Result { + match first_byte_or_empty(&s)? { + b'0'..=b'9' => { + // TODO: simplify once RFC 2528 is stabilized + let FloatLit { + end_integer_part, + end_fractional_part, + end_number_part, + .. + } = parse_impl(&s)?; + + Ok(Self { raw: s, end_integer_part, end_fractional_part, end_number_part }) + }, + _ => Err(perr(0, DoesNotStartWithDigit)), + } + } + + /// Returns the number part (including integer part, fractional part and + /// exponent), but without the suffix. If you want an actual floating + /// point value, you need to parse this string, e.g. with `f32::from_str` + /// or an external crate. + pub fn number_part(&self) -> &str { + &(*self.raw)[..self.end_number_part] + } + + /// Returns the non-empty integer part of this literal. + pub fn integer_part(&self) -> &str { + &(*self.raw)[..self.end_integer_part] + } + + /// Returns the optional fractional part of this literal. Does not include + /// the period. If a period exists in the input, `Some` is returned, `None` + /// otherwise. Note that `Some("")` might be returned, e.g. for `3.`. + pub fn fractional_part(&self) -> Option<&str> { + if self.end_integer_part == self.end_fractional_part { + None + } else { + Some(&(*self.raw)[self.end_integer_part + 1..self.end_fractional_part]) + } + } + + /// Optional exponent part. Might be empty if there was no exponent part in + /// the input. Includes the `e` or `E` at the beginning. + pub fn exponent_part(&self) -> &str { + &(*self.raw)[self.end_fractional_part..self.end_number_part] + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.end_number_part..] + } + + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } +} + +impl FloatLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn to_owned(&self) -> FloatLit { + FloatLit { + raw: self.raw.to_owned(), + end_integer_part: self.end_integer_part, + end_fractional_part: self.end_fractional_part, + end_number_part: self.end_number_part, + } + } +} + +impl fmt::Display for FloatLit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", &*self.raw) + } +} + +/// Precondition: first byte of string has to be in `b'0'..=b'9'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result, ParseError> { + // Integer part. + let end_integer_part = end_dec_digits(input.as_bytes()); + let rest = &input[end_integer_part..]; + + + // Fractional part. + let end_fractional_part = if rest.as_bytes().get(0) == Some(&b'.') { + // The fractional part must not start with `_`. + if rest.as_bytes().get(1) == Some(&b'_') { + return Err(perr(end_integer_part + 1, UnexpectedChar)); + } + + end_dec_digits(rest[1..].as_bytes()) + 1 + end_integer_part + } else { + end_integer_part + }; + let rest = &input[end_fractional_part..]; + + // If we have a period that is not followed by decimal digits, the + // literal must end now. + if end_integer_part + 1 == end_fractional_part && !rest.is_empty() { + return Err(perr(end_integer_part + 1, UnexpectedChar)); + } + + // Optional exponent. + let end_number_part = if rest.starts_with('e') || rest.starts_with('E') { + // Strip single - or + sign at the beginning. + let exp_number_start = match rest.as_bytes().get(1) { + Some(b'-') | Some(b'+') => 2, + _ => 1, + }; + + // Find end of exponent and make sure there is at least one digit. + let end_exponent = end_dec_digits(rest[exp_number_start..].as_bytes()) + exp_number_start; + if !rest[exp_number_start..end_exponent].bytes().any(|b| matches!(b, b'0'..=b'9')) { + return Err(perr( + end_fractional_part..end_fractional_part + end_exponent, + NoExponentDigits, + )); + } + + end_exponent + end_fractional_part + } else { + end_fractional_part + }; + + // Make sure the suffix is valid. + let suffix = &input[end_number_part..]; + check_suffix(suffix).map_err(|kind| perr(end_number_part..input.len(), kind))?; + + // A float literal needs either a fractional or exponent part, otherwise its + // an integer literal. + if end_integer_part == end_number_part { + return Err(perr(None, UnexpectedIntegerLit)); + } + + Ok(FloatLit { + raw: input, + end_integer_part, + end_fractional_part, + end_number_part, + }) +} + + +/// All possible float type suffixes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum FloatType { + F32, + F64, +} + +impl FloatType { + /// Returns the type corresponding to the given suffix (e.g. `"f32"` is + /// mapped to `Self::F32`). If the suffix is not a valid float type, `None` + /// is returned. + pub fn from_suffix(suffix: &str) -> Option { + match suffix { + "f32" => Some(FloatType::F32), + "f64" => Some(FloatType::F64), + _ => None, + } + } + + /// Returns the suffix for this type, e.g. `"f32"` for `Self::F32`. + pub fn suffix(self) -> &'static str { + match self { + Self::F32 => "f32", + Self::F64 => "f64", + } + } +} + +impl FromStr for FloatType { + type Err = (); + fn from_str(s: &str) -> Result { + Self::from_suffix(s).ok_or(()) + } +} + +impl fmt::Display for FloatType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.suffix().fmt(f) + } +} + + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/float/tests.rs b/third_party/rust/litrs/src/float/tests.rs new file mode 100644 index 0000000000..f22443bd19 --- /dev/null +++ b/third_party/rust/litrs/src/float/tests.rs @@ -0,0 +1,253 @@ +use crate::{ + Literal, ParseError, + test_util::{assert_parse_ok_eq, assert_roundtrip}, +}; +use super::{FloatLit, FloatType}; + + +// ===== Utility functions ======================================================================= + +/// Helper macro to check parsing a float. +/// +/// This macro contains quite a bit of logic itself (which can be buggy of +/// course), so we have a few test functions below to test a bunch of cases +/// manually. +macro_rules! check { + ($intpart:literal $fracpart:literal $exppart:literal $suffix:tt) => { + let input = concat!($intpart, $fracpart, $exppart, check!(@stringify_suffix $suffix)); + let expected_float = FloatLit { + raw: input, + end_integer_part: $intpart.len(), + end_fractional_part: $intpart.len() + $fracpart.len(), + end_number_part: $intpart.len() + $fracpart.len() + $exppart.len(), + }; + + assert_parse_ok_eq( + input, FloatLit::parse(input), expected_float.clone(), "FloatLit::parse"); + assert_parse_ok_eq( + input, Literal::parse(input), Literal::Float(expected_float), "Literal::parse"); + assert_eq!(FloatLit::parse(input).unwrap().suffix(), check!(@ty $suffix)); + assert_roundtrip(expected_float.to_owned(), input); + }; + (@ty f32) => { "f32" }; + (@ty f64) => { "f64" }; + (@ty -) => { "" }; + (@stringify_suffix -) => { "" }; + (@stringify_suffix $suffix:ident) => { stringify!($suffix) }; +} + + +// ===== Actual tests =========================================================================== + +#[test] +fn manual_without_suffix() -> Result<(), ParseError> { + let f = FloatLit::parse("3.14")?; + assert_eq!(f.number_part(), "3.14"); + assert_eq!(f.integer_part(), "3"); + assert_eq!(f.fractional_part(), Some("14")); + assert_eq!(f.exponent_part(), ""); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("9.")?; + assert_eq!(f.number_part(), "9."); + assert_eq!(f.integer_part(), "9"); + assert_eq!(f.fractional_part(), Some("")); + assert_eq!(f.exponent_part(), ""); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8e1")?; + assert_eq!(f.number_part(), "8e1"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), None); + assert_eq!(f.exponent_part(), "e1"); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8E3")?; + assert_eq!(f.number_part(), "8E3"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), None); + assert_eq!(f.exponent_part(), "E3"); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8_7_6.1_23e15")?; + assert_eq!(f.number_part(), "8_7_6.1_23e15"); + assert_eq!(f.integer_part(), "8_7_6"); + assert_eq!(f.fractional_part(), Some("1_23")); + assert_eq!(f.exponent_part(), "e15"); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8.2e-_04_9")?; + assert_eq!(f.number_part(), "8.2e-_04_9"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), Some("2")); + assert_eq!(f.exponent_part(), "e-_04_9"); + assert_eq!(f.suffix(), ""); + + Ok(()) +} + +#[test] +fn manual_with_suffix() -> Result<(), ParseError> { + let f = FloatLit::parse("3.14f32")?; + assert_eq!(f.number_part(), "3.14"); + assert_eq!(f.integer_part(), "3"); + assert_eq!(f.fractional_part(), Some("14")); + assert_eq!(f.exponent_part(), ""); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F32)); + + let f = FloatLit::parse("8e1f64")?; + assert_eq!(f.number_part(), "8e1"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), None); + assert_eq!(f.exponent_part(), "e1"); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F64)); + + let f = FloatLit::parse("8_7_6.1_23e15f32")?; + assert_eq!(f.number_part(), "8_7_6.1_23e15"); + assert_eq!(f.integer_part(), "8_7_6"); + assert_eq!(f.fractional_part(), Some("1_23")); + assert_eq!(f.exponent_part(), "e15"); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F32)); + + let f = FloatLit::parse("8.2e-_04_9f64")?; + assert_eq!(f.number_part(), "8.2e-_04_9"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), Some("2")); + assert_eq!(f.exponent_part(), "e-_04_9"); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F64)); + + Ok(()) +} + +#[test] +fn simple() { + check!("3" ".14" "" -); + check!("3" ".14" "" f32); + check!("3" ".14" "" f64); + + check!("3" "" "e987654321" -); + check!("3" "" "e987654321" f64); + + check!("42_888" ".05" "" -); + check!("42_888" ".05" "E5___" f32); + check!("123456789" "" "e_1" f64); + check!("123456789" ".99" "e_1" f64); + check!("123456789" ".99" "" f64); + check!("123456789" ".99" "" -); + + check!("147" ".3_33" "" -); + check!("147" ".3_33__" "E3" f64); + check!("147" ".3_33__" "" f32); + + check!("147" ".333" "e-10" -); + check!("147" ".333" "e-_7" f32); + check!("147" ".333" "e+10" -); + check!("147" ".333" "e+_7" f32); + + check!("86" "." "" -); + check!("0" "." "" -); + check!("0_" "." "" -); + check!("0" ".0000001" "" -); + check!("0" ".000_0001" "" -); + + check!("0" ".0" "e+0" -); + check!("0" "" "E+0" -); + check!("34" "" "e+0" -); + check!("0" ".9182" "E+0" f32); +} + +#[test] +fn non_standard_suffixes() { + #[track_caller] + fn check_suffix( + input: &str, + integer_part: &str, + fractional_part: Option<&str>, + exponent_part: &str, + suffix: &str, + ) { + let lit = FloatLit::parse(input) + .unwrap_or_else(|e| panic!("expected to parse '{}' but got {}", input, e)); + assert_eq!(lit.integer_part(), integer_part); + assert_eq!(lit.fractional_part(), fractional_part); + assert_eq!(lit.exponent_part(), exponent_part); + assert_eq!(lit.suffix(), suffix); + + let lit = match Literal::parse(input) { + Ok(Literal::Float(f)) => f, + other => panic!("Expected float literal, but got {:?} for '{}'", other, input), + }; + assert_eq!(lit.integer_part(), integer_part); + assert_eq!(lit.fractional_part(), fractional_part); + assert_eq!(lit.exponent_part(), exponent_part); + assert_eq!(lit.suffix(), suffix); + } + + check_suffix("7.1f23", "7", Some("1"), "", "f23"); + check_suffix("7.1f320", "7", Some("1"), "", "f320"); + check_suffix("7.1f64_", "7", Some("1"), "", "f64_"); + check_suffix("8.1f649", "8", Some("1"), "", "f649"); + check_suffix("8.1f64f32", "8", Some("1"), "", "f64f32"); + check_suffix("23e2_banana", "23", None, "e2_", "banana"); + check_suffix("23.2_banana", "23", Some("2_"), "", "banana"); + check_suffix("23e2pe55ter", "23", None, "e2", "pe55ter"); + check_suffix("23e2p_e55ter", "23", None, "e2", "p_e55ter"); + check_suffix("3.15Jürgen", "3", Some("15"), "", "Jürgen"); + check_suffix("3e2e5", "3", None, "e2", "e5"); + check_suffix("3e2e5f", "3", None, "e2", "e5f"); +} + +#[test] +fn parse_err() { + assert_err!(FloatLit, "", Empty, None); + assert_err_single!(FloatLit::parse("."), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("+"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("-"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("e"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("e8"), DoesNotStartWithDigit, 0); + assert_err!(FloatLit, "0e", NoExponentDigits, 1..2); + assert_err_single!(FloatLit::parse("f32"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("foo"), DoesNotStartWithDigit, 0); + + assert_err_single!(FloatLit::parse("inf"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("nan"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("NaN"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("NAN"), DoesNotStartWithDigit, 0); + + assert_err_single!(FloatLit::parse("_2.7"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse(".5"), DoesNotStartWithDigit, 0); + assert_err!(FloatLit, "1e", NoExponentDigits, 1..2); + assert_err!(FloatLit, "1.e4", UnexpectedChar, 2); + assert_err!(FloatLit, "3._4", UnexpectedChar, 2); + assert_err!(FloatLit, "3.f32", UnexpectedChar, 2); + assert_err!(FloatLit, "3.e5", UnexpectedChar, 2); + assert_err!(FloatLit, "12345._987", UnexpectedChar, 6); + assert_err!(FloatLit, "46._", UnexpectedChar, 3); + assert_err!(FloatLit, "46.f32", UnexpectedChar, 3); + assert_err!(FloatLit, "46.e3", UnexpectedChar, 3); + assert_err!(FloatLit, "46._e3", UnexpectedChar, 3); + assert_err!(FloatLit, "46.e3f64", UnexpectedChar, 3); + assert_err!(FloatLit, "23.4e_", NoExponentDigits, 4..6); + assert_err!(FloatLit, "23E___f32", NoExponentDigits, 2..6); + assert_err!(FloatLit, "55e3.1", UnexpectedChar, 4..6); + + assert_err!(FloatLit, "3.7+", UnexpectedChar, 3..4); + assert_err!(FloatLit, "3.7+2", UnexpectedChar, 3..5); + assert_err!(FloatLit, "3.7-", UnexpectedChar, 3..4); + assert_err!(FloatLit, "3.7-2", UnexpectedChar, 3..5); + assert_err!(FloatLit, "3.7e+", NoExponentDigits, 3..5); + assert_err!(FloatLit, "3.7e-", NoExponentDigits, 3..5); + assert_err!(FloatLit, "3.7e-+3", NoExponentDigits, 3..5); // suboptimal error + assert_err!(FloatLit, "3.7e+-3", NoExponentDigits, 3..5); // suboptimal error + assert_err_single!(FloatLit::parse("0x44.5"), InvalidSuffix, 1..6); + + assert_err_single!(FloatLit::parse("3"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("35_389"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("9_8_7f32"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("9_8_7banana"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("7f23"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("7f320"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("7f64_"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("8f649"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("8f64f32"), UnexpectedIntegerLit, None); +} -- cgit v1.2.3