From 631cd5845e8de329d0e227aaa707d7ea228b8f8f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:20:29 +0200 Subject: Merging upstream version 1.70.0+dfsg1. Signed-off-by: Daniel Baumann --- library/core/src/num/dec2flt/parse.rs | 224 ++++++++++++++++++---------------- 1 file changed, 117 insertions(+), 107 deletions(-) (limited to 'library/core/src/num/dec2flt/parse.rs') diff --git a/library/core/src/num/dec2flt/parse.rs b/library/core/src/num/dec2flt/parse.rs index 1a90e0d20..b0a23835c 100644 --- a/library/core/src/num/dec2flt/parse.rs +++ b/library/core/src/num/dec2flt/parse.rs @@ -1,6 +1,6 @@ //! Functions to parse floating-point numbers. -use crate::num::dec2flt::common::{is_8digits, AsciiStr, ByteSlice}; +use crate::num::dec2flt::common::{is_8digits, ByteSlice}; use crate::num::dec2flt::float::RawFloat; use crate::num::dec2flt::number::Number; @@ -26,24 +26,39 @@ fn parse_8digits(mut v: u64) -> u64 { } /// Parse digits until a non-digit character is found. -fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) { +fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) { // may cause overflows, to be handled later - s.parse_digits(|digit| { - *x = x.wrapping_mul(10).wrapping_add(digit as _); + + while s.len() >= 8 { + let num = s.read_u64(); + if is_8digits(num) { + x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num)); + s = &s[8..]; + } else { + break; + } + } + + s = s.parse_digits(|digit| { + x = x.wrapping_mul(10).wrapping_add(digit as _); }); + + (s, x) } /// Parse up to 19 digits (the max that can be stored in a 64-bit integer). -fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) { +fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) { + let mut s = *s_ref; + while *x < MIN_19DIGIT_INT { - if let Some(&c) = s.as_ref().first() { + // FIXME: Can't use s.split_first() here yet, + // see https://github.com/rust-lang/rust/issues/109328 + if let [c, s_next @ ..] = s { let digit = c.wrapping_sub(b'0'); + if digit < 10 { *x = (*x * 10) + digit as u64; // no overflows here - // SAFETY: cannot be empty - unsafe { - s.step(); - } + s = s_next; } else { break; } @@ -51,46 +66,26 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) { break; } } -} -/// Try to parse 8 digits at a time, using an optimized algorithm. -fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) { - // may cause overflows, to be handled later - if let Some(v) = s.read_u64() { - if is_8digits(v) { - *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v)); - // SAFETY: already ensured the buffer was >= 8 bytes in read_u64. - unsafe { - s.step_by(8); - } - if let Some(v) = s.read_u64() { - if is_8digits(v) { - *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v)); - // SAFETY: already ensured the buffer was >= 8 bytes in try_read_u64. - unsafe { - s.step_by(8); - } - } - } - } - } + *s_ref = s; } /// Parse the scientific notation component of a float. -fn parse_scientific(s: &mut AsciiStr<'_>) -> Option { - let mut exponent = 0_i64; +fn parse_scientific(s_ref: &mut &[u8]) -> Option { + let mut exponent = 0i64; let mut negative = false; - if let Some(&c) = s.as_ref().get(0) { + + let mut s = *s_ref; + + if let Some((&c, s_next)) = s.split_first() { negative = c == b'-'; if c == b'-' || c == b'+' { - // SAFETY: s cannot be empty - unsafe { - s.step(); - } + s = s_next; } } - if s.first_isdigit() { - s.parse_digits(|digit| { + + if matches!(s.first(), Some(&x) if x.is_ascii_digit()) { + *s_ref = s.parse_digits(|digit| { // no overflows here, saturate well before overflow if exponent < 0x10000 { exponent = 10 * exponent + digit as i64; @@ -98,6 +93,7 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> Option { }); if negative { Some(-exponent) } else { Some(exponent) } } else { + *s_ref = s; None } } @@ -106,28 +102,29 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> Option { /// /// This creates a representation of the float as the /// significant digits and the decimal exponent. -fn parse_partial_number(s: &[u8], negative: bool) -> Option<(Number, usize)> { - let mut s = AsciiStr::new(s); - let start = s; +fn parse_partial_number(mut s: &[u8]) -> Option<(Number, usize)> { debug_assert!(!s.is_empty()); // parse initial digits before dot let mut mantissa = 0_u64; - let digits_start = s; - try_parse_digits(&mut s, &mut mantissa); - let mut n_digits = s.offset_from(&digits_start); + let start = s; + let tmp = try_parse_digits(s, mantissa); + s = tmp.0; + mantissa = tmp.1; + let mut n_digits = s.offset_from(start); // handle dot with the following digits let mut n_after_dot = 0; let mut exponent = 0_i64; let int_end = s; - if s.first_is(b'.') { - // SAFETY: s cannot be empty due to first_is - unsafe { s.step() }; + + if let Some((&b'.', s_next)) = s.split_first() { + s = s_next; let before = s; - try_parse_8digits(&mut s, &mut mantissa); - try_parse_digits(&mut s, &mut mantissa); - n_after_dot = s.offset_from(&before); + let tmp = try_parse_digits(s, mantissa); + s = tmp.0; + mantissa = tmp.1; + n_after_dot = s.offset_from(before); exponent = -n_after_dot as i64; } @@ -138,65 +135,60 @@ fn parse_partial_number(s: &[u8], negative: bool) -> Option<(Number, usize)> { // handle scientific format let mut exp_number = 0_i64; - if s.first_is2(b'e', b'E') { - // SAFETY: s cannot be empty - unsafe { - s.step(); + if let Some((&c, s_next)) = s.split_first() { + if c == b'e' || c == b'E' { + s = s_next; + // If None, we have no trailing digits after exponent, or an invalid float. + exp_number = parse_scientific(&mut s)?; + exponent += exp_number; } - // If None, we have no trailing digits after exponent, or an invalid float. - exp_number = parse_scientific(&mut s)?; - exponent += exp_number; } - let len = s.offset_from(&start) as _; + let len = s.offset_from(start) as _; // handle uncommon case with many digits if n_digits <= 19 { - return Some((Number { exponent, mantissa, negative, many_digits: false }, len)); + return Some((Number { exponent, mantissa, negative: false, many_digits: false }, len)); } n_digits -= 19; let mut many_digits = false; - let mut p = digits_start; - while p.first_is2(b'0', b'.') { - // SAFETY: p cannot be empty due to first_is2 - unsafe { - // '0' = b'.' + 2 - n_digits -= p.first_unchecked().saturating_sub(b'0' - 1) as isize; - p.step(); + let mut p = start; + while let Some((&c, p_next)) = p.split_first() { + if c == b'.' || c == b'0' { + n_digits -= c.saturating_sub(b'0' - 1) as isize; + p = p_next; + } else { + break; } } if n_digits > 0 { // at this point we have more than 19 significant digits, let's try again many_digits = true; mantissa = 0; - let mut s = digits_start; + let mut s = start; try_parse_19digits(&mut s, &mut mantissa); exponent = if mantissa >= MIN_19DIGIT_INT { // big int - int_end.offset_from(&s) + int_end.offset_from(s) } else { - // SAFETY: the next byte must be present and be '.' - // We know this is true because we had more than 19 - // digits previously, so we overflowed a 64-bit integer, - // but parsing only the integral digits produced less - // than 19 digits. That means we must have a decimal - // point, and at least 1 fractional digit. - unsafe { s.step() }; + s = &s[1..]; let before = s; try_parse_19digits(&mut s, &mut mantissa); - -s.offset_from(&before) + -s.offset_from(before) } as i64; // add back the explicit part exponent += exp_number; } - Some((Number { exponent, mantissa, negative, many_digits }, len)) + Some((Number { exponent, mantissa, negative: false, many_digits }, len)) } -/// Try to parse a non-special floating point number. -pub fn parse_number(s: &[u8], negative: bool) -> Option { - if let Some((float, rest)) = parse_partial_number(s, negative) { +/// Try to parse a non-special floating point number, +/// as well as two slices with integer and fractional parts +/// and the parsed exponent. +pub fn parse_number(s: &[u8]) -> Option { + if let Some((float, rest)) = parse_partial_number(s) { if rest == s.len() { return Some(float); } @@ -204,30 +196,48 @@ pub fn parse_number(s: &[u8], negative: bool) -> Option { None } -/// Parse a partial representation of a special, non-finite float. -fn parse_partial_inf_nan(s: &[u8]) -> Option<(F, usize)> { - fn parse_inf_rest(s: &[u8]) -> usize { - if s.len() >= 8 && s[3..].as_ref().starts_with_ignore_case(b"inity") { 8 } else { 3 } - } - if s.len() >= 3 { - if s.starts_with_ignore_case(b"nan") { - return Some((F::NAN, 3)); - } else if s.starts_with_ignore_case(b"inf") { - return Some((F::INFINITY, parse_inf_rest(s))); - } - } - None -} - /// Try to parse a special, non-finite float. -pub fn parse_inf_nan(s: &[u8], negative: bool) -> Option { - if let Some((mut float, rest)) = parse_partial_inf_nan::(s) { - if rest == s.len() { - if negative { - float = -float; - } - return Some(float); - } +pub(crate) fn parse_inf_nan(s: &[u8], negative: bool) -> Option { + // Since a valid string has at most the length 8, we can load + // all relevant characters into a u64 and work from there. + // This also generates much better code. + + let mut register; + let len: usize; + + // All valid strings are either of length 8 or 3. + if s.len() == 8 { + register = s.read_u64(); + len = 8; + } else if s.len() == 3 { + let a = s[0] as u64; + let b = s[1] as u64; + let c = s[2] as u64; + register = (c << 16) | (b << 8) | a; + len = 3; + } else { + return None; } - None + + // Clear out the bits which turn ASCII uppercase characters into + // lowercase characters. The resulting string is all uppercase. + // What happens to other characters is irrelevant. + register &= 0xDFDFDFDFDFDFDFDF; + + // u64 values corresponding to relevant cases + const INF_3: u64 = 0x464E49; // "INF" + const INF_8: u64 = 0x5954494E49464E49; // "INFINITY" + const NAN: u64 = 0x4E414E; // "NAN" + + // Match register value to constant to parse string. + // Also match on the string length to catch edge cases + // like "inf\0\0\0\0\0". + let float = match (register, len) { + (INF_3, 3) => F::INFINITY, + (INF_8, 8) => F::INFINITY, + (NAN, 3) => F::NAN, + _ => return None, + }; + + if negative { Some(-float) } else { Some(float) } } -- cgit v1.2.3