summary | refs | log | tree | commit | diff | stats
path: root/library/core/src/num/dec2flt/parse.rs
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:20:39 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:20:39 +0000
commit: 1376c5a617be5c25655d0d7cb63e3beaa5a6e026 (patch)
tree: 3bb8d61aee02bc7a15eab3f36e3b921afc2075d0 /library/core/src/num/dec2flt/parse.rs
parent: Releasing progress-linux version 1.69.0+dfsg1-1~progress7.99u1. (diff)
download: rustc-1376c5a617be5c25655d0d7cb63e3beaa5a6e026.tar.xz
download: rustc-1376c5a617be5c25655d0d7cb63e3beaa5a6e026.zip
Merging upstream version 1.70.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/num/dec2flt/parse.rs')
-rw-r--r-- library/core/src/num/dec2flt/parse.rs 224
1 files changed, 117 insertions, 107 deletions
diff --git a/library/core/src/num/dec2flt/parse.rs b/library/core/src/num/dec2flt/parse.rs
index 1a90e0d20..b0a23835c 100644
--- a/library/core/src/num/dec2flt/parse.rs
+++ b/library/core/src/num/dec2flt/parse.rs
@@ -1,6 +1,6 @@
//! Functions to parse floating-point numbers.
-use crate::num::dec2flt::common::{is_8digits, AsciiStr, ByteSlice};
+use crate::num::dec2flt::common::{is_8digits, ByteSlice};
use crate::num::dec2flt::float::RawFloat;
use crate::num::dec2flt::number::Number;
@@ -26,24 +26,39 @@ fn parse_8digits(mut v: u64) -> u64 {
}
/// Parse digits until a non-digit character is found.
-fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) {
+fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) {
// may cause overflows, to be handled later
- s.parse_digits(|digit| {
- *x = x.wrapping_mul(10).wrapping_add(digit as _);
+
+ while s.len() >= 8 {
+ let num = s.read_u64();
+ if is_8digits(num) {
+ x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num));
+ s = &s[8..];
+ } else {
+ break;
+ }
+ }
+
+ s = s.parse_digits(|digit| {
+ x = x.wrapping_mul(10).wrapping_add(digit as _);
});
+
+ (s, x)
}
/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
-fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
+fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) {
+ let mut s = *s_ref;
+
while *x < MIN_19DIGIT_INT {
- if let Some(&c) = s.as_ref().first() {
+ // FIXME: Can't use s.split_first() here yet,
+ // see https://github.com/rust-lang/rust/issues/109328
+ if let [c, s_next @ ..] = s {
let digit = c.wrapping_sub(b'0');
+
if digit < 10 {
*x = (*x * 10) + digit as u64; // no overflows here
- // SAFETY: cannot be empty
- unsafe {
- s.step();
- }
+ s = s_next;
} else {
break;
}
@@ -51,46 +66,26 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
break;
}
}
-}
-/// Try to parse 8 digits at a time, using an optimized algorithm.
-fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) {
- // may cause overflows, to be handled later
- if let Some(v) = s.read_u64() {
- if is_8digits(v) {
- *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v));
- // SAFETY: already ensured the buffer was >= 8 bytes in read_u64.
- unsafe {
- s.step_by(8);
- }
- if let Some(v) = s.read_u64() {
- if is_8digits(v) {
- *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v));
- // SAFETY: already ensured the buffer was >= 8 bytes in try_read_u64.
- unsafe {
- s.step_by(8);
- }
- }
- }
- }
- }
+ *s_ref = s;
}
/// Parse the scientific notation component of a float.
-fn parse_scientific(s: &mut AsciiStr<'_>) -> Option<i64> {
- let mut exponent = 0_i64;
+fn parse_scientific(s_ref: &mut &[u8]) -> Option<i64> {
+ let mut exponent = 0i64;
let mut negative = false;
- if let Some(&c) = s.as_ref().get(0) {
+
+ let mut s = *s_ref;
+
+ if let Some((&c, s_next)) = s.split_first() {
negative = c == b'-';
if c == b'-' || c == b'+' {
- // SAFETY: s cannot be empty
- unsafe {
- s.step();
- }
+ s = s_next;
}
}
- if s.first_isdigit() {
- s.parse_digits(|digit| {
+
+ if matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
+ *s_ref = s.parse_digits(|digit| {
// no overflows here, saturate well before overflow
if exponent < 0x10000 {
exponent = 10 * exponent + digit as i64;
@@ -98,6 +93,7 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> Option<i64> {
});
if negative { Some(-exponent) } else { Some(exponent) }
} else {
+ *s_ref = s;
None
}
}
@@ -106,28 +102,29 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> Option<i64> {
///
/// This creates a representation of the float as the
/// significant digits and the decimal exponent.
-fn parse_partial_number(s: &[u8], negative: bool) -> Option<(Number, usize)> {
- let mut s = AsciiStr::new(s);
- let start = s;
+fn parse_partial_number(mut s: &[u8]) -> Option<(Number, usize)> {
debug_assert!(!s.is_empty());
// parse initial digits before dot
let mut mantissa = 0_u64;
- let digits_start = s;
- try_parse_digits(&mut s, &mut mantissa);
- let mut n_digits = s.offset_from(&digits_start);
+ let start = s;
+ let tmp = try_parse_digits(s, mantissa);
+ s = tmp.0;
+ mantissa = tmp.1;
+ let mut n_digits = s.offset_from(start);
// handle dot with the following digits
let mut n_after_dot = 0;
let mut exponent = 0_i64;
let int_end = s;
- if s.first_is(b'.') {
- // SAFETY: s cannot be empty due to first_is
- unsafe { s.step() };
+
+ if let Some((&b'.', s_next)) = s.split_first() {
+ s = s_next;
let before = s;
- try_parse_8digits(&mut s, &mut mantissa);
- try_parse_digits(&mut s, &mut mantissa);
- n_after_dot = s.offset_from(&before);
+ let tmp = try_parse_digits(s, mantissa);
+ s = tmp.0;
+ mantissa = tmp.1;
+ n_after_dot = s.offset_from(before);
exponent = -n_after_dot as i64;
}
@@ -138,65 +135,60 @@ fn parse_partial_number(s: &[u8], negative: bool) -> Option<(Number, usize)> {
// handle scientific format
let mut exp_number = 0_i64;
- if s.first_is2(b'e', b'E') {
- // SAFETY: s cannot be empty
- unsafe {
- s.step();
+ if let Some((&c, s_next)) = s.split_first() {
+ if c == b'e' || c == b'E' {
+ s = s_next;
+ // If None, we have no trailing digits after exponent, or an invalid float.
+ exp_number = parse_scientific(&mut s)?;
+ exponent += exp_number;
}
- // If None, we have no trailing digits after exponent, or an invalid float.
- exp_number = parse_scientific(&mut s)?;
- exponent += exp_number;
}
- let len = s.offset_from(&start) as _;
+ let len = s.offset_from(start) as _;
// handle uncommon case with many digits
if n_digits <= 19 {
- return Some((Number { exponent, mantissa, negative, many_digits: false }, len));
+ return Some((Number { exponent, mantissa, negative: false, many_digits: false }, len));
}
n_digits -= 19;
let mut many_digits = false;
- let mut p = digits_start;
- while p.first_is2(b'0', b'.') {
- // SAFETY: p cannot be empty due to first_is2
- unsafe {
- // '0' = b'.' + 2
- n_digits -= p.first_unchecked().saturating_sub(b'0' - 1) as isize;
- p.step();
+ let mut p = start;
+ while let Some((&c, p_next)) = p.split_first() {
+ if c == b'.' || c == b'0' {
+ n_digits -= c.saturating_sub(b'0' - 1) as isize;
+ p = p_next;
+ } else {
+ break;
}
}
if n_digits > 0 {
// at this point we have more than 19 significant digits, let's try again
many_digits = true;
mantissa = 0;
- let mut s = digits_start;
+ let mut s = start;
try_parse_19digits(&mut s, &mut mantissa);
exponent = if mantissa >= MIN_19DIGIT_INT {
// big int
- int_end.offset_from(&s)
+ int_end.offset_from(s)
} else {
- // SAFETY: the next byte must be present and be '.'
- // We know this is true because we had more than 19
- // digits previously, so we overflowed a 64-bit integer,
- // but parsing only the integral digits produced less
- // than 19 digits. That means we must have a decimal
- // point, and at least 1 fractional digit.
- unsafe { s.step() };
+ s = &s[1..];
let before = s;
try_parse_19digits(&mut s, &mut mantissa);
- -s.offset_from(&before)
+ -s.offset_from(before)
} as i64;
// add back the explicit part
exponent += exp_number;
}
- Some((Number { exponent, mantissa, negative, many_digits }, len))
+ Some((Number { exponent, mantissa, negative: false, many_digits }, len))
}
-/// Try to parse a non-special floating point number.
-pub fn parse_number(s: &[u8], negative: bool) -> Option<Number> {
- if let Some((float, rest)) = parse_partial_number(s, negative) {
+/// Try to parse a non-special floating point number,
+/// as well as two slices with integer and fractional parts
+/// and the parsed exponent.
+pub fn parse_number(s: &[u8]) -> Option<Number> {
+ if let Some((float, rest)) = parse_partial_number(s) {
if rest == s.len() {
return Some(float);
}
@@ -204,30 +196,48 @@ pub fn parse_number(s: &[u8], negative: bool) -> Option<Number> {
None
}
-/// Parse a partial representation of a special, non-finite float.
-fn parse_partial_inf_nan<F: RawFloat>(s: &[u8]) -> Option<(F, usize)> {
- fn parse_inf_rest(s: &[u8]) -> usize {
- if s.len() >= 8 && s[3..].as_ref().starts_with_ignore_case(b"inity") { 8 } else { 3 }
- }
- if s.len() >= 3 {
- if s.starts_with_ignore_case(b"nan") {
- return Some((F::NAN, 3));
- } else if s.starts_with_ignore_case(b"inf") {
- return Some((F::INFINITY, parse_inf_rest(s)));
- }
- }
- None
-}
-
/// Try to parse a special, non-finite float.
-pub fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
- if let Some((mut float, rest)) = parse_partial_inf_nan::<F>(s) {
- if rest == s.len() {
- if negative {
- float = -float;
- }
- return Some(float);
- }
+pub(crate) fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
+ // Since a valid string has at most the length 8, we can load
+ // all relevant characters into a u64 and work from there.
+ // This also generates much better code.
+
+ let mut register;
+ let len: usize;
+
+ // All valid strings are either of length 8 or 3.
+ if s.len() == 8 {
+ register = s.read_u64();
+ len = 8;
+ } else if s.len() == 3 {
+ let a = s[0] as u64;
+ let b = s[1] as u64;
+ let c = s[2] as u64;
+ register = (c << 16) | (b << 8) | a;
+ len = 3;
+ } else {
+ return None;
}
- None
+
+ // Clear out the bits which turn ASCII uppercase characters into
+ // lowercase characters. The resulting string is all uppercase.
+ // What happens to other characters is irrelevant.
+ register &= 0xDFDFDFDFDFDFDFDF;
+
+ // u64 values corresponding to relevant cases
+ const INF_3: u64 = 0x464E49; // "INF"
+ const INF_8: u64 = 0x5954494E49464E49; // "INFINITY"
+ const NAN: u64 = 0x4E414E; // "NAN"
+
+ // Match register value to constant to parse string.
+ // Also match on the string length to catch edge cases
+ // like "inf\0\0\0\0\0".
+ let float = match (register, len) {
+ (INF_3, 3) => F::INFINITY,
+ (INF_8, 8) => F::INFINITY,
+ (NAN, 3) => F::NAN,
+ _ => return None,
+ };
+
+ if negative { Some(-float) } else { Some(float) }
}