Diffstat (limited to 'third_party/rust/minimal-lexical/src')
19 files changed, 5412 insertions, 0 deletions
diff --git a/third_party/rust/minimal-lexical/src/bellerophon.rs b/third_party/rust/minimal-lexical/src/bellerophon.rs new file mode 100644 index 0000000000..86a2023d09 --- /dev/null +++ b/third_party/rust/minimal-lexical/src/bellerophon.rs @@ -0,0 +1,391 @@ +//! An implementation of Clinger's Bellerophon algorithm. +//! +//! This is a moderate path algorithm that uses an extended-precision +//! float, represented in 80 bits, by calculating the bits of slop +//! and determining if those bits could prevent unambiguous rounding. +//! +//! This algorithm requires less static storage than the Lemire algorithm, +//! and has decent performance, and is therefore used when non-decimal, +//! non-power-of-two strings need to be parsed. Clinger's algorithm +//! is described in depth in "How to Read Floating Point Numbers Accurately.", +//! available online [here](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.4152&rep=rep1&type=pdf). +//! +//! This implementation is loosely based off the Golang implementation, +//! found [here](https://github.com/golang/go/blob/b10849fbb97a2244c086991b4623ae9f32c212d0/src/strconv/extfloat.go). +//! This code is therefore subject to a 3-clause BSD license. + +#![cfg(feature = "compact")] +#![doc(hidden)] + +use crate::extended_float::ExtendedFloat; +use crate::mask::{lower_n_halfway, lower_n_mask}; +use crate::num::Float; +use crate::number::Number; +use crate::rounding::{round, round_nearest_tie_even}; +use crate::table::BASE10_POWERS; + +// ALGORITHM +// --------- + +/// Core implementation of the Bellerophon algorithm. +/// +/// Create an extended-precision float, scale it to the proper radix power, +/// calculate the bits of slop, and return the representation. The value +/// will always be guaranteed to be within 1 bit, rounded-down, of the real +/// value. If a negative exponent is returned, this represents we were +/// unable to unambiguously round the significant digits. +/// +/// This has been modified to return a biased, rather than unbiased exponent. +pub fn bellerophon<F: Float>(num: &Number) -> ExtendedFloat { + let fp_zero = ExtendedFloat { + mant: 0, + exp: 0, + }; + let fp_inf = ExtendedFloat { + mant: 0, + exp: F::INFINITE_POWER, + }; + + // Early short-circuit, in case of literal 0 or infinity. + // This allows us to avoid narrow casts causing numeric overflow, + // and is a quick check for any radix. + if num.mantissa == 0 || num.exponent <= -0x1000 { + return fp_zero; + } else if num.exponent >= 0x1000 { + return fp_inf; + } + + // Calculate our indexes for our extended-precision multiplication. + // This narrowing cast is safe, since exponent must be in a valid range. + let exponent = num.exponent as i32 + BASE10_POWERS.bias; + let small_index = exponent % BASE10_POWERS.step; + let large_index = exponent / BASE10_POWERS.step; + + if exponent < 0 { + // Guaranteed underflow (assign 0). + return fp_zero; + } + if large_index as usize >= BASE10_POWERS.large.len() { + // Overflow (assign infinity) + return fp_inf; + } + + // Within the valid exponent range, multiply by the large and small + // exponents and return the resulting value. + + // Track errors to as a factor of unit in last-precision. + let mut errors: u32 = 0; + if num.many_digits { + errors += error_halfscale(); + } + + // Multiply by the small power. + // Check if we can directly multiply by an integer, if not, + // use extended-precision multiplication. 
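+    // Illustrative sketch, not part of the upstream source: the split below
+    // relies on `u64::overflowing_mul` reporting wrap-around, e.g.:
+    //
+    //     let (z, wrapped) = 1_000_000_u64.overflowing_mul(100);
+    //     assert!(!wrapped && z == 100_000_000); // exact integer product
+    //     let (_, wrapped) = u64::MAX.overflowing_mul(10);
+    //     assert!(wrapped); // inexact, use extended-precision `mul` instead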
+ let mut fp = ExtendedFloat { + mant: num.mantissa, + exp: 0, + }; + match fp.mant.overflowing_mul(BASE10_POWERS.get_small_int(small_index as usize)) { + // Overflow, multiplication unsuccessful, go slow path. + (_, true) => { + normalize(&mut fp); + fp = mul(&fp, &BASE10_POWERS.get_small(small_index as usize)); + errors += error_halfscale(); + }, + // No overflow, multiplication successful. + (mant, false) => { + fp.mant = mant; + normalize(&mut fp); + }, + } + + // Multiply by the large power. + fp = mul(&fp, &BASE10_POWERS.get_large(large_index as usize)); + if errors > 0 { + errors += 1; + } + errors += error_halfscale(); + + // Normalize the floating point (and the errors). + let shift = normalize(&mut fp); + errors <<= shift; + fp.exp += F::EXPONENT_BIAS; + + // Check for literal overflow, even with halfway cases. + if -fp.exp + 1 > 65 { + return fp_zero; + } + + // Too many errors accumulated, return an error. + if !error_is_accurate::<F>(errors, &fp) { + // Bias the exponent so we know it's invalid. + fp.exp += F::INVALID_FP; + return fp; + } + + // Check if we have a literal 0 or overflow here. + // If we have an exponent of -63, we can still have a valid shift, + // giving a case where we have too many errors and need to round-up. + if -fp.exp + 1 == 65 { + // Have more than 64 bits below the minimum exponent, must be 0. + return fp_zero; + } + + round::<F, _>(&mut fp, |f, s| { + round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| { + is_above || (is_odd && is_halfway) + }); + }); + fp +} + +// ERRORS +// ------ + +// Calculate if the errors in calculating the extended-precision float. +// +// Specifically, we want to know if we are close to a halfway representation, +// or halfway between `b` and `b+1`, or `b+h`. The halfway representation +// has the form: +// SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100... +// where: +// S = Sign Bit +// E = Exponent Bits +// H = Hidden Bit +// M = Mantissa Bits +// +// The halfway representation has a bit set 1-after the mantissa digits, +// and no bits set immediately afterward, making it impossible to +// round between `b` and `b+1` with this representation. + +/// Get the full error scale. +#[inline(always)] +const fn error_scale() -> u32 { + 8 +} + +/// Get the half error scale. +#[inline(always)] +const fn error_halfscale() -> u32 { + error_scale() / 2 +} + +/// Determine if the number of errors is tolerable for float precision. +fn error_is_accurate<F: Float>(errors: u32, fp: &ExtendedFloat) -> bool { + // Check we can't have a literal 0 denormal float. + debug_assert!(fp.exp >= -64); + + // Determine if extended-precision float is a good approximation. + // If the error has affected too many units, the float will be + // inaccurate, or if the representation is too close to halfway + // that any operations could affect this halfway representation. + // See the documentation for dtoa for more information. + + // This is always a valid u32, since `fp.exp >= -64` + // will always be positive and the significand size is {23, 52}. + let mantissa_shift = 64 - F::MANTISSA_SIZE - 1; + + // The unbiased exponent checks is `unbiased_exp <= F::MANTISSA_SIZE + // - F::EXPONENT_BIAS -64 + 1`, or `biased_exp <= F::MANTISSA_SIZE - 63`, + // or `biased_exp <= mantissa_shift`. + let extrabits = match fp.exp <= -mantissa_shift { + // Denormal, since shifting to the hidden bit still has a negative exponent. + // The unbiased check calculation for bits is `1 - F::EXPONENT_BIAS - unbiased_exp`, + // or `1 - biased_exp`. 
+ true => 1 - fp.exp, + false => 64 - F::MANTISSA_SIZE - 1, + }; + + // Our logic is as follows: we want to determine if the actual + // mantissa and the errors during calculation differ significantly + // from the rounding point. The rounding point for round-nearest + // is the halfway point, IE, this when the truncated bits start + // with b1000..., while the rounding point for the round-toward + // is when the truncated bits are equal to 0. + // To do so, we can check whether the rounding point +/- the error + // are >/< the actual lower n bits. + // + // For whether we need to use signed or unsigned types for this + // analysis, see this example, using u8 rather than u64 to simplify + // things. + // + // # Comparisons + // cmp1 = (halfway - errors) < extra + // cmp1 = extra < (halfway + errors) + // + // # Large Extrabits, Low Errors + // + // extrabits = 8 + // halfway = 0b10000000 + // extra = 0b10000010 + // errors = 0b00000100 + // halfway - errors = 0b01111100 + // halfway + errors = 0b10000100 + // + // Unsigned: + // halfway - errors = 124 + // halfway + errors = 132 + // extra = 130 + // cmp1 = true + // cmp2 = true + // Signed: + // halfway - errors = 124 + // halfway + errors = -124 + // extra = -126 + // cmp1 = false + // cmp2 = true + // + // # Conclusion + // + // Since errors will always be small, and since we want to detect + // if the representation is accurate, we need to use an **unsigned** + // type for comparisons. + let maskbits = extrabits as u64; + let errors = errors as u64; + + // Round-to-nearest, need to use the halfway point. + if extrabits > 64 { + // Underflow, we have a shift larger than the mantissa. + // Representation is valid **only** if the value is close enough + // overflow to the next bit within errors. If it overflows, + // the representation is **not** valid. + !fp.mant.overflowing_add(errors).1 + } else { + let mask = lower_n_mask(maskbits); + let extra = fp.mant & mask; + + // Round-to-nearest, need to check if we're close to halfway. + // IE, b10100 | 100000, where `|` signifies the truncation point. + let halfway = lower_n_halfway(maskbits); + let cmp1 = halfway.wrapping_sub(errors) < extra; + let cmp2 = extra < halfway.wrapping_add(errors); + + // If both comparisons are true, we have significant rounding error, + // and the value cannot be exactly represented. Otherwise, the + // representation is valid. + !(cmp1 && cmp2) + } +} + +// MATH +// ---- + +/// Normalize float-point number. +/// +/// Shift the mantissa so the number of leading zeros is 0, or the value +/// itself is 0. +/// +/// Get the number of bytes shifted. +pub fn normalize(fp: &mut ExtendedFloat) -> i32 { + // Note: + // Using the ctlz intrinsic via leading_zeros is way faster (~10x) + // than shifting 1-bit at a time, via while loop, and also way + // faster (~2x) than an unrolled loop that checks at 32, 16, 4, + // 2, and 1 bit. + // + // Using a modulus of pow2 (which will get optimized to a bitwise + // and with 0x3F or faster) is slightly slower than an if/then, + // however, removing the if/then will likely optimize more branched + // code as it removes conditional logic. + + // Calculate the number of leading zeros, and then zero-out + // any overflowing bits, to avoid shl overflow when self.mant == 0. + if fp.mant != 0 { + let shift = fp.mant.leading_zeros() as i32; + fp.mant <<= shift; + fp.exp -= shift; + shift + } else { + 0 + } +} + +/// Multiply two normalized extended-precision floats, as if by `a*b`. 
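+///
+/// Illustrative example (assuming zero input exponents): with
+/// `x.mant == y.mant == 1 << 63`, i.e. both values equal 2^63, the result
+/// is `mant == 1 << 62` with `exp == 64`, representing 2^62 * 2^64 = 2^126,
+/// which matches 2^63 * 2^63.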
+/// +/// The precision is maximal when the numbers are normalized, however, +/// decent precision will occur as long as both values have high bits +/// set. The result is not normalized. +/// +/// Algorithm: +/// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input). +/// 2. Normalization of the result (not done here). +/// 3. Addition of exponents. +pub fn mul(x: &ExtendedFloat, y: &ExtendedFloat) -> ExtendedFloat { + // Logic check, values must be decently normalized prior to multiplication. + debug_assert!(x.mant >> 32 != 0); + debug_assert!(y.mant >> 32 != 0); + + // Extract high-and-low masks. + // Mask is u32::MAX for older Rustc versions. + const LOMASK: u64 = 0xffff_ffff; + let x1 = x.mant >> 32; + let x0 = x.mant & LOMASK; + let y1 = y.mant >> 32; + let y0 = y.mant & LOMASK; + + // Get our products + let x1_y0 = x1 * y0; + let x0_y1 = x0 * y1; + let x0_y0 = x0 * y0; + let x1_y1 = x1 * y1; + + let mut tmp = (x1_y0 & LOMASK) + (x0_y1 & LOMASK) + (x0_y0 >> 32); + // round up + tmp += 1 << (32 - 1); + + ExtendedFloat { + mant: x1_y1 + (x1_y0 >> 32) + (x0_y1 >> 32) + (tmp >> 32), + exp: x.exp + y.exp + 64, + } +} + +// POWERS +// ------ + +/// Precalculated powers of base N for the Bellerophon algorithm. +pub struct BellerophonPowers { + // Pre-calculated small powers. + pub small: &'static [u64], + // Pre-calculated large powers. + pub large: &'static [u64], + /// Pre-calculated small powers as 64-bit integers + pub small_int: &'static [u64], + // Step between large powers and number of small powers. + pub step: i32, + // Exponent bias for the large powers. + pub bias: i32, + /// ceil(log2(radix)) scaled as a multiplier. + pub log2: i64, + /// Bitshift for the log2 multiplier. + pub log2_shift: i32, +} + +/// Allow indexing of values without bounds checking +impl BellerophonPowers { + #[inline] + pub fn get_small(&self, index: usize) -> ExtendedFloat { + let mant = self.small[index]; + let exp = (1 - 64) + ((self.log2 * index as i64) >> self.log2_shift); + ExtendedFloat { + mant, + exp: exp as i32, + } + } + + #[inline] + pub fn get_large(&self, index: usize) -> ExtendedFloat { + let mant = self.large[index]; + let biased_e = index as i64 * self.step as i64 - self.bias as i64; + let exp = (1 - 64) + ((self.log2 * biased_e) >> self.log2_shift); + ExtendedFloat { + mant, + exp: exp as i32, + } + } + + #[inline] + pub fn get_small_int(&self, index: usize) -> u64 { + self.small_int[index] + } +} diff --git a/third_party/rust/minimal-lexical/src/bigint.rs b/third_party/rust/minimal-lexical/src/bigint.rs new file mode 100644 index 0000000000..d1d5027a19 --- /dev/null +++ b/third_party/rust/minimal-lexical/src/bigint.rs @@ -0,0 +1,788 @@ +//! A simple big-integer type for slow path algorithms. +//! +//! This includes minimal stackvector for use in big-integer arithmetic. + +#![doc(hidden)] + +#[cfg(feature = "alloc")] +use crate::heapvec::HeapVec; +use crate::num::Float; +#[cfg(not(feature = "alloc"))] +use crate::stackvec::StackVec; +#[cfg(not(feature = "compact"))] +use crate::table::{LARGE_POW5, LARGE_POW5_STEP}; +use core::{cmp, ops, ptr}; + +/// Number of bits in a Bigint. +/// +/// This needs to be at least the number of bits required to store +/// a Bigint, which is `log2(radix**digits)`. +/// ≅ 3600 for base-10, rounded-up. +pub const BIGINT_BITS: usize = 4000; + +/// The number of limbs for the bigint. 
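+/// For example, with `BIGINT_BITS = 4000` this is `4000 / 64 = 62` limbs
+/// on 64-bit targets and `4000 / 32 = 125` limbs on 32-bit targets
+/// (integer division).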
+pub const BIGINT_LIMBS: usize = BIGINT_BITS / LIMB_BITS; + +#[cfg(feature = "alloc")] +pub type VecType = HeapVec; + +#[cfg(not(feature = "alloc"))] +pub type VecType = StackVec; + +/// Storage for a big integer type. +/// +/// This is used for algorithms when we have a finite number of digits. +/// Specifically, it stores all the significant digits scaled to the +/// proper exponent, as an integral type, and then directly compares +/// these digits. +/// +/// This requires us to store the number of significant bits, plus the +/// number of exponent bits (required) since we scale everything +/// to the same exponent. +#[derive(Clone, PartialEq, Eq)] +pub struct Bigint { + /// Significant digits for the float, stored in a big integer in LE order. + /// + /// This is pretty much the same number of digits for any radix, since the + /// significant digits balances out the zeros from the exponent: + /// 1. Decimal is 1091 digits, 767 mantissa digits + 324 exponent zeros. + /// 2. Base 6 is 1097 digits, or 680 mantissa digits + 417 exponent zeros. + /// 3. Base 36 is 1086 digits, or 877 mantissa digits + 209 exponent zeros. + /// + /// However, the number of bytes required is larger for large radixes: + /// for decimal, we need `log2(10**1091) ≅ 3600`, while for base 36 + /// we need `log2(36**1086) ≅ 5600`. Since we use uninitialized data, + /// we avoid a major performance hit from the large buffer size. + pub data: VecType, +} + +#[allow(clippy::new_without_default)] +impl Bigint { + /// Construct a bigint representing 0. + #[inline(always)] + pub fn new() -> Self { + Self { + data: VecType::new(), + } + } + + /// Construct a bigint from an integer. + #[inline(always)] + pub fn from_u64(value: u64) -> Self { + Self { + data: VecType::from_u64(value), + } + } + + #[inline(always)] + pub fn hi64(&self) -> (u64, bool) { + self.data.hi64() + } + + /// Multiply and assign as if by exponentiation by a power. + #[inline] + pub fn pow(&mut self, base: u32, exp: u32) -> Option<()> { + debug_assert!(base == 2 || base == 5 || base == 10); + if base % 5 == 0 { + pow(&mut self.data, exp)?; + } + if base % 2 == 0 { + shl(&mut self.data, exp as usize)?; + } + Some(()) + } + + /// Calculate the bit-length of the big-integer. + #[inline] + pub fn bit_length(&self) -> u32 { + bit_length(&self.data) + } +} + +impl ops::MulAssign<&Bigint> for Bigint { + fn mul_assign(&mut self, rhs: &Bigint) { + self.data *= &rhs.data; + } +} + +/// REVERSE VIEW + +/// Reverse, immutable view of a sequence. +pub struct ReverseView<'a, T: 'a> { + inner: &'a [T], +} + +impl<'a, T> ops::Index<usize> for ReverseView<'a, T> { + type Output = T; + + #[inline] + fn index(&self, index: usize) -> &T { + let len = self.inner.len(); + &(*self.inner)[len - index - 1] + } +} + +/// Create a reverse view of the vector for indexing. +#[inline] +pub fn rview(x: &[Limb]) -> ReverseView<Limb> { + ReverseView { + inner: x, + } +} + +// COMPARE +// ------- + +/// Compare `x` to `y`, in little-endian order. +#[inline] +pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering { + match x.len().cmp(&y.len()) { + cmp::Ordering::Equal => { + let iter = x.iter().rev().zip(y.iter().rev()); + for (&xi, yi) in iter { + match xi.cmp(yi) { + cmp::Ordering::Equal => (), + ord => return ord, + } + } + // Equal case. + cmp::Ordering::Equal + }, + ord => ord, + } +} + +// NORMALIZE +// --------- + +/// Normalize the integer, so any leading zero values are removed. 
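+///
+/// Illustrative example: with little-endian limbs, `[7, 0, 0]` normalizes
+/// to `[7]`, since the trailing limbs (the numerically most-significant
+/// ones) are zero.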
+#[inline] +pub fn normalize(x: &mut VecType) { + // We don't care if this wraps: the index is bounds-checked. + while let Some(&value) = x.get(x.len().wrapping_sub(1)) { + if value == 0 { + unsafe { x.set_len(x.len() - 1) }; + } else { + break; + } + } +} + +/// Get if the big integer is normalized. +#[inline] +#[allow(clippy::match_like_matches_macro)] +pub fn is_normalized(x: &[Limb]) -> bool { + // We don't care if this wraps: the index is bounds-checked. + match x.get(x.len().wrapping_sub(1)) { + Some(&0) => false, + _ => true, + } +} + +// FROM +// ---- + +/// Create StackVec from u64 value. +#[inline(always)] +#[allow(clippy::branches_sharing_code)] +pub fn from_u64(x: u64) -> VecType { + let mut vec = VecType::new(); + debug_assert!(vec.capacity() >= 2); + if LIMB_BITS == 32 { + vec.try_push(x as Limb).unwrap(); + vec.try_push((x >> 32) as Limb).unwrap(); + } else { + vec.try_push(x as Limb).unwrap(); + } + vec.normalize(); + vec +} + +// HI +// -- + +/// Check if any of the remaining bits are non-zero. +/// +/// # Safety +/// +/// Safe as long as `rindex <= x.len()`. +#[inline] +pub fn nonzero(x: &[Limb], rindex: usize) -> bool { + debug_assert!(rindex <= x.len()); + + let len = x.len(); + let slc = &x[..len - rindex]; + slc.iter().rev().any(|&x| x != 0) +} + +// These return the high X bits and if the bits were truncated. + +/// Shift 32-bit integer to high 64-bits. +#[inline] +pub fn u32_to_hi64_1(r0: u32) -> (u64, bool) { + u64_to_hi64_1(r0 as u64) +} + +/// Shift 2 32-bit integers to high 64-bits. +#[inline] +pub fn u32_to_hi64_2(r0: u32, r1: u32) -> (u64, bool) { + let r0 = (r0 as u64) << 32; + let r1 = r1 as u64; + u64_to_hi64_1(r0 | r1) +} + +/// Shift 3 32-bit integers to high 64-bits. +#[inline] +pub fn u32_to_hi64_3(r0: u32, r1: u32, r2: u32) -> (u64, bool) { + let r0 = r0 as u64; + let r1 = (r1 as u64) << 32; + let r2 = r2 as u64; + u64_to_hi64_2(r0, r1 | r2) +} + +/// Shift 64-bit integer to high 64-bits. +#[inline] +pub fn u64_to_hi64_1(r0: u64) -> (u64, bool) { + let ls = r0.leading_zeros(); + (r0 << ls, false) +} + +/// Shift 2 64-bit integers to high 64-bits. +#[inline] +pub fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) { + let ls = r0.leading_zeros(); + let rs = 64 - ls; + let v = match ls { + 0 => r0, + _ => (r0 << ls) | (r1 >> rs), + }; + let n = r1 << ls != 0; + (v, n) +} + +/// Extract the hi bits from the buffer. +macro_rules! hi { + // # Safety + // + // Safe as long as the `stackvec.len() >= 1`. + (@1 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{ + $fn($rview[0] as $t) + }}; + + // # Safety + // + // Safe as long as the `stackvec.len() >= 2`. + (@2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{ + let r0 = $rview[0] as $t; + let r1 = $rview[1] as $t; + $fn(r0, r1) + }}; + + // # Safety + // + // Safe as long as the `stackvec.len() >= 2`. + (@nonzero2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{ + let (v, n) = hi!(@2 $self, $rview, $t, $fn); + (v, n || nonzero($self, 2 )) + }}; + + // # Safety + // + // Safe as long as the `stackvec.len() >= 3`. + (@3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{ + let r0 = $rview[0] as $t; + let r1 = $rview[1] as $t; + let r2 = $rview[2] as $t; + $fn(r0, r1, r2) + }}; + + // # Safety + // + // Safe as long as the `stackvec.len() >= 3`. + (@nonzero3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{ + let (v, n) = hi!(@3 $self, $rview, $t, $fn); + (v, n || nonzero($self, 3)) + }}; +} + +/// Get the high 64 bits from the vector. 
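+///
+/// Illustrative example with 64-bit limbs: `hi64(&[1, 2])` left-shifts the
+/// high limb `2` into `0x8000_0000_0000_0000` and returns `true` for the
+/// truncation flag, since the low limb's set bit is shifted out of the
+/// 64-bit window.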
+#[inline(always)] +pub fn hi64(x: &[Limb]) -> (u64, bool) { + let rslc = rview(x); + // SAFETY: the buffer must be at least length bytes long. + match x.len() { + 0 => (0, false), + 1 if LIMB_BITS == 32 => hi!(@1 x, rslc, u32, u32_to_hi64_1), + 1 => hi!(@1 x, rslc, u64, u64_to_hi64_1), + 2 if LIMB_BITS == 32 => hi!(@2 x, rslc, u32, u32_to_hi64_2), + 2 => hi!(@2 x, rslc, u64, u64_to_hi64_2), + _ if LIMB_BITS == 32 => hi!(@nonzero3 x, rslc, u32, u32_to_hi64_3), + _ => hi!(@nonzero2 x, rslc, u64, u64_to_hi64_2), + } +} + +// POWERS +// ------ + +/// MulAssign by a power of 5. +/// +/// Theoretically... +/// +/// Use an exponentiation by squaring method, since it reduces the time +/// complexity of the multiplication to ~`O(log(n))` for the squaring, +/// and `O(n*m)` for the result. Since `m` is typically a lower-order +/// factor, this significantly reduces the number of multiplications +/// we need to do. Iteratively multiplying by small powers follows +/// the nth triangular number series, which scales as `O(p^2)`, but +/// where `p` is `n+m`. In short, it scales very poorly. +/// +/// Practically.... +/// +/// Exponentiation by Squaring: +/// running 2 tests +/// test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78) +/// test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007) +/// +/// Exponentiation by Iterative Small Powers: +/// running 2 tests +/// test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31) +/// test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47) +/// +/// Exponentiation by Iterative Large Powers (of 2): +/// running 2 tests +/// test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31) +/// test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47) +/// +/// The following benchmarks were run on `1 * 5^300`, using native `pow`, +/// a version with only small powers, and one with pre-computed powers +/// of `5^(3 * max_exp)`, rather than `5^(5 * max_exp)`. +/// +/// However, using large powers is crucial for good performance for higher +/// powers. +/// pow/default time: [426.20 ns 427.96 ns 429.89 ns] +/// pow/small time: [2.9270 us 2.9411 us 2.9565 us] +/// pow/large:3 time: [838.51 ns 842.21 ns 846.27 ns] +/// +/// Even using worst-case scenarios, exponentiation by squaring is +/// significantly slower for our workloads. Just multiply by small powers, +/// in simple cases, and use precalculated large powers in other cases. +/// +/// Furthermore, using sufficiently big large powers is also crucial for +/// performance. This is a tradeoff of binary size and performance, and +/// using a single value at ~`5^(5 * max_exp)` seems optimal. +pub fn pow(x: &mut VecType, mut exp: u32) -> Option<()> { + // Minimize the number of iterations for large exponents: just + // do a few steps with a large powers. + #[cfg(not(feature = "compact"))] + { + while exp >= LARGE_POW5_STEP { + large_mul(x, &LARGE_POW5)?; + exp -= LARGE_POW5_STEP; + } + } + + // Now use our pre-computed small powers iteratively. + // This is calculated as `⌊log(2^BITS - 1, 5)⌋`. + let small_step = if LIMB_BITS == 32 { + 13 + } else { + 27 + }; + let max_native = (5 as Limb).pow(small_step); + while exp >= small_step { + small_mul(x, max_native)?; + exp -= small_step; + } + if exp != 0 { + // SAFETY: safe, since `exp < small_step`. + let small_power = unsafe { f64::int_pow_fast_path(exp as usize, 5) }; + small_mul(x, small_power as Limb)?; + } + Some(()) +} + +// SCALAR +// ------ + +/// Add two small integers and return the resulting value and if overflow happens. 
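+///
+/// E.g. `scalar_add(Limb::MAX, 1)` wraps around and returns `(0, true)`.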
+#[inline(always)] +pub fn scalar_add(x: Limb, y: Limb) -> (Limb, bool) { + x.overflowing_add(y) +} + +/// Multiply two small integers (with carry) (and return the overflow contribution). +/// +/// Returns the (low, high) components. +#[inline(always)] +pub fn scalar_mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) { + // Cannot overflow, as long as wide is 2x as wide. This is because + // the following is always true: + // `Wide::MAX - (Narrow::MAX * Narrow::MAX) >= Narrow::MAX` + let z: Wide = (x as Wide) * (y as Wide) + (carry as Wide); + (z as Limb, (z >> LIMB_BITS) as Limb) +} + +// SMALL +// ----- + +/// Add small integer to bigint starting from offset. +#[inline] +pub fn small_add_from(x: &mut VecType, y: Limb, start: usize) -> Option<()> { + let mut index = start; + let mut carry = y; + while carry != 0 && index < x.len() { + let result = scalar_add(x[index], carry); + x[index] = result.0; + carry = result.1 as Limb; + index += 1; + } + // If we carried past all the elements, add to the end of the buffer. + if carry != 0 { + x.try_push(carry)?; + } + Some(()) +} + +/// Add small integer to bigint. +#[inline(always)] +pub fn small_add(x: &mut VecType, y: Limb) -> Option<()> { + small_add_from(x, y, 0) +} + +/// Multiply bigint by small integer. +#[inline] +pub fn small_mul(x: &mut VecType, y: Limb) -> Option<()> { + let mut carry = 0; + for xi in x.iter_mut() { + let result = scalar_mul(*xi, y, carry); + *xi = result.0; + carry = result.1; + } + // If we carried past all the elements, add to the end of the buffer. + if carry != 0 { + x.try_push(carry)?; + } + Some(()) +} + +// LARGE +// ----- + +/// Add bigint to bigint starting from offset. +pub fn large_add_from(x: &mut VecType, y: &[Limb], start: usize) -> Option<()> { + // The effective x buffer is from `xstart..x.len()`, so we need to treat + // that as the current range. If the effective y buffer is longer, need + // to resize to that, + the start index. + if y.len() > x.len().saturating_sub(start) { + // Ensure we panic if we can't extend the buffer. + // This avoids any unsafe behavior afterwards. + x.try_resize(y.len() + start, 0)?; + } + + // Iteratively add elements from y to x. + let mut carry = false; + for (index, &yi) in y.iter().enumerate() { + // We panicked in `try_resize` if this wasn't true. + let xi = x.get_mut(start + index).unwrap(); + + // Only one op of the two ops can overflow, since we added at max + // Limb::max_value() + Limb::max_value(). Add the previous carry, + // and store the current carry for the next. + let result = scalar_add(*xi, yi); + *xi = result.0; + let mut tmp = result.1; + if carry { + let result = scalar_add(*xi, 1); + *xi = result.0; + tmp |= result.1; + } + carry = tmp; + } + + // Handle overflow. + if carry { + small_add_from(x, 1, y.len() + start)?; + } + Some(()) +} + +/// Add bigint to bigint. +#[inline(always)] +pub fn large_add(x: &mut VecType, y: &[Limb]) -> Option<()> { + large_add_from(x, y, 0) +} + +/// Grade-school multiplication algorithm. +/// +/// Slow, naive algorithm, using limb-bit bases and just shifting left for +/// each iteration. This could be optimized with numerous other algorithms, +/// but it's extremely simple, and works in O(n*m) time, which is fine +/// by me. Each iteration, of which there are `m` iterations, requires +/// `n` multiplications, and `n` additions, or grade-school multiplication. 
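+///
+/// As a small illustration (base-10 analogy): `123 * 45` is computed as
+/// `123 * 5` plus `123 * 4` shifted one digit left; here each "digit" is a
+/// full limb, so the shift is a limb offset (`large_add_from` with a start
+/// index) rather than a multiplication by ten.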
+/// +/// Don't use Karatsuba multiplication, since out implementation seems to +/// be slower asymptotically, which is likely just due to the small sizes +/// we deal with here. For example, running on the following data: +/// +/// ```text +/// const SMALL_X: &[u32] = &[ +/// 766857581, 3588187092, 1583923090, 2204542082, 1564708913, 2695310100, 3676050286, +/// 1022770393, 468044626, 446028186 +/// ]; +/// const SMALL_Y: &[u32] = &[ +/// 3945492125, 3250752032, 1282554898, 1708742809, 1131807209, 3171663979, 1353276095, +/// 1678845844, 2373924447, 3640713171 +/// ]; +/// const LARGE_X: &[u32] = &[ +/// 3647536243, 2836434412, 2154401029, 1297917894, 137240595, 790694805, 2260404854, +/// 3872698172, 690585094, 99641546, 3510774932, 1672049983, 2313458559, 2017623719, +/// 638180197, 1140936565, 1787190494, 1797420655, 14113450, 2350476485, 3052941684, +/// 1993594787, 2901001571, 4156930025, 1248016552, 848099908, 2660577483, 4030871206, +/// 692169593, 2835966319, 1781364505, 4266390061, 1813581655, 4210899844, 2137005290, +/// 2346701569, 3715571980, 3386325356, 1251725092, 2267270902, 474686922, 2712200426, +/// 197581715, 3087636290, 1379224439, 1258285015, 3230794403, 2759309199, 1494932094, +/// 326310242 +/// ]; +/// const LARGE_Y: &[u32] = &[ +/// 1574249566, 868970575, 76716509, 3198027972, 1541766986, 1095120699, 3891610505, +/// 2322545818, 1677345138, 865101357, 2650232883, 2831881215, 3985005565, 2294283760, +/// 3468161605, 393539559, 3665153349, 1494067812, 106699483, 2596454134, 797235106, +/// 705031740, 1209732933, 2732145769, 4122429072, 141002534, 790195010, 4014829800, +/// 1303930792, 3649568494, 308065964, 1233648836, 2807326116, 79326486, 1262500691, +/// 621809229, 2258109428, 3819258501, 171115668, 1139491184, 2979680603, 1333372297, +/// 1657496603, 2790845317, 4090236532, 4220374789, 601876604, 1828177209, 2372228171, +/// 2247372529 +/// ]; +/// ``` +/// +/// We get the following results: + +/// ```text +/// mul/small:long time: [220.23 ns 221.47 ns 222.81 ns] +/// Found 4 outliers among 100 measurements (4.00%) +/// 2 (2.00%) high mild +/// 2 (2.00%) high severe +/// mul/small:karatsuba time: [233.88 ns 234.63 ns 235.44 ns] +/// Found 11 outliers among 100 measurements (11.00%) +/// 8 (8.00%) high mild +/// 3 (3.00%) high severe +/// mul/large:long time: [1.9365 us 1.9455 us 1.9558 us] +/// Found 12 outliers among 100 measurements (12.00%) +/// 7 (7.00%) high mild +/// 5 (5.00%) high severe +/// mul/large:karatsuba time: [4.4250 us 4.4515 us 4.4812 us] +/// ``` +/// +/// In short, Karatsuba multiplication is never worthwhile for out use-case. +pub fn long_mul(x: &[Limb], y: &[Limb]) -> Option<VecType> { + // Using the immutable value, multiply by all the scalars in y, using + // the algorithm defined above. Use a single buffer to avoid + // frequent reallocations. Handle the first case to avoid a redundant + // addition, since we know y.len() >= 1. + let mut z = VecType::try_from(x)?; + if !y.is_empty() { + let y0 = y[0]; + small_mul(&mut z, y0)?; + + for (index, &yi) in y.iter().enumerate().skip(1) { + if yi != 0 { + let mut zi = VecType::try_from(x)?; + small_mul(&mut zi, yi)?; + large_add_from(&mut z, &zi, index)?; + } + } + } + + z.normalize(); + Some(z) +} + +/// Multiply bigint by bigint using grade-school multiplication algorithm. +#[inline(always)] +pub fn large_mul(x: &mut VecType, y: &[Limb]) -> Option<()> { + // Karatsuba multiplication never makes sense, so just use grade school + // multiplication. 
+ if y.len() == 1 { + // SAFETY: safe since `y.len() == 1`. + small_mul(x, y[0])?; + } else { + *x = long_mul(y, x)?; + } + Some(()) +} + +// SHIFT +// ----- + +/// Shift-left `n` bits inside a buffer. +#[inline] +pub fn shl_bits(x: &mut VecType, n: usize) -> Option<()> { + debug_assert!(n != 0); + + // Internally, for each item, we shift left by n, and add the previous + // right shifted limb-bits. + // For example, we transform (for u8) shifted left 2, to: + // b10100100 b01000010 + // b10 b10010001 b00001000 + debug_assert!(n < LIMB_BITS); + let rshift = LIMB_BITS - n; + let lshift = n; + let mut prev: Limb = 0; + for xi in x.iter_mut() { + let tmp = *xi; + *xi <<= lshift; + *xi |= prev >> rshift; + prev = tmp; + } + + // Always push the carry, even if it creates a non-normal result. + let carry = prev >> rshift; + if carry != 0 { + x.try_push(carry)?; + } + + Some(()) +} + +/// Shift-left `n` limbs inside a buffer. +#[inline] +pub fn shl_limbs(x: &mut VecType, n: usize) -> Option<()> { + debug_assert!(n != 0); + if n + x.len() > x.capacity() { + None + } else if !x.is_empty() { + let len = n + x.len(); + // SAFE: since x is not empty, and `x.len() + n <= x.capacity()`. + unsafe { + // Move the elements. + let src = x.as_ptr(); + let dst = x.as_mut_ptr().add(n); + ptr::copy(src, dst, x.len()); + // Write our 0s. + ptr::write_bytes(x.as_mut_ptr(), 0, n); + x.set_len(len); + } + Some(()) + } else { + Some(()) + } +} + +/// Shift-left buffer by n bits. +#[inline] +pub fn shl(x: &mut VecType, n: usize) -> Option<()> { + let rem = n % LIMB_BITS; + let div = n / LIMB_BITS; + if rem != 0 { + shl_bits(x, rem)?; + } + if div != 0 { + shl_limbs(x, div)?; + } + Some(()) +} + +/// Get number of leading zero bits in the storage. +#[inline] +pub fn leading_zeros(x: &[Limb]) -> u32 { + let length = x.len(); + // wrapping_sub is fine, since it'll just return None. + if let Some(&value) = x.get(length.wrapping_sub(1)) { + value.leading_zeros() + } else { + 0 + } +} + +/// Calculate the bit-length of the big-integer. +#[inline] +pub fn bit_length(x: &[Limb]) -> u32 { + let nlz = leading_zeros(x); + LIMB_BITS as u32 * x.len() as u32 - nlz +} + +// LIMB +// ---- + +// Type for a single limb of the big integer. +// +// A limb is analogous to a digit in base10, except, it stores 32-bit +// or 64-bit numbers instead. We want types where 64-bit multiplication +// is well-supported by the architecture, rather than emulated in 3 +// instructions. The quickest way to check this support is using a +// cross-compiler for numerous architectures, along with the following +// source file and command: +// +// Compile with `gcc main.c -c -S -O3 -masm=intel` +// +// And the source code is: +// ```text +// #include <stdint.h> +// +// struct i128 { +// uint64_t hi; +// uint64_t lo; +// }; +// +// // Type your code here, or load an example. +// struct i128 square(uint64_t x, uint64_t y) { +// __int128 prod = (__int128)x * (__int128)y; +// struct i128 z; +// z.hi = (uint64_t)(prod >> 64); +// z.lo = (uint64_t)prod; +// return z; +// } +// ``` +// +// If the result contains `call __multi3`, then the multiplication +// is emulated by the compiler. Otherwise, it's natively supported. +// +// This should be all-known 64-bit platforms supported by Rust. +// https://forge.rust-lang.org/platform-support.html +// +// # Supported +// +// Platforms where native 128-bit multiplication is explicitly supported: +// - x86_64 (Supported via `MUL`). +// - mips64 (Supported via `DMULTU`, which `HI` and `LO` can be read-from). 
+// - s390x (Supported via `MLGR`). +// +// # Efficient +// +// Platforms where native 64-bit multiplication is supported and +// you can extract hi-lo for 64-bit multiplications. +// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits). +// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits). +// - riscv64 (Requires `MUL` and `MULH` to capture high and low bits). +// +// # Unsupported +// +// Platforms where native 128-bit multiplication is not supported, +// requiring software emulation. +// sparc64 (`UMUL` only supports double-word arguments). +// sparcv9 (Same as sparc64). +// +// These tests are run via `xcross`, my own library for C cross-compiling, +// which supports numerous targets (far in excess of Rust's tier 1 support, +// or rust-embedded/cross's list). xcross may be found here: +// https://github.com/Alexhuszagh/xcross +// +// To compile for the given target, run: +// `xcross gcc main.c -c -S -O3 --target $target` +// +// All 32-bit architectures inherently do not have support. That means +// we can essentially look for 64-bit architectures that are not SPARC. + +#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))] +pub type Limb = u64; +#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))] +pub type Wide = u128; +#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))] +pub const LIMB_BITS: usize = 64; + +#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))] +pub type Limb = u32; +#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))] +pub type Wide = u64; +#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))] +pub const LIMB_BITS: usize = 32; diff --git a/third_party/rust/minimal-lexical/src/extended_float.rs b/third_party/rust/minimal-lexical/src/extended_float.rs new file mode 100644 index 0000000000..7397e199c8 --- /dev/null +++ b/third_party/rust/minimal-lexical/src/extended_float.rs @@ -0,0 +1,24 @@ +// FLOAT TYPE + +#![doc(hidden)] + +use crate::num::Float; + +/// Extended precision floating-point type. +/// +/// Private implementation, exposed only for testing purposes. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ExtendedFloat { + /// Mantissa for the extended-precision float. + pub mant: u64, + /// Binary exponent for the extended-precision float. + pub exp: i32, +} + +/// Converts an `ExtendedFloat` to the closest machine float type. +#[inline(always)] +pub fn extended_to_float<F: Float>(x: ExtendedFloat) -> F { + let mut word = x.mant; + word |= (x.exp as u64) << F::MANTISSA_SIZE; + F::from_bits(word) +} diff --git a/third_party/rust/minimal-lexical/src/fpu.rs b/third_party/rust/minimal-lexical/src/fpu.rs new file mode 100644 index 0000000000..42059a080a --- /dev/null +++ b/third_party/rust/minimal-lexical/src/fpu.rs @@ -0,0 +1,98 @@ +//! Platform-specific, assembly instructions to avoid +//! intermediate rounding on architectures with FPUs. +//! +//! This is adapted from the implementation in the Rust core library, +//! the original implementation can be [here](https://github.com/rust-lang/rust/blob/master/library/core/src/num/dec2flt/fpu.rs). +//! +//! It is therefore also subject to a Apache2.0/MIT license. + +#![cfg(feature = "nightly")] +#![doc(hidden)] + +pub use fpu_precision::set_precision; + +// On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available. 
+// The x87 FPU operates with 80 bits of precision by default, which means that operations will +// round to 80 bits causing double rounding to happen when values are eventually represented as +// 32/64 bit float values. To overcome this, the FPU control word can be set so that the +// computations are performed in the desired precision. +#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] +mod fpu_precision { + use core::mem::size_of; + + /// A structure used to preserve the original value of the FPU control word, so that it can be + /// restored when the structure is dropped. + /// + /// The x87 FPU is a 16-bits register whose fields are as follows: + /// + /// | 12-15 | 10-11 | 8-9 | 6-7 | 5 | 4 | 3 | 2 | 1 | 0 | + /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:| + /// | | RC | PC | | PM | UM | OM | ZM | DM | IM | + /// + /// The documentation for all of the fields is available in the IA-32 Architectures Software + /// Developer's Manual (Volume 1). + /// + /// The only field which is relevant for the following code is PC, Precision Control. This + /// field determines the precision of the operations performed by the FPU. It can be set to: + /// - 0b00, single precision i.e., 32-bits + /// - 0b10, double precision i.e., 64-bits + /// - 0b11, double extended precision i.e., 80-bits (default state) + /// The 0b01 value is reserved and should not be used. + pub struct FPUControlWord(u16); + + fn set_cw(cw: u16) { + // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with + // any `u16` + unsafe { + asm!( + "fldcw word ptr [{}]", + in(reg) &cw, + options(nostack), + ) + } + } + + /// Sets the precision field of the FPU to `T` and returns a `FPUControlWord`. + pub fn set_precision<T>() -> FPUControlWord { + let mut cw = 0_u16; + + // Compute the value for the Precision Control field that is appropriate for `T`. + let cw_precision = match size_of::<T>() { + 4 => 0x0000, // 32 bits + 8 => 0x0200, // 64 bits + _ => 0x0300, // default, 80 bits + }; + + // Get the original value of the control word to restore it later, when the + // `FPUControlWord` structure is dropped + // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with + // any `u16` + unsafe { + asm!( + "fnstcw word ptr [{}]", + in(reg) &mut cw, + options(nostack), + ) + } + + // Set the control word to the desired precision. This is achieved by masking away the old + // precision (bits 8 and 9, 0x300) and replacing it with the precision flag computed above. + set_cw((cw & 0xFCFF) | cw_precision); + + FPUControlWord(cw) + } + + impl Drop for FPUControlWord { + fn drop(&mut self) { + set_cw(self.0) + } + } +} + +// In most architectures, floating point operations have an explicit bit size, therefore the +// precision of the computation is determined on a per-operation basis. +#[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))] +mod fpu_precision { + pub fn set_precision<T>() { + } +} diff --git a/third_party/rust/minimal-lexical/src/heapvec.rs b/third_party/rust/minimal-lexical/src/heapvec.rs new file mode 100644 index 0000000000..035926018a --- /dev/null +++ b/third_party/rust/minimal-lexical/src/heapvec.rs @@ -0,0 +1,190 @@ +//! Simple heap-allocated vector. + +#![cfg(feature = "alloc")] +#![doc(hidden)] + +use crate::bigint; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +use core::{cmp, ops}; +#[cfg(feature = "std")] +use std::vec::Vec; + +/// Simple heap vector implementation. 
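+///
+/// A thin wrapper over `Vec<bigint::Limb>` that reserves
+/// `bigint::BIGINT_LIMBS` capacity up front, mirroring the API of the
+/// fixed-capacity `StackVec` used when the `alloc` feature is disabled.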
+#[derive(Clone)] +pub struct HeapVec { + /// The heap-allocated buffer for the elements. + data: Vec<bigint::Limb>, +} + +#[allow(clippy::new_without_default)] +impl HeapVec { + /// Construct an empty vector. + #[inline] + pub fn new() -> Self { + Self { + data: Vec::with_capacity(bigint::BIGINT_LIMBS), + } + } + + /// Construct a vector from an existing slice. + #[inline] + pub fn try_from(x: &[bigint::Limb]) -> Option<Self> { + let mut vec = Self::new(); + vec.try_extend(x)?; + Some(vec) + } + + /// Sets the length of a vector. + /// + /// This will explicitly set the size of the vector, without actually + /// modifying its buffers, so it is up to the caller to ensure that the + /// vector is actually the specified size. + /// + /// # Safety + /// + /// Safe as long as `len` is less than `self.capacity()` and has been initialized. + #[inline] + pub unsafe fn set_len(&mut self, len: usize) { + debug_assert!(len <= bigint::BIGINT_LIMBS); + unsafe { self.data.set_len(len) }; + } + + /// The number of elements stored in the vector. + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + /// If the vector is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// The number of items the vector can hold. + #[inline] + pub fn capacity(&self) -> usize { + self.data.capacity() + } + + /// Append an item to the vector. + #[inline] + pub fn try_push(&mut self, value: bigint::Limb) -> Option<()> { + self.data.push(value); + Some(()) + } + + /// Remove an item from the end of the vector and return it, or None if empty. + #[inline] + pub fn pop(&mut self) -> Option<bigint::Limb> { + self.data.pop() + } + + /// Copy elements from a slice and append them to the vector. + #[inline] + pub fn try_extend(&mut self, slc: &[bigint::Limb]) -> Option<()> { + self.data.extend_from_slice(slc); + Some(()) + } + + /// Try to resize the buffer. + /// + /// If the new length is smaller than the current length, truncate + /// the input. If it's larger, then append elements to the buffer. + #[inline] + pub fn try_resize(&mut self, len: usize, value: bigint::Limb) -> Option<()> { + self.data.resize(len, value); + Some(()) + } + + // HI + + /// Get the high 64 bits from the vector. + #[inline(always)] + pub fn hi64(&self) -> (u64, bool) { + bigint::hi64(&self.data) + } + + // FROM + + /// Create StackVec from u64 value. + #[inline(always)] + pub fn from_u64(x: u64) -> Self { + bigint::from_u64(x) + } + + // MATH + + /// Normalize the integer, so any leading zero values are removed. + #[inline] + pub fn normalize(&mut self) { + bigint::normalize(self) + } + + /// Get if the big integer is normalized. + #[inline] + pub fn is_normalized(&self) -> bool { + bigint::is_normalized(self) + } + + /// AddAssign small integer. + #[inline] + pub fn add_small(&mut self, y: bigint::Limb) -> Option<()> { + bigint::small_add(self, y) + } + + /// MulAssign small integer. 
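+    ///
+    /// Illustrative example: `x.mul_small(5_u64.pow(27) as Limb)` multiplies
+    /// by `5^27`, the largest power of five that fits in a single 64-bit
+    /// limb, matching what `bigint::pow` does via `small_mul`.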
+ #[inline] + pub fn mul_small(&mut self, y: bigint::Limb) -> Option<()> { + bigint::small_mul(self, y) + } +} + +impl PartialEq for HeapVec { + #[inline] + #[allow(clippy::op_ref)] + fn eq(&self, other: &Self) -> bool { + use core::ops::Deref; + self.len() == other.len() && self.deref() == other.deref() + } +} + +impl Eq for HeapVec { +} + +impl cmp::PartialOrd for HeapVec { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + Some(bigint::compare(self, other)) + } +} + +impl cmp::Ord for HeapVec { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + bigint::compare(self, other) + } +} + +impl ops::Deref for HeapVec { + type Target = [bigint::Limb]; + #[inline] + fn deref(&self) -> &[bigint::Limb] { + &self.data + } +} + +impl ops::DerefMut for HeapVec { + #[inline] + fn deref_mut(&mut self) -> &mut [bigint::Limb] { + &mut self.data + } +} + +impl ops::MulAssign<&[bigint::Limb]> for HeapVec { + #[inline] + fn mul_assign(&mut self, rhs: &[bigint::Limb]) { + bigint::large_mul(self, rhs).unwrap(); + } +} diff --git a/third_party/rust/minimal-lexical/src/lemire.rs b/third_party/rust/minimal-lexical/src/lemire.rs new file mode 100644 index 0000000000..99b1ae7059 --- /dev/null +++ b/third_party/rust/minimal-lexical/src/lemire.rs @@ -0,0 +1,225 @@ +//! Implementation of the Eisel-Lemire algorithm. +//! +//! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust), +//! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust. + +#![cfg(not(feature = "compact"))] +#![doc(hidden)] + +use crate::extended_float::ExtendedFloat; +use crate::num::Float; +use crate::number::Number; +use crate::table::{LARGEST_POWER_OF_FIVE, POWER_OF_FIVE_128, SMALLEST_POWER_OF_FIVE}; + +/// Ensure truncation of digits doesn't affect our computation, by doing 2 passes. +#[inline] +pub fn lemire<F: Float>(num: &Number) -> ExtendedFloat { + // If significant digits were truncated, then we can have rounding error + // only if `mantissa + 1` produces a different result. We also avoid + // redundantly using the Eisel-Lemire algorithm if it was unable to + // correctly round on the first pass. + let mut fp = compute_float::<F>(num.exponent, num.mantissa); + if num.many_digits && fp.exp >= 0 && fp != compute_float::<F>(num.exponent, num.mantissa + 1) { + // Need to re-calculate, since the previous values are rounded + // when the slow path algorithm expects a normalized extended float. + fp = compute_error::<F>(num.exponent, num.mantissa); + } + fp +} + +/// Compute a float using an extended-precision representation. +/// +/// Fast conversion of a the significant digits and decimal exponent +/// a float to a extended representation with a binary float. This +/// algorithm will accurately parse the vast majority of cases, +/// and uses a 128-bit representation (with a fallback 192-bit +/// representation). +/// +/// This algorithm scales the exponent by the decimal exponent +/// using pre-computed powers-of-5, and calculates if the +/// representation can be unambiguously rounded to the nearest +/// machine float. Near-halfway cases are not handled here, +/// and are represented by a negative, biased binary exponent. +/// +/// The algorithm is described in detail in "Daniel Lemire, Number Parsing +/// at a Gigabyte per Second" in section 5, "Fast Algorithm", and +/// section 6, "Exact Numbers And Ties", available online: +/// <https://arxiv.org/abs/2101.11408.pdf>. 
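+///
+/// Illustrative example: for the decimal "1.2345" a caller supplies
+/// `w = 12345` and `q = -4`, since the value is `12345 * 10^-4`, and
+/// `compute_float::<f64>(-4, 12345)` yields the biased exponent and
+/// mantissa bits of `1.2345_f64`.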
+pub fn compute_float<F: Float>(q: i32, mut w: u64) -> ExtendedFloat { + let fp_zero = ExtendedFloat { + mant: 0, + exp: 0, + }; + let fp_inf = ExtendedFloat { + mant: 0, + exp: F::INFINITE_POWER, + }; + + // Short-circuit if the value can only be a literal 0 or infinity. + if w == 0 || q < F::SMALLEST_POWER_OF_TEN { + return fp_zero; + } else if q > F::LARGEST_POWER_OF_TEN { + return fp_inf; + } + // Normalize our significant digits, so the most-significant bit is set. + let lz = w.leading_zeros() as i32; + w <<= lz; + let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3); + if lo == 0xFFFF_FFFF_FFFF_FFFF { + // If we have failed to approximate w x 5^-q with our 128-bit value. + // Since the addition of 1 could lead to an overflow which could then + // round up over the half-way point, this can lead to improper rounding + // of a float. + // + // However, this can only occur if q ∈ [-27, 55]. The upper bound of q + // is 55 because 5^55 < 2^128, however, this can only happen if 5^q > 2^64, + // since otherwise the product can be represented in 64-bits, producing + // an exact result. For negative exponents, rounding-to-even can + // only occur if 5^-q < 2^64. + // + // For detailed explanations of rounding for negative exponents, see + // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed + // explanations of rounding for positive exponents, see + // <https://arxiv.org/pdf/2101.11408.pdf#section.8>. + let inside_safe_exponent = (q >= -27) && (q <= 55); + if !inside_safe_exponent { + return compute_error_scaled::<F>(q, hi, lz); + } + } + let upperbit = (hi >> 63) as i32; + let mut mantissa = hi >> (upperbit + 64 - F::MANTISSA_SIZE - 3); + let mut power2 = power(q) + upperbit - lz - F::MINIMUM_EXPONENT; + if power2 <= 0 { + if -power2 + 1 >= 64 { + // Have more than 64 bits below the minimum exponent, must be 0. + return fp_zero; + } + // Have a subnormal value. + mantissa >>= -power2 + 1; + mantissa += mantissa & 1; + mantissa >>= 1; + power2 = (mantissa >= (1_u64 << F::MANTISSA_SIZE)) as i32; + return ExtendedFloat { + mant: mantissa, + exp: power2, + }; + } + // Need to handle rounding ties. Normally, we need to round up, + // but if we fall right in between and and we have an even basis, we + // need to round down. + // + // This will only occur if: + // 1. The lower 64 bits of the 128-bit representation is 0. + // IE, 5^q fits in single 64-bit word. + // 2. The least-significant bit prior to truncated mantissa is odd. + // 3. All the bits truncated when shifting to mantissa bits + 1 are 0. + // + // Or, we may fall between two floats: we are exactly halfway. + if lo <= 1 + && q >= F::MIN_EXPONENT_ROUND_TO_EVEN + && q <= F::MAX_EXPONENT_ROUND_TO_EVEN + && mantissa & 3 == 1 + && (mantissa << (upperbit + 64 - F::MANTISSA_SIZE - 3)) == hi + { + // Zero the lowest bit, so we don't round up. + mantissa &= !1_u64; + } + // Round-to-even, then shift the significant digits into place. + mantissa += mantissa & 1; + mantissa >>= 1; + if mantissa >= (2_u64 << F::MANTISSA_SIZE) { + // Rounding up overflowed, so the carry bit is set. Set the + // mantissa to 1 (only the implicit, hidden bit is set) and + // increase the exponent. + mantissa = 1_u64 << F::MANTISSA_SIZE; + power2 += 1; + } + // Zero out the hidden bit. + mantissa &= !(1_u64 << F::MANTISSA_SIZE); + if power2 >= F::INFINITE_POWER { + // Exponent is above largest normal value, must be infinite. 
+ return fp_inf; + } + ExtendedFloat { + mant: mantissa, + exp: power2, + } +} + +/// Fallback algorithm to calculate the non-rounded representation. +/// This calculates the extended representation, and then normalizes +/// the resulting representation, so the high bit is set. +#[inline] +pub fn compute_error<F: Float>(q: i32, mut w: u64) -> ExtendedFloat { + let lz = w.leading_zeros() as i32; + w <<= lz; + let hi = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3).1; + compute_error_scaled::<F>(q, hi, lz) +} + +/// Compute the error from a mantissa scaled to the exponent. +#[inline] +pub fn compute_error_scaled<F: Float>(q: i32, mut w: u64, lz: i32) -> ExtendedFloat { + // Want to normalize the float, but this is faster than ctlz on most architectures. + let hilz = (w >> 63) as i32 ^ 1; + w <<= hilz; + let power2 = power(q as i32) + F::EXPONENT_BIAS - hilz - lz - 62; + + ExtendedFloat { + mant: w, + exp: power2 + F::INVALID_FP, + } +} + +/// Calculate a base 2 exponent from a decimal exponent. +/// This uses a pre-computed integer approximation for +/// log2(10), where 217706 / 2^16 is accurate for the +/// entire range of non-finite decimal exponents. +#[inline] +fn power(q: i32) -> i32 { + (q.wrapping_mul(152_170 + 65536) >> 16) + 63 +} + +#[inline] +fn full_multiplication(a: u64, b: u64) -> (u64, u64) { + let r = (a as u128) * (b as u128); + (r as u64, (r >> 64) as u64) +} + +// This will compute or rather approximate w * 5**q and return a pair of 64-bit words +// approximating the result, with the "high" part corresponding to the most significant +// bits and the low part corresponding to the least significant bits. +fn compute_product_approx(q: i32, w: u64, precision: usize) -> (u64, u64) { + debug_assert!(q >= SMALLEST_POWER_OF_FIVE); + debug_assert!(q <= LARGEST_POWER_OF_FIVE); + debug_assert!(precision <= 64); + + let mask = if precision < 64 { + 0xFFFF_FFFF_FFFF_FFFF_u64 >> precision + } else { + 0xFFFF_FFFF_FFFF_FFFF_u64 + }; + + // 5^q < 2^64, then the multiplication always provides an exact value. + // That means whenever we need to round ties to even, we always have + // an exact value. + let index = (q - SMALLEST_POWER_OF_FIVE) as usize; + let (lo5, hi5) = POWER_OF_FIVE_128[index]; + // Only need one multiplication as long as there is 1 zero but + // in the explicit mantissa bits, +1 for the hidden bit, +1 to + // determine the rounding direction, +1 for if the computed + // product has a leading zero. + let (mut first_lo, mut first_hi) = full_multiplication(w, lo5); + if first_hi & mask == mask { + // Need to do a second multiplication to get better precision + // for the lower product. This will always be exact + // where q is < 55, since 5^55 < 2^128. If this wraps, + // then we need to need to round up the hi product. + let (_, second_hi) = full_multiplication(w, hi5); + first_lo = first_lo.wrapping_add(second_hi); + if second_hi > first_lo { + first_hi += 1; + } + } + (first_lo, first_hi) +} diff --git a/third_party/rust/minimal-lexical/src/lib.rs b/third_party/rust/minimal-lexical/src/lib.rs new file mode 100644 index 0000000000..75f923475f --- /dev/null +++ b/third_party/rust/minimal-lexical/src/lib.rs @@ -0,0 +1,68 @@ +//! Fast, minimal float-parsing algorithm. +//! +//! minimal-lexical has a simple, high-level API with a single +//! exported function: [`parse_float`]. +//! +//! [`parse_float`] expects a forward iterator for the integer +//! and fraction digits, as well as a parsed exponent as an [`i32`]. +//! +//! 
For more examples, please see [simple-example](https://github.com/Alexhuszagh/minimal-lexical/blob/master/examples/simple.rs). +//! +//! EXAMPLES +//! -------- +//! +//! ``` +//! extern crate minimal_lexical; +//! +//! // Let's say we want to parse "1.2345". +//! // First, we need an external parser to extract the integer digits ("1"), +//! // the fraction digits ("2345"), and then parse the exponent to a 32-bit +//! // integer (0). +//! // Warning: +//! // -------- +//! // Please note that leading zeros must be trimmed from the integer, +//! // and trailing zeros must be trimmed from the fraction. This cannot +//! // be handled by minimal-lexical, since we accept iterators. +//! let integer = b"1"; +//! let fraction = b"2345"; +//! let float: f64 = minimal_lexical::parse_float(integer.iter(), fraction.iter(), 0); +//! println!("float={:?}", float); // 1.235 +//! ``` +//! +//! [`parse_float`]: fn.parse_float.html +//! [`i32`]: https://doc.rust-lang.org/stable/std/primitive.i32.html + +// FEATURES + +// We want to have the same safety guarantees as Rust core, +// so we allow unused unsafe to clearly document safety guarantees. +#![allow(unused_unsafe)] +#![cfg_attr(feature = "lint", warn(unsafe_op_in_unsafe_fn))] +#![cfg_attr(not(feature = "std"), no_std)] + +#[cfg(all(feature = "alloc", not(feature = "std")))] +extern crate alloc; + +pub mod bellerophon; +pub mod bigint; +pub mod extended_float; +pub mod fpu; +pub mod heapvec; +pub mod lemire; +pub mod libm; +pub mod mask; +pub mod num; +pub mod number; +pub mod parse; +pub mod rounding; +pub mod slow; +pub mod stackvec; +pub mod table; + +mod table_bellerophon; +mod table_lemire; +mod table_small; + +// API +pub use self::num::Float; +pub use self::parse::parse_float; diff --git a/third_party/rust/minimal-lexical/src/libm.rs b/third_party/rust/minimal-lexical/src/libm.rs new file mode 100644 index 0000000000..c9f93d36ac --- /dev/null +++ b/third_party/rust/minimal-lexical/src/libm.rs @@ -0,0 +1,1238 @@ +//! A small number of math routines for floats and doubles. +//! +//! These are adapted from libm, a port of musl libc's libm to Rust. +//! libm can be found online [here](https://github.com/rust-lang/libm), +//! and is similarly licensed under an Apache2.0/MIT license + +#![cfg(all(not(feature = "std"), feature = "compact"))] +#![doc(hidden)] + +/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/// # Safety +/// +/// Safe if `index < array.len()`. +macro_rules! i { + ($array:ident, $index:expr) => { + // SAFETY: safe if `index < array.len()`. 
+ unsafe { *$array.get_unchecked($index) } + }; +} + +pub fn powf(x: f32, y: f32) -> f32 { + const BP: [f32; 2] = [1.0, 1.5]; + const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ + const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */ + const TWO24: f32 = 16777216.0; /* 0x4b800000 */ + const HUGE: f32 = 1.0e30; + const TINY: f32 = 1.0e-30; + const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */ + const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */ + const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */ + const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */ + const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */ + const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */ + const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */ + const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ + const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */ + const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */ + const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */ + const LG2: f32 = 6.9314718246e-01; /* 0x3f317218 */ + const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */ + const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */ + const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */ + const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */ + const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */ + const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */ + const IVLN2: f32 = 1.4426950216e+00; + const IVLN2_H: f32 = 1.4426879883e+00; + const IVLN2_L: f32 = 7.0526075433e-06; + + let mut z: f32; + let mut ax: f32; + let z_h: f32; + let z_l: f32; + let mut p_h: f32; + let mut p_l: f32; + let y1: f32; + let mut t1: f32; + let t2: f32; + let mut r: f32; + let s: f32; + let mut sn: f32; + let mut t: f32; + let mut u: f32; + let mut v: f32; + let mut w: f32; + let i: i32; + let mut j: i32; + let mut k: i32; + let mut yisint: i32; + let mut n: i32; + let hx: i32; + let hy: i32; + let mut ix: i32; + let iy: i32; + let mut is: i32; + + hx = x.to_bits() as i32; + hy = y.to_bits() as i32; + + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x**0 = 1, even if x is NaN */ + if iy == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3f800000 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7f800000 || iy > 0x7f800000 { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + yisint = 0; + if hx < 0 { + if iy >= 0x4b800000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3f800000 { + k = (iy >> 23) - 0x7f; /* exponent */ + j = iy >> (23 - k); + if (j << (23 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + + /* special value of y */ + if iy == 0x7f800000 { + /* y is +-inf */ + if ix == 0x3f800000 { + /* (-1)**+-inf is 1 */ + return 1.0; + } else if ix > 0x3f800000 { + /* (|x|>1)**+-inf = inf,0 */ + return if hy >= 0 { + y + } else { + 0.0 + }; + } else { + /* (|x|<1)**+-inf = 0,inf */ + return if hy >= 0 { + 0.0 + } else { + -y + }; + } + } + if iy == 0x3f800000 { + /* y is +-1 */ + return if hy >= 0 { + x + } else { + 1.0 / x + }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3f000000 + /* y is 0.5 */ + && hx >= 0 + { + /* x >= +0 */ + return sqrtf(x); + } + + ax = fabsf(x); + /* special value of x */ + if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { + /* x is +-0,+-inf,+-1 */ + z = ax; + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3f800000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + return z; + } + + sn = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + sn = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x4d000000 { + /* if |y| > 2**27 */ + /* over/underflow if x is not close to one */ + if ix < 0x3f7ffff8 { + return if hy < 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + if ix > 0x3f800007 { + return if hy > 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + t = ax - 1.; /* t has 20 trailing zeros */ + w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); + u = IVLN2_H * t; /* IVLN2_H has 16 sig. bits */ + v = t * IVLN2_L - w * IVLN2; + t1 = u + v; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = v - (t1 - u); + } else { + let mut s2: f32; + let mut s_h: f32; + let s_l: f32; + let mut t_h: f32; + let mut t_l: f32; + + n = 0; + /* take care subnormal number */ + if ix < 0x00800000 { + ax *= TWO24; + n -= 24; + ix = ax.to_bits() as i32; + } + n += ((ix) >> 23) - 0x7f; + j = ix & 0x007fffff; + /* determine interval */ + ix = j | 0x3f800000; /* normalize ix */ + if j <= 0x1cc471 { + /* |x|<sqrt(3/2) */ + k = 0; + } else if j < 0x5db3d7 { + /* |x|<sqrt(3) */ + k = 1; + } else { + k = 0; + n += 1; + ix -= 0x00800000; + } + ax = f32::from_bits(ix as u32); + + /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + v = 1.0 / (ax + i!(BP, k as usize)); + s = u * v; + s_h = s; + is = s_h.to_bits() as i32; + s_h = f32::from_bits(is as u32 & 0xfffff000); + /* t_h=ax+bp[k] High */ + is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; + t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); + t_l = ax - (t_h - i!(BP, k as usize)); + s_l = v * ((u - s_h * t_h) - s_h * t_l); + /* compute log(ax) */ + s2 = s * s; + r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); + r += s_l * (s_h + s); + s2 = s_h * s_h; + t_h = 3.0 + s2 + r; + is = t_h.to_bits() as i32; + t_h = f32::from_bits(is as u32 & 0xfffff000); + t_l = r - ((t_h - 3.0) - s2); + /* u+v = s*(1+...) 
*/ + u = s_h * t_h; + v = s_l * t_h + t_l * s; + /* 2/(3log2)*(s+...) */ + p_h = u + v; + is = p_h.to_bits() as i32; + p_h = f32::from_bits(is as u32 & 0xfffff000); + p_l = v - (p_h - u); + z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); + /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = n as f32; + t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); + }; + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + is = y.to_bits() as i32; + y1 = f32::from_bits(is as u32 & 0xfffff000); + p_l = (y - y1) * t1 + y * t2; + p_h = y1 * t1; + z = p_l + p_h; + j = z.to_bits() as i32; + if j > 0x43000000 { + /* if z > 128 */ + return sn * HUGE * HUGE; /* overflow */ + } else if j == 0x43000000 { + /* if z == 128 */ + if p_l + OVT > z - p_h { + return sn * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) > 0x43160000 { + /* z < -150 */ + // FIXME: check should be (uint32_t)j > 0xc3160000 + return sn * TINY * TINY; /* underflow */ + } else if j as u32 == 0xc3160000 + /* z == -150 */ + && p_l <= z - p_h + { + return sn * TINY * TINY; /* underflow */ + } + + /* + * compute 2**(p_h+p_l) + */ + i = j & 0x7fffffff; + k = (i >> 23) - 0x7f; + n = 0; + if i > 0x3f000000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00800000 >> (k + 1)); + k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */ + t = f32::from_bits(n as u32 & !(0x007fffff >> k)); + n = ((n & 0x007fffff) | 0x00800000) >> (23 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + t = p_l + p_h; + is = t.to_bits() as i32; + t = f32::from_bits(is as u32 & 0xffff8000); + u = t * LG2_H; + v = (p_l - (t - p_h)) * LG2 + t * LG2_L; + z = u + v; + w = v - (z - u); + t = z * z; + t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + r = (z * t1) / (t1 - 2.0) - (w + z * w); + z = 1.0 - (r - z); + j = z.to_bits() as i32; + j += n << 23; + if (j >> 23) <= 0 { + /* subnormal output */ + z = scalbnf(z, n); + } else { + z = f32::from_bits(j as u32); + } + sn * z +} + +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +pub fn sqrtf(x: f32) -> f32 { + #[cfg(target_feature = "sse")] + { + // Note: This path is unlikely since LLVM will usually have already + // optimized sqrt calls into hardware instructions if sse is available, + // but if someone does end up here they'll apprected the speed increase. + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + // SAFETY: safe, since `_mm_set_ss` takes a 32-bit float, and returns + // a 128-bit type with the lowest 32-bits as `x`, `_mm_sqrt_ss` calculates + // the sqrt of this 128-bit vector, and `_mm_cvtss_f32` extracts the lower + // 32-bits as a 32-bit float. 
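+        // (Added note): strictly, `_mm_sqrt_ss` only takes the square root
+        // of the lowest lane and passes the upper lanes through unchanged,
+        // which is exactly the scalar behavior `sqrtf` needs.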
+ unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } + } + #[cfg(not(target_feature = "sse"))] + { + const TINY: f32 = 1.0e-30; + + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; + + ix = x.to_bits() as i32; + + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } + + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ + ix += ix; + } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ + + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) + } +} + +/// Absolute value (magnitude) (f32) +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +pub fn fabsf(x: f32) -> f32 { + f32::from_bits(x.to_bits() & 0x7fffffff) +} + +pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 + let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 + + if n > 127 { + x *= x1p127; + n -= 127; + if n > 127 { + x *= x1p127; + n -= 127; + if n > 127 { + n = 127; + } + } + } else if n < -126 { + x *= x1p_126 * x1p24; + n += 126 - 24; + if n < -126 { + x *= x1p_126 * x1p24; + n += 126 - 24; + if n < -126 { + n = -126; + } + } + } + x * f32::from_bits(((0x7f + n) as u32) << 23) +} + +/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +// pow(x,y) return x**y +// +// n +// Method: Let x = 2 * (1+f) +// 1. Compute and return log2(x) in two pieces: +// log2(x) = w1 + w2, +// where w1 has 53-24 = 29 bit trailing zeros. +// 2. Perform y*log2(x) = n+y' by simulating muti-precision +// arithmetic, where |y'|<=0.5. +// 3. Return x**y = 2**n*exp(y'*log2) +// +// Special cases: +// 1. (anything) ** 0 is 1 +// 2. 1 ** (anything) is 1 +// 3. (anything except 1) ** NAN is NAN +// 4. NAN ** (anything except 0) is NAN +// 5. +-(|x| > 1) ** +INF is +INF +// 6. +-(|x| > 1) ** -INF is +0 +// 7. +-(|x| < 1) ** +INF is +0 +// 8. +-(|x| < 1) ** -INF is +INF +// 9. -1 ** +-INF is 1 +// 10. +0 ** (+anything except 0, NAN) is +0 +// 11. 
-0 ** (+anything except 0, NAN, odd integer) is +0 +// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero +// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero +// 14. -0 ** (+odd integer) is -0 +// 15. -0 ** (-odd integer) is -INF, raise divbyzero +// 16. +INF ** (+anything except 0,NAN) is +INF +// 17. +INF ** (-anything except 0,NAN) is +0 +// 18. -INF ** (+odd integer) is -INF +// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) +// 20. (anything) ** 1 is (anything) +// 21. (anything) ** -1 is 1/(anything) +// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) +// 23. (-anything except 0 and inf) ** (non-integer) is NAN +// +// Accuracy: +// pow(x,y) returns x**y nearly rounded. In particular +// pow(integer,integer) +// always returns the correct integer provided it is +// representable. +// +// Constants : +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. + +pub fn powd(x: f64, y: f64) -> f64 { + const BP: [f64; 2] = [1.0, 1.5]; + const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ + const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ + const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ + const HUGE: f64 = 1.0e300; + const TINY: f64 = 1.0e-300; + + // poly coefs for (3/2)*(log(x)-2s-2/3*s**3: + const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ + const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ + const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ + const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ + const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ + const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ + const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ + const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ + const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ + const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */ + const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ + const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ + const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ + const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ + const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ + const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ + const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ + const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ + const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ + const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ + const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ + + let t1: f64; + let t2: f64; + + let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); + let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); + + let mut ix: i32 = (hx & 0x7fffffff) as i32; + let iy: i32 = (hy & 0x7fffffff) as i32; + + /* x**0 = 1, even if x is NaN */ + if ((iy as u32) | ly) == 0 { + return 1.0; + } + + /* 
1**y = 1, even if y is NaN */ + if hx == 0x3ff00000 && lx == 0 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7ff00000 + || (ix == 0x7ff00000 && lx != 0) + || iy > 0x7ff00000 + || (iy == 0x7ff00000 && ly != 0) + { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + let mut yisint: i32 = 0; + let mut k: i32; + let mut j: i32; + if hx < 0 { + if iy >= 0x43400000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3ff00000 { + k = (iy >> 20) - 0x3ff; /* exponent */ + + if k > 20 { + j = (ly >> (52 - k)) as i32; + + if (j << (52 - k)) == (ly as i32) { + yisint = 2 - (j & 1); + } + } else if ly == 0 { + j = iy >> (20 - k); + + if (j << (20 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + } + + if ly == 0 { + /* special value of y */ + if iy == 0x7ff00000 { + /* y is +-inf */ + + return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { + /* (-1)**+-inf is 1 */ + 1.0 + } else if ix >= 0x3ff00000 { + /* (|x|>1)**+-inf = inf,0 */ + if hy >= 0 { + y + } else { + 0.0 + } + } else { + /* (|x|<1)**+-inf = 0,inf */ + if hy >= 0 { + 0.0 + } else { + -y + } + }; + } + + if iy == 0x3ff00000 { + /* y is +-1 */ + return if hy >= 0 { + x + } else { + 1.0 / x + }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3fe00000 { + /* y is 0.5 */ + if hx >= 0 { + /* x >= +0 */ + return sqrtd(x); + } + } + } + + let mut ax: f64 = fabsd(x); + if lx == 0 { + /* special value of x */ + if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { + /* x is +-0,+-inf,+-1 */ + let mut z: f64 = ax; + + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3ff00000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + + return z; + } + } + + let mut s: f64 = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + s = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x41e00000 { + /* if |y| > 2**31 */ + if iy > 0x43f00000 { + /* if |y| > 2**64, must o/uflow */ + if ix <= 0x3fefffff { + return if hy < 0 { + HUGE * HUGE + } else { + TINY * TINY + }; + } + + if ix >= 0x3ff00000 { + return if hy > 0 { + HUGE * HUGE + } else { + TINY * TINY + }; + } + } + + /* over/underflow if x is not close to one */ + if ix < 0x3fefffff { + return if hy < 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; + } + if ix > 0x3ff00000 { + return if hy > 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ + let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); + let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. 
bits */ + let v: f64 = t * IVLN2_L - w * IVLN2; + t1 = with_set_low_word(u + v, 0); + t2 = v - (t1 - u); + } else { + // double ss,s2,s_h,s_l,t_h,t_l; + let mut n: i32 = 0; + + if ix < 0x00100000 { + /* take care subnormal number */ + ax *= TWO53; + n -= 53; + ix = get_high_word(ax) as i32; + } + + n += (ix >> 20) - 0x3ff; + j = ix & 0x000fffff; + + /* determine interval */ + let k: i32; + ix = j | 0x3ff00000; /* normalize ix */ + if j <= 0x3988E { + /* |x|<sqrt(3/2) */ + k = 0; + } else if j < 0xBB67A { + /* |x|<sqrt(3) */ + k = 1; + } else { + k = 0; + n += 1; + ix -= 0x00100000; + } + ax = with_set_high_word(ax, ix as u32); + + /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + let v: f64 = 1.0 / (ax + i!(BP, k as usize)); + let ss: f64 = u * v; + let s_h = with_set_low_word(ss, 0); + + /* t_h=ax+bp[k] High */ + let t_h: f64 = with_set_high_word( + 0.0, + ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), + ); + let t_l: f64 = ax - (t_h - i!(BP, k as usize)); + let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); + + /* compute log(ax) */ + let s2: f64 = ss * ss; + let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); + r += s_l * (s_h + ss); + let s2: f64 = s_h * s_h; + let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0); + let t_l: f64 = r - ((t_h - 3.0) - s2); + + /* u+v = ss*(1+...) */ + let u: f64 = s_h * t_h; + let v: f64 = s_l * t_h + t_l * ss; + + /* 2/(3log2)*(ss+...) */ + let p_h: f64 = with_set_low_word(u + v, 0); + let p_l = v - (p_h - u); + let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); + + /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + let t: f64 = n as f64; + t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); + } + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + let y1: f64 = with_set_low_word(y, 0); + let p_l: f64 = (y - y1) * t1 + y * t2; + let mut p_h: f64 = y1 * t1; + let z: f64 = p_l + p_h; + let mut j: i32 = (z.to_bits() >> 32) as i32; + let i: i32 = z.to_bits() as i32; + // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32); + + if j >= 0x40900000 { + /* z >= 1024 */ + if (j - 0x40900000) | i != 0 { + /* if z > 1024 */ + return s * HUGE * HUGE; /* overflow */ + } + + if p_l + OVT > z - p_h { + return s * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) >= 0x4090cc00 { + /* z <= -1075 */ + // FIXME: instead of abs(j) use unsigned j + + if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 { + /* z < -1075 */ + return s * TINY * TINY; /* underflow */ + } + + if p_l <= z - p_h { + return s * TINY * TINY; /* underflow */ + } + } + + /* compute 2**(p_h+p_l) */ + let i: i32 = j & (0x7fffffff as i32); + k = (i >> 20) - 0x3ff; + let mut n: i32 = 0; + + if i > 0x3fe00000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00100000 >> (k + 1)); + k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */ + let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32); + n = ((n & 0x000fffff) | 0x00100000) >> (20 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + + let t: f64 = with_set_low_word(p_l + p_h, 0); + let u: f64 = t * LG2_H; + let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L; + let mut z: f64 = u + v; + let w: f64 = v - (z - u); + let t: f64 = z * z; + let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + let r: f64 = 
(z * t1) / (t1 - 2.0) - (w + z * w); + z = 1.0 - (r - z); + j = get_high_word(z) as i32; + j += n << 20; + + if (j >> 20) <= 0 { + /* subnormal output */ + z = scalbnd(z, n); + } else { + z = with_set_high_word(z, j as u32); + } + + s * z +} + +/// Absolute value (magnitude) (f64) +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +pub fn fabsd(x: f64) -> f64 { + f64::from_bits(x.to_bits() & (u64::MAX / 2)) +} + +pub fn scalbnd(x: f64, mut n: i32) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 + let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) + + let mut y = x; + + if n > 1023 { + y *= x1p1023; + n -= 1023; + if n > 1023 { + y *= x1p1023; + n -= 1023; + if n > 1023 { + n = 1023; + } + } + } else if n < -1022 { + /* make sure final n < -53 to avoid double + rounding in the subnormal range */ + y *= x1p_1022 * x1p53; + n += 1022 - 53; + if n < -1022 { + y *= x1p_1022 * x1p53; + n += 1022 - 53; + if n < -1022 { + n = -1022; + } + } + } + y * f64::from_bits(((0x3ff + n) as u64) << 52) +} + +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* sqrt(x) + * Return correctly rounded sqrt. + * ------------------------------------------ + * | Use the hardware sqrt if you have one | + * ------------------------------------------ + * Method: + * Bit by bit method using integer arithmetic. (Slow, but portable) + * 1. Normalization + * Scale x to y in [1,4) with even powers of 2: + * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then + * sqrt(x) = 2^k * sqrt(y) + * 2. Bit by bit computation + * Let q = sqrt(y) truncated to i bit after binary point (q = 1), + * i 0 + * i+1 2 + * s = 2*q , and y = 2 * ( y - q ). (1) + * i i i i + * + * To compute q from q , one checks whether + * i+1 i + * + * -(i+1) 2 + * (q + 2 ) <= y. (2) + * i + * -(i+1) + * If (2) is false, then q = q ; otherwise q = q + 2 . + * i+1 i i+1 i + * + * With some algebraic manipulation, it is not difficult to see + * that (2) is equivalent to + * -(i+1) + * s + 2 <= y (3) + * i i + * + * The advantage of (3) is that s and y can be computed by + * i i + * the following recurrence formula: + * if (3) is false + * + * s = s , y = y ; (4) + * i+1 i i+1 i + * + * otherwise, + * -i -(i+1) + * s = s + 2 , y = y - s - 2 (5) + * i+1 i i+1 i i + * + * One may easily use induction to prove (4) and (5). + * Note. Since the left hand side of (3) contain only i+2 bits, + * it does not necessary to do a full (53-bit) comparison + * in (3). + * 3. Final rounding + * After generating the 53 bits result, we compute one more bit. + * Together with the remainder, we can decide whether the + * result is exact, bigger than 1/2ulp, or less than 1/2ulp + * (it will never equal to 1/2ulp). + * The rounding mode can be detected by checking whether + * huge + tiny is equal to huge, and whether huge - tiny is + * equal to huge for some floating point number "huge" and "tiny". + * + * Special cases: + * sqrt(+-0) = +-0 ... 
exact
+ *      sqrt(inf) = inf
+ *      sqrt(-ve) = NaN ... with invalid signal
+ *      sqrt(NaN) = NaN ... with invalid signal for signaling NaN
+ */
+
+pub fn sqrtd(x: f64) -> f64 {
+    #[cfg(target_feature = "sse2")]
+    {
+        // Note: This path is unlikely since LLVM will usually have already
+        // optimized sqrt calls into hardware instructions if sse2 is available,
+        // but if someone does end up here they'll appreciate the speed increase.
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::*;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::*;
+        // SAFETY: safe, since `_mm_set_sd` takes a 64-bit float, and returns
+        // a 128-bit type with the lowest 64-bits as `x`, `_mm_sqrt_pd` calculates
+        // the sqrt of this 128-bit vector, and `_mm_cvtsd_f64` extracts the lower
+        // 64-bits as a 64-bit float.
+        unsafe {
+            let m = _mm_set_sd(x);
+            let m_sqrt = _mm_sqrt_pd(m);
+            _mm_cvtsd_f64(m_sqrt)
+        }
+    }
+    #[cfg(not(target_feature = "sse2"))]
+    {
+        use core::num::Wrapping;
+
+        const TINY: f64 = 1.0e-300;
+
+        let mut z: f64;
+        let sign: Wrapping<u32> = Wrapping(0x80000000);
+        let mut ix0: i32;
+        let mut s0: i32;
+        let mut q: i32;
+        let mut m: i32;
+        let mut t: i32;
+        let mut i: i32;
+        let mut r: Wrapping<u32>;
+        let mut t1: Wrapping<u32>;
+        let mut s1: Wrapping<u32>;
+        let mut ix1: Wrapping<u32>;
+        let mut q1: Wrapping<u32>;
+
+        ix0 = (x.to_bits() >> 32) as i32;
+        ix1 = Wrapping(x.to_bits() as u32);
+
+        /* take care of Inf and NaN */
+        if (ix0 & 0x7ff00000) == 0x7ff00000 {
+            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+        }
+        /* take care of zero */
+        if ix0 <= 0 {
+            if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
+                return x; /* sqrt(+-0) = +-0 */
+            }
+            if ix0 < 0 {
+                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
+            }
+        }
+        /* normalize x */
+        m = ix0 >> 20;
+        if m == 0 {
+            /* subnormal x */
+            while ix0 == 0 {
+                m -= 21;
+                ix0 |= (ix1 >> 11).0 as i32;
+                ix1 <<= 21;
+            }
+            i = 0;
+            while (ix0 & 0x00100000) == 0 {
+                i += 1;
+                ix0 <<= 1;
+            }
+            m -= i - 1;
+            ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
+            ix1 = ix1 << i as usize;
+        }
+        m -= 1023; /* unbias exponent */
+        ix0 = (ix0 & 0x000fffff) | 0x00100000;
+        if (m & 1) == 1 {
+            /* odd m, double x to make it even */
+            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+            ix1 += ix1;
+        }
+        m >>= 1; /* m = [m/2] */
+
+        /* generate sqrt(x) bit by bit */
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+        q = 0; /* [q,q1] = sqrt(x) */
+        q1 = Wrapping(0);
+        s0 = 0;
+        s1 = Wrapping(0);
+        r = Wrapping(0x00200000); /* r = moving bit from right to left */
+
+        while r != Wrapping(0) {
+            t = s0 + r.0 as i32;
+            if t <= ix0 {
+                s0 = t + r.0 as i32;
+                ix0 -= t;
+                q += r.0 as i32;
+            }
+            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+            ix1 += ix1;
+            r >>= 1;
+        }
+
+        r = sign;
+        while r != Wrapping(0) {
+            t1 = s1 + r;
+            t = s0;
+            if t < ix0 || (t == ix0 && t1 <= ix1) {
+                s1 = t1 + r;
+                if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
+                    s0 += 1;
+                }
+                ix0 -= t;
+                if ix1 < t1 {
+                    ix0 -= 1;
+                }
+                ix1 -= t1;
+                q1 += r;
+            }
+            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+            ix1 += ix1;
+            r >>= 1;
+        }
+
+        /* use floating add to find out rounding direction */
+        if (ix0 as u32 | ix1.0) != 0 {
+            z = 1.0 - TINY; /* raise inexact flag */
+            if z >= 1.0 {
+                z = 1.0 + TINY;
+                if q1.0 == 0xffffffff {
+                    q1 = Wrapping(0);
+                    q += 1;
+                } else if z > 1.0 {
+                    if q1.0 == 0xfffffffe {
+                        q += 1;
+                    }
+                    q1 += Wrapping(2);
+                } else {
+                    q1 += q1 & Wrapping(1);
+                }
+            }
+        }
+        ix0 = (q >> 1) + 0x3fe00000;
+        ix1 = q1 >> 1;
+        if (q & 1) == 1 {
+            ix1 |= sign;
+        }
+        ix0 += m <<
20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) + } +} + +#[inline] +fn get_high_word(x: f64) -> u32 { + (x.to_bits() >> 32) as u32 +} + +#[inline] +fn with_set_high_word(f: f64, hi: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0x00000000_ffffffff; + tmp |= (hi as u64) << 32; + f64::from_bits(tmp) +} + +#[inline] +fn with_set_low_word(f: f64, lo: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0xffffffff_00000000; + tmp |= lo as u64; + f64::from_bits(tmp) +} diff --git a/third_party/rust/minimal-lexical/src/mask.rs b/third_party/rust/minimal-lexical/src/mask.rs new file mode 100644 index 0000000000..1957c8be03 --- /dev/null +++ b/third_party/rust/minimal-lexical/src/mask.rs @@ -0,0 +1,60 @@ +//! Utilities to generate bitmasks. + +#![doc(hidden)] + +/// Generate a bitwise mask for the lower `n` bits. +/// +/// # Examples +/// +/// ```rust +/// # use minimal_lexical::mask::lower_n_mask; +/// # pub fn main() { +/// assert_eq!(lower_n_mask(2), 0b11); +/// # } +/// ``` +#[inline] +pub fn lower_n_mask(n: u64) -> u64 { + debug_assert!(n <= 64, "lower_n_mask() overflow in shl."); + + match n == 64 { + // u64::MAX for older Rustc versions. + true => 0xffff_ffff_ffff_ffff, + false => (1 << n) - 1, + } +} + +/// Calculate the halfway point for the lower `n` bits. +/// +/// # Examples +/// +/// ```rust +/// # use minimal_lexical::mask::lower_n_halfway; +/// # pub fn main() { +/// assert_eq!(lower_n_halfway(2), 0b10); +/// # } +/// ``` +#[inline] +pub fn lower_n_halfway(n: u64) -> u64 { + debug_assert!(n <= 64, "lower_n_halfway() overflow in shl."); + + match n == 0 { + true => 0, + false => nth_bit(n - 1), + } +} + +/// Calculate a scalar factor of 2 above the halfway point. +/// +/// # Examples +/// +/// ```rust +/// # use minimal_lexical::mask::nth_bit; +/// # pub fn main() { +/// assert_eq!(nth_bit(2), 0b100); +/// # } +/// ``` +#[inline] +pub fn nth_bit(n: u64) -> u64 { + debug_assert!(n < 64, "nth_bit() overflow in shl."); + 1 << n +} diff --git a/third_party/rust/minimal-lexical/src/num.rs b/third_party/rust/minimal-lexical/src/num.rs new file mode 100644 index 0000000000..9f682b9cbb --- /dev/null +++ b/third_party/rust/minimal-lexical/src/num.rs @@ -0,0 +1,308 @@ +//! Utilities for Rust numbers. + +#![doc(hidden)] + +#[cfg(all(not(feature = "std"), feature = "compact"))] +use crate::libm::{powd, powf}; +#[cfg(not(feature = "compact"))] +use crate::table::{SMALL_F32_POW10, SMALL_F64_POW10, SMALL_INT_POW10, SMALL_INT_POW5}; +#[cfg(not(feature = "compact"))] +use core::hint; +use core::ops; + +/// Generic floating-point type, to be used in generic code for parsing. +/// +/// Although the trait is part of the public API, the trait provides methods +/// and constants that are effectively non-public: they may be removed +/// at any time without any breaking changes. +pub trait Float: + Sized + + Copy + + PartialEq + + PartialOrd + + Send + + Sync + + ops::Add<Output = Self> + + ops::AddAssign + + ops::Div<Output = Self> + + ops::DivAssign + + ops::Mul<Output = Self> + + ops::MulAssign + + ops::Rem<Output = Self> + + ops::RemAssign + + ops::Sub<Output = Self> + + ops::SubAssign + + ops::Neg<Output = Self> +{ + /// Maximum number of digits that can contribute in the mantissa. + /// + /// We can exactly represent a float in radix `b` from radix 2 if + /// `b` is divisible by 2. This function calculates the exact number of + /// digits required to exactly represent that float. 
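+    /// (For decimal input, the `f32` and `f64` implementations below use
+    /// 114 and 769 digits, respectively.)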
+    ///
+    /// According to the "Handbook of Floating Point Arithmetic",
+    /// for IEEE754, with emin being the min exponent, p2 being the
+    /// precision, and b being the radix, the number of digits follows as:
+    ///
+    /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
+    ///
+    /// For f32, this follows as:
+    ///     emin = -126
+    ///     p2 = 24
+    ///
+    /// For f64, this follows as:
+    ///     emin = -1022
+    ///     p2 = 53
+    ///
+    /// In Python:
+    ///     `-emin + p2 + math.floor((emin+1)*math.log(2, b) - math.log(1-2**(-p2), b))`
+    ///
+    /// This was used to calculate the maximum number of digits for [2, 36].
+    const MAX_DIGITS: usize;
+
+    // MASKS
+
+    /// Bitmask for the sign bit.
+    const SIGN_MASK: u64;
+    /// Bitmask for the exponent, including the hidden bit.
+    const EXPONENT_MASK: u64;
+    /// Bitmask for the hidden bit in exponent, which is an implicit 1 in the fraction.
+    const HIDDEN_BIT_MASK: u64;
+    /// Bitmask for the mantissa (fraction), excluding the hidden bit.
+    const MANTISSA_MASK: u64;
+
+    // PROPERTIES
+
+    /// Size of the significand (mantissa) without hidden bit.
+    const MANTISSA_SIZE: i32;
+    /// Bias of the exponent.
+    const EXPONENT_BIAS: i32;
+    /// Exponent portion of a denormal float.
+    const DENORMAL_EXPONENT: i32;
+    /// Maximum exponent value in float.
+    const MAX_EXPONENT: i32;
+
+    // ROUNDING
+
+    /// Mask to determine if a full-carry occurred (1 in bit above hidden bit).
+    const CARRY_MASK: u64;
+
+    /// Bias for marking an invalid extended float.
+    // Value is `i16::MIN`, using hard-coded constants for older Rustc versions.
+    const INVALID_FP: i32 = -0x8000;
+
+    // Maximum mantissa for the fast-path (`1 << 53` for f64).
+    const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_SIZE;
+
+    // Largest exponent value `(1 << EXP_BITS) - 1`.
+    const INFINITE_POWER: i32 = Self::MAX_EXPONENT + Self::EXPONENT_BIAS;
+
+    // Round-to-even only happens for negative values of q
+    // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
+    // the 32-bit case.
+    //
+    // When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
+    // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
+    // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
+    //
+    // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
+    // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
+    // or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
+    // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
+    // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
+    //
+    // Thus we have that we only need to round ties to even when
+    // we have that q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
+    // (in the 32-bit case). In both cases, the power of five (5^|q|)
+    // fits in a 64-bit word.
+    const MIN_EXPONENT_ROUND_TO_EVEN: i32;
+    const MAX_EXPONENT_ROUND_TO_EVEN: i32;
+
+    /// Minimum normal exponent value `-(1 << (EXPONENT_SIZE - 1)) + 1`.
+    const MINIMUM_EXPONENT: i32;
+
+    /// Smallest decimal exponent for a non-zero value.
+    const SMALLEST_POWER_OF_TEN: i32;
+
+    /// Largest decimal exponent for a non-infinite value.
+    const LARGEST_POWER_OF_TEN: i32;
+
+    /// Minimum exponent for a fast path case, or `-⌊(MANTISSA_SIZE+1)/log2(5)⌋`.
+    const MIN_EXPONENT_FAST_PATH: i32;
+
+    /// Maximum exponent for a fast path case, or `⌊(MANTISSA_SIZE+1)/log2(5)⌋`.
+    const MAX_EXPONENT_FAST_PATH: i32;
+
+    /// Maximum exponent that can be represented for a disguised-fast path case.
+ /// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_SIZE+1)/log2(10)⌋` + const MAX_EXPONENT_DISGUISED_FAST_PATH: i32; + + /// Convert 64-bit integer to float. + fn from_u64(u: u64) -> Self; + + // Re-exported methods from std. + fn from_bits(u: u64) -> Self; + fn to_bits(self) -> u64; + + /// Get a small power-of-radix for fast-path multiplication. + /// + /// # Safety + /// + /// Safe as long as the exponent is smaller than the table size. + unsafe fn pow_fast_path(exponent: usize) -> Self; + + /// Get a small, integral power-of-radix for fast-path multiplication. + /// + /// # Safety + /// + /// Safe as long as the exponent is smaller than the table size. + #[inline(always)] + unsafe fn int_pow_fast_path(exponent: usize, radix: u32) -> u64 { + // SAFETY: safe as long as the exponent is smaller than the radix table. + #[cfg(not(feature = "compact"))] + return match radix { + 5 => unsafe { *SMALL_INT_POW5.get_unchecked(exponent) }, + 10 => unsafe { *SMALL_INT_POW10.get_unchecked(exponent) }, + _ => unsafe { hint::unreachable_unchecked() }, + }; + + #[cfg(feature = "compact")] + return (radix as u64).pow(exponent as u32); + } + + /// Returns true if the float is a denormal. + #[inline] + fn is_denormal(self) -> bool { + self.to_bits() & Self::EXPONENT_MASK == 0 + } + + /// Get exponent component from the float. + #[inline] + fn exponent(self) -> i32 { + if self.is_denormal() { + return Self::DENORMAL_EXPONENT; + } + + let bits = self.to_bits(); + let biased_e: i32 = ((bits & Self::EXPONENT_MASK) >> Self::MANTISSA_SIZE) as i32; + biased_e - Self::EXPONENT_BIAS + } + + /// Get mantissa (significand) component from float. + #[inline] + fn mantissa(self) -> u64 { + let bits = self.to_bits(); + let s = bits & Self::MANTISSA_MASK; + if !self.is_denormal() { + s + Self::HIDDEN_BIT_MASK + } else { + s + } + } +} + +impl Float for f32 { + const MAX_DIGITS: usize = 114; + const SIGN_MASK: u64 = 0x80000000; + const EXPONENT_MASK: u64 = 0x7F800000; + const HIDDEN_BIT_MASK: u64 = 0x00800000; + const MANTISSA_MASK: u64 = 0x007FFFFF; + const MANTISSA_SIZE: i32 = 23; + const EXPONENT_BIAS: i32 = 127 + Self::MANTISSA_SIZE; + const DENORMAL_EXPONENT: i32 = 1 - Self::EXPONENT_BIAS; + const MAX_EXPONENT: i32 = 0xFF - Self::EXPONENT_BIAS; + const CARRY_MASK: u64 = 0x1000000; + const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17; + const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10; + const MINIMUM_EXPONENT: i32 = -127; + const SMALLEST_POWER_OF_TEN: i32 = -65; + const LARGEST_POWER_OF_TEN: i32 = 38; + const MIN_EXPONENT_FAST_PATH: i32 = -10; + const MAX_EXPONENT_FAST_PATH: i32 = 10; + const MAX_EXPONENT_DISGUISED_FAST_PATH: i32 = 17; + + #[inline(always)] + unsafe fn pow_fast_path(exponent: usize) -> Self { + // SAFETY: safe as long as the exponent is smaller than the radix table. + #[cfg(not(feature = "compact"))] + return unsafe { *SMALL_F32_POW10.get_unchecked(exponent) }; + + #[cfg(feature = "compact")] + return powf(10.0f32, exponent as f32); + } + + #[inline] + fn from_u64(u: u64) -> f32 { + u as _ + } + + #[inline] + fn from_bits(u: u64) -> f32 { + // Constant is `u32::MAX` for older Rustc versions. 
+        debug_assert!(u <= 0xffff_ffff);
+        f32::from_bits(u as u32)
+    }
+
+    #[inline]
+    fn to_bits(self) -> u64 {
+        f32::to_bits(self) as u64
+    }
+}
+
+impl Float for f64 {
+    const MAX_DIGITS: usize = 769;
+    const SIGN_MASK: u64 = 0x8000000000000000;
+    const EXPONENT_MASK: u64 = 0x7FF0000000000000;
+    const HIDDEN_BIT_MASK: u64 = 0x0010000000000000;
+    const MANTISSA_MASK: u64 = 0x000FFFFFFFFFFFFF;
+    const MANTISSA_SIZE: i32 = 52;
+    const EXPONENT_BIAS: i32 = 1023 + Self::MANTISSA_SIZE;
+    const DENORMAL_EXPONENT: i32 = 1 - Self::EXPONENT_BIAS;
+    const MAX_EXPONENT: i32 = 0x7FF - Self::EXPONENT_BIAS;
+    const CARRY_MASK: u64 = 0x20000000000000;
+    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
+    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
+    const MINIMUM_EXPONENT: i32 = -1023;
+    const SMALLEST_POWER_OF_TEN: i32 = -342;
+    const LARGEST_POWER_OF_TEN: i32 = 308;
+    const MIN_EXPONENT_FAST_PATH: i32 = -22;
+    const MAX_EXPONENT_FAST_PATH: i32 = 22;
+    const MAX_EXPONENT_DISGUISED_FAST_PATH: i32 = 37;
+
+    #[inline(always)]
+    unsafe fn pow_fast_path(exponent: usize) -> Self {
+        // SAFETY: safe as long as the exponent is smaller than the radix table.
+        #[cfg(not(feature = "compact"))]
+        return unsafe { *SMALL_F64_POW10.get_unchecked(exponent) };
+
+        #[cfg(feature = "compact")]
+        return powd(10.0f64, exponent as f64);
+    }
+
+    #[inline]
+    fn from_u64(u: u64) -> f64 {
+        u as _
+    }
+
+    #[inline]
+    fn from_bits(u: u64) -> f64 {
+        f64::from_bits(u)
+    }
+
+    #[inline]
+    fn to_bits(self) -> u64 {
+        f64::to_bits(self)
+    }
+}
+
+#[inline(always)]
+#[cfg(all(feature = "std", feature = "compact"))]
+pub fn powf(x: f32, y: f32) -> f32 {
+    x.powf(y)
+}
+
+#[inline(always)]
+#[cfg(all(feature = "std", feature = "compact"))]
+pub fn powd(x: f64, y: f64) -> f64 {
+    x.powf(y)
+}
diff --git a/third_party/rust/minimal-lexical/src/number.rs b/third_party/rust/minimal-lexical/src/number.rs
new file mode 100644
index 0000000000..5981f9dd79
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/number.rs
@@ -0,0 +1,83 @@
+//! Representation of a float as the significant digits and exponent.
+//!
+//! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust),
+//! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust.
+
+#![doc(hidden)]
+
+#[cfg(feature = "nightly")]
+use crate::fpu::set_precision;
+use crate::num::Float;
+
+/// Representation of a number as the significant digits and exponent.
+///
+/// This is only used if the exponent base and the significant digit
+/// radix are the same, since we need to be able to move powers in and
+/// out of the exponent.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct Number {
+    /// The exponent of the float, scaled to the mantissa.
+    pub exponent: i32,
+    /// The significant digits of the float.
+    pub mantissa: u64,
+    /// If the significant digits were truncated.
+    pub many_digits: bool,
+}
+
+impl Number {
+    /// Detect if the float can be accurately reconstructed from native floats.
+    #[inline]
+    pub fn is_fast_path<F: Float>(&self) -> bool {
+        F::MIN_EXPONENT_FAST_PATH <= self.exponent
+            && self.exponent <= F::MAX_EXPONENT_DISGUISED_FAST_PATH
+            && self.mantissa <= F::MAX_MANTISSA_FAST_PATH
+            && !self.many_digits
+    }
+
+    /// The fast path algorithm using machine-sized integers and floats.
+    ///
+    /// This is extracted into a separate function so that it can be attempted before constructing
+    /// a Decimal.
+    /// This only works if both the mantissa and the exponent
+    /// can be exactly represented as a machine float, since IEEE 754
+    /// guarantees no rounding will occur.
+    ///
+    /// There is an exception: disguised fast-path cases, where we can shift
+    /// powers-of-10 from the exponent to the significant digits.
+    pub fn try_fast_path<F: Float>(&self) -> Option<F> {
+        // The fast path crucially depends on arithmetic being rounded to the correct number of bits
+        // without any intermediate rounding. On x86 (without SSE or SSE2) this requires the precision
+        // of the x87 FPU stack to be changed so that it directly rounds to 64/32 bit.
+        // The `set_precision` function takes care of setting the precision on architectures which
+        // require setting it by changing the global state (like the control word of the x87 FPU).
+        #[cfg(feature = "nightly")]
+        let _cw = set_precision::<F>();
+
+        if self.is_fast_path::<F>() {
+            let max_exponent = F::MAX_EXPONENT_FAST_PATH;
+            Some(if self.exponent <= max_exponent {
+                // normal fast path
+                let value = F::from_u64(self.mantissa);
+                if self.exponent < 0 {
+                    // SAFETY: safe, since the `exponent <= max_exponent`.
+                    value / unsafe { F::pow_fast_path((-self.exponent) as _) }
+                } else {
+                    // SAFETY: safe, since the `exponent <= max_exponent`.
+                    value * unsafe { F::pow_fast_path(self.exponent as _) }
+                }
+            } else {
+                // disguised fast path
+                let shift = self.exponent - max_exponent;
+                // SAFETY: safe, since `shift <= (max_disguised - max_exponent)`.
+                let int_power = unsafe { F::int_pow_fast_path(shift as usize, 10) };
+                let mantissa = self.mantissa.checked_mul(int_power)?;
+                if mantissa > F::MAX_MANTISSA_FAST_PATH {
+                    return None;
+                }
+                // SAFETY: safe, since the `table.len() - 1 == max_exponent`.
+                F::from_u64(mantissa) * unsafe { F::pow_fast_path(max_exponent as _) }
+            })
+        } else {
+            None
+        }
+    }
+}
diff --git a/third_party/rust/minimal-lexical/src/parse.rs b/third_party/rust/minimal-lexical/src/parse.rs
new file mode 100644
index 0000000000..9349699eb3
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/parse.rs
@@ -0,0 +1,201 @@
+//! Parse byte iterators to float.
+
+#![doc(hidden)]
+
+#[cfg(feature = "compact")]
+use crate::bellerophon::bellerophon;
+use crate::extended_float::{extended_to_float, ExtendedFloat};
+#[cfg(not(feature = "compact"))]
+use crate::lemire::lemire;
+use crate::num::Float;
+use crate::number::Number;
+use crate::slow::slow;
+
+/// Try to parse the significant digits quickly.
+///
+/// This attempts a very quick parse, to deal with common cases.
+///
+/// * `integer` - Iterator over the integer digits.
+/// * `fraction` - Iterator over the fraction digits.
+#[inline]
+fn parse_number_fast<'a, Iter1, Iter2>(
+    integer: Iter1,
+    fraction: Iter2,
+    exponent: i32,
+) -> Option<Number>
+where
+    Iter1: Iterator<Item = &'a u8>,
+    Iter2: Iterator<Item = &'a u8>,
+{
+    let mut num = Number::default();
+    let mut integer_count: usize = 0;
+    let mut fraction_count: usize = 0;
+    for &c in integer {
+        integer_count += 1;
+        let digit = c - b'0';
+        num.mantissa = num.mantissa.wrapping_mul(10).wrapping_add(digit as u64);
+    }
+    for &c in fraction {
+        fraction_count += 1;
+        let digit = c - b'0';
+        num.mantissa = num.mantissa.wrapping_mul(10).wrapping_add(digit as u64);
+    }
+
+    if integer_count + fraction_count <= 19 {
+        // Can't overflow, since must be <= 19.
+        num.exponent = exponent.saturating_sub(fraction_count as i32);
+        Some(num)
+    } else {
+        None
+    }
+}
+
+/// Parse the significant digits of the float and adjust the exponent.
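+/// A fast single-pass attempt (`parse_number_fast`) is tried first; inputs
+/// with more than 19 digits fall back to the truncating scan below.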
+///
+/// * `integer` - Iterator over the integer digits.
+/// * `fraction` - Iterator over the fraction digits.
+#[inline]
+fn parse_number<'a, Iter1, Iter2>(mut integer: Iter1, mut fraction: Iter2, exponent: i32) -> Number
+where
+    Iter1: Iterator<Item = &'a u8> + Clone,
+    Iter2: Iterator<Item = &'a u8> + Clone,
+{
+    // NOTE: for performance, we do this in 2 passes:
+    if let Some(num) = parse_number_fast(integer.clone(), fraction.clone(), exponent) {
+        return num;
+    }
+
+    // Can only add 19 digits.
+    let mut num = Number::default();
+    let mut count = 0;
+    while let Some(&c) = integer.next() {
+        count += 1;
+        if count == 20 {
+            // Only the integer digits affect the exponent.
+            num.many_digits = true;
+            num.exponent = exponent.saturating_add(into_i32(1 + integer.count()));
+            return num;
+        } else {
+            let digit = c - b'0';
+            num.mantissa = num.mantissa * 10 + digit as u64;
+        }
+    }
+
+    // Skip leading fraction zeros.
+    // This is required otherwise we might have a 0 mantissa and many digits.
+    let mut fraction_count: usize = 0;
+    if count == 0 {
+        for &c in &mut fraction {
+            fraction_count += 1;
+            if c != b'0' {
+                count += 1;
+                let digit = c - b'0';
+                num.mantissa = num.mantissa * 10 + digit as u64;
+                break;
+            }
+        }
+    }
+    for &c in fraction {
+        fraction_count += 1;
+        count += 1;
+        if count == 20 {
+            num.many_digits = true;
+            // This can't wrap, since we have at most 20 digits.
+            // We've adjusted the exponent too high by `fraction_count - 1`.
+            // Note: -1 is due to incrementing this loop iteration, which we
+            // didn't use.
+            num.exponent = exponent.saturating_sub(fraction_count as i32 - 1);
+            return num;
+        } else {
+            let digit = c - b'0';
+            num.mantissa = num.mantissa * 10 + digit as u64;
+        }
+    }
+
+    // No truncated digits: easy.
+    // Cannot overflow: <= 20 digits.
+    num.exponent = exponent.saturating_sub(fraction_count as i32);
+    num
+}
+
+/// Parse float from extracted float components.
+///
+/// * `integer` - Cloneable, forward iterator over integer digits.
+/// * `fraction` - Cloneable, forward iterator over fraction digits.
+/// * `exponent` - Parsed, 32-bit exponent.
+///
+/// # Preconditions
+/// 1. The integer should not have leading zeros.
+/// 2. The fraction should not have trailing zeros.
+/// 3. All bytes in `integer` and `fraction` should be valid digits,
+///    in the range [`b'0'`, `b'9'`].
+///
+/// # Panics
+///
+/// Although passing garbage input will not cause memory safety issues,
+/// it is very likely to cause a panic with a large number of digits, or
+/// in debug mode. The big-integer arithmetic without the `alloc` feature
+/// assumes a maximum, fixed-width input, which assumes at maximum a
+/// value of `10^(769 + 342)`, or ~4000 bits of storage. Passing in
+/// nonsensical digits may require up to ~6000 bits of storage, which will
+/// panic when attempting to add it to the big integer. It is therefore
+/// up to the caller to validate this input.
+///
+/// We cannot efficiently remove trailing zeros while only accepting a
+/// forward iterator.
+pub fn parse_float<'a, F, Iter1, Iter2>(integer: Iter1, fraction: Iter2, exponent: i32) -> F
+where
+    F: Float,
+    Iter1: Iterator<Item = &'a u8> + Clone,
+    Iter2: Iterator<Item = &'a u8> + Clone,
+{
+    // Parse the mantissa and attempt the fast and moderate-path algorithms.
+    let num = parse_number(integer.clone(), fraction.clone(), exponent);
+    // Try the fast-path algorithm.
+    if let Some(value) = num.try_fast_path() {
+        return value;
+    }
+
+    // Now try the moderate path algorithm.
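+    // `moderate_path` biases the exponent by `F::INVALID_FP` when it cannot
+    // round unambiguously, so a negative `exp` below means the result is only
+    // a lower bound and the slow, big-integer path must finish the job.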
+    let mut fp = moderate_path::<F>(&num);
+    if fp.exp < 0 {
+        // Undo the invalid extended float biasing.
+        fp.exp -= F::INVALID_FP;
+        fp = slow::<F, _, _>(num, fp, integer, fraction);
+    }
+
+    // At this point the extended float is correctly rounded, either via the
+    // moderate path or the slower, but always correct, big-integer fallback:
+    // convert it to a native float.
+    extended_to_float::<F>(fp)
+}
+
+/// Wrapper for different moderate-path algorithms.
+/// A negative return exponent (biased by `F::INVALID_FP`) indicates an
+/// invalid value that could not be unambiguously rounded.
+#[inline]
+pub fn moderate_path<F: Float>(num: &Number) -> ExtendedFloat {
+    #[cfg(not(feature = "compact"))]
+    return lemire::<F>(num);
+
+    #[cfg(feature = "compact")]
+    return bellerophon::<F>(num);
+}
+
+/// Convert usize into i32 without overflow.
+///
+/// This is needed to ensure when adjusting the exponent relative to
+/// the mantissa we do not overflow for comically-long exponents.
+#[inline]
+fn into_i32(value: usize) -> i32 {
+    if value > i32::max_value() as usize {
+        i32::max_value()
+    } else {
+        value as i32
+    }
+}
+
+// Add digit to mantissa.
+#[inline]
+pub fn add_digit(value: u64, digit: u8) -> Option<u64> {
+    value.checked_mul(10)?.checked_add(digit as u64)
+}
diff --git a/third_party/rust/minimal-lexical/src/rounding.rs b/third_party/rust/minimal-lexical/src/rounding.rs
new file mode 100644
index 0000000000..7c466dec4d
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/rounding.rs
@@ -0,0 +1,131 @@
+//! Defines rounding schemes for floating-point numbers.
+
+#![doc(hidden)]
+
+use crate::extended_float::ExtendedFloat;
+use crate::mask::{lower_n_halfway, lower_n_mask};
+use crate::num::Float;
+
+// ROUNDING
+// --------
+
+/// Round an extended-precision float to the nearest machine float.
+///
+/// Shifts the significant digits into place and adjusts the exponent,
+/// so it can be easily converted to a native float.
+#[cfg_attr(not(feature = "compact"), inline)]
+pub fn round<F, Cb>(fp: &mut ExtendedFloat, cb: Cb)
+where
+    F: Float,
+    Cb: Fn(&mut ExtendedFloat, i32),
+{
+    let fp_inf = ExtendedFloat {
+        mant: 0,
+        exp: F::INFINITE_POWER,
+    };
+
+    // Calculate our shift in significant digits.
+    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
+
+    // Check for a denormal float, if after the shift the exponent is negative.
+    if -fp.exp >= mantissa_shift {
+        // Have a denormal float that isn't a literal 0.
+        // The extra 1 is to adjust for the denormal float, which is
+        // `1 - F::EXPONENT_BIAS`. This works as before, because our
+        // old logic rounded to `F::DENORMAL_EXPONENT` (now 1), and then
+        // checked if `exp == F::DENORMAL_EXPONENT` and no hidden mask
+        // bit was set. Here, we handle that immediately, rather than later.
+        //
+        // This might round-down to 0, but shift will be at **max** 65,
+        // for halfway cases rounding towards 0.
+        let shift = -fp.exp + 1;
+        debug_assert!(shift <= 65);
+        cb(fp, shift.min(64));
+        // Check for round-up: if rounding-nearest carried us to the hidden bit.
+        fp.exp = (fp.mant >= F::HIDDEN_BIT_MASK) as i32;
+        return;
+    }
+
+    // The float is normal, round to the hidden bit.
+    cb(fp, mantissa_shift);
+
+    // Check if we carried, and if so, shift the bit to the hidden bit.
+    let carry_mask = F::CARRY_MASK;
+    if fp.mant & carry_mask == carry_mask {
+        fp.mant >>= 1;
+        fp.exp += 1;
+    }
+
+    // Handle if we carried and check for overflow again.
+    if fp.exp >= F::INFINITE_POWER {
+        // Exponent is above largest normal value, must be infinite.
+        *fp = fp_inf;
+        return;
+    }
+
+    // Remove the hidden bit.
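+    // (With the hidden bit stripped, `mant` and `exp` map directly onto the
+    // IEEE 754 mantissa and biased-exponent fields.)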
+    fp.mant &= F::MANTISSA_MASK;
+}
+
+/// Shift right N bits and round towards a direction.
+///
+/// Callback should take the following parameters:
+/// 1. is_odd
+/// 1. is_halfway
+/// 1. is_above
+#[cfg_attr(not(feature = "compact"), inline)]
+pub fn round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat, shift: i32, cb: Cb)
+where
+    // is_odd, is_halfway, is_above
+    Cb: Fn(bool, bool, bool) -> bool,
+{
+    // Ensure we've already handled denormal values that underflow.
+    debug_assert!(shift <= 64);
+
+    // Extract the truncated bits using mask.
+    // Calculate if the value of the truncated bits are either above
+    // the mid-way point, or equal to it.
+    //
+    // For example, for 4 truncated bits, the mask would be 0b1111
+    // and the midway point would be 0b1000.
+    let mask = lower_n_mask(shift as u64);
+    let halfway = lower_n_halfway(shift as u64);
+    let truncated_bits = fp.mant & mask;
+    let is_above = truncated_bits > halfway;
+    let is_halfway = truncated_bits == halfway;
+
+    // Bit shift so the leading bit is in the hidden bit.
+    // This optimizes pretty well:
+    // ```text
+    //  mov     ecx, esi
+    //  shr     rdi, cl
+    //  xor     eax, eax
+    //  cmp     esi, 64
+    //  cmovne  rax, rdi
+    //  ret
+    // ```
+    fp.mant = match shift == 64 {
+        true => 0,
+        false => fp.mant >> shift,
+    };
+    fp.exp += shift;
+
+    // Extract the last bit after shifting (and determine if it is odd).
+    let is_odd = fp.mant & 1 == 1;
+
+    // Calculate if we need to round up.
+    // We need to round up if we are above halfway, or if we are odd
+    // and at half-way (need to tie-to-even). Avoid the branch here.
+    fp.mant += cb(is_odd, is_halfway, is_above) as u64;
+}
+
+/// Round our significant digits into place, truncating them.
+#[cfg_attr(not(feature = "compact"), inline)]
+pub fn round_down(fp: &mut ExtendedFloat, shift: i32) {
+    // Might have a shift greater than 64 if we have an error.
+    fp.mant = match shift == 64 {
+        true => 0,
+        false => fp.mant >> shift,
+    };
+    fp.exp += shift;
+}
diff --git a/third_party/rust/minimal-lexical/src/slow.rs b/third_party/rust/minimal-lexical/src/slow.rs
new file mode 100644
index 0000000000..59d526ba42
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/slow.rs
@@ -0,0 +1,403 @@
+//! Slow, fallback cases where we cannot unambiguously round a float.
+//!
+//! This occurs when we cannot determine the exact representation using
+//! either the fast path (native) cases or the Lemire/Bellerophon algorithms,
+//! and therefore must fall back to a slow, arbitrary-precision representation.
+
+#![doc(hidden)]
+
+use crate::bigint::{Bigint, Limb, LIMB_BITS};
+use crate::extended_float::{extended_to_float, ExtendedFloat};
+use crate::num::Float;
+use crate::number::Number;
+use crate::rounding::{round, round_down, round_nearest_tie_even};
+use core::cmp;
+
+// ALGORITHM
+// ---------
+
+/// Parse the significant digits and biased, binary exponent of a float.
+///
+/// This is a fallback algorithm that uses a big-integer representation
+/// of the float, and therefore is considerably slower than faster
+/// approximations. However, it will always determine how to round
+/// the significant digits to the nearest machine float, allowing
+/// us to handle near half-way cases.
+///
+/// Near half-way cases are halfway between two consecutive machine floats.
+/// For example, the float `16777217.0` has a bitwise representation of
+/// `100000000000000000000000 1`. Rounding to a single-precision float,
+/// the trailing `1` is truncated.
+/// Using round-nearest, tie-even, any value above `16777217.0` must be
+/// rounded up to `16777218.0`, while any value below or equal to
+/// `16777217.0` must be rounded down to `16777216.0`. These near-halfway
+/// conversions therefore may require a large number of digits to
+/// unambiguously determine how to round.
+#[inline]
+pub fn slow<'a, F, Iter1, Iter2>(
+    num: Number,
+    fp: ExtendedFloat,
+    integer: Iter1,
+    fraction: Iter2,
+) -> ExtendedFloat
+where
+    F: Float,
+    Iter1: Iterator<Item = &'a u8> + Clone,
+    Iter2: Iterator<Item = &'a u8> + Clone,
+{
+    // Ensure our preconditions are valid:
+    // 1. The significant digits are not shifted into place.
+    debug_assert!(fp.mant & (1 << 63) != 0);
+
+    // This assumes the sign bit has already been parsed, we're
+    // starting with the integer digits, and the float format has been
+    // correctly validated.
+    let sci_exp = scientific_exponent(&num);
+
+    // We have 2 major algorithms we use for this:
+    // 1. An algorithm with a finite number of digits and a positive exponent.
+    // 2. An algorithm with a finite number of digits and a negative exponent.
+    let (bigmant, digits) = parse_mantissa(integer, fraction, F::MAX_DIGITS);
+    let exponent = sci_exp + 1 - digits as i32;
+    if exponent >= 0 {
+        positive_digit_comp::<F>(bigmant, exponent)
+    } else {
+        negative_digit_comp::<F>(bigmant, fp, exponent)
+    }
+}
+
+/// Generate the significant digits with a positive exponent relative to mantissa.
+pub fn positive_digit_comp<F: Float>(mut bigmant: Bigint, exponent: i32) -> ExtendedFloat {
+    // Simple, we just need to multiply by the power of the radix.
+    // Now, we can calculate the mantissa and the exponent from this.
+    // The binary exponent is the binary exponent for the mantissa
+    // shifted to the hidden bit.
+    bigmant.pow(10, exponent as u32).unwrap();
+
+    // Get the exact representation of the float from the big integer.
+    // hi64 checks **all** the remaining bits after the mantissa,
+    // so it will check if **any** truncated digits exist.
+    let (mant, is_truncated) = bigmant.hi64();
+    let exp = bigmant.bit_length() as i32 - 64 + F::EXPONENT_BIAS;
+    let mut fp = ExtendedFloat {
+        mant,
+        exp,
+    };
+
+    // Shift the digits into position and determine if we need to round-up.
+    round::<F, _>(&mut fp, |f, s| {
+        round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| {
+            is_above || (is_halfway && is_truncated) || (is_odd && is_halfway)
+        });
+    });
+    fp
+}
+
+/// Generate the significant digits with a negative exponent relative to mantissa.
+///
+/// This algorithm is quite simple: we have the significant digits `m1 * b^N1`,
+/// where `m1` is the bigint mantissa, `b` is the radix, and `N1` is the radix
+/// exponent. We then calculate the theoretical representation of `b+h`, which
+/// is `m2 * 2^N2`, where `m2` is the bigint mantissa and `N2` is the binary
+/// exponent. If we had infinite precision, we would simply compute
+/// `m1 / b^-N1` (that is, `m1 * b^N1`) and compare it to `m2 * 2^N2`.
+///
+/// Since we cannot divide and keep precision, we must multiply the other side:
+/// if we want to do `m1 / b^-N1 >= m2 * 2^N2`, we can do
+/// `m1 >= m2 * b^-N1 * 2^N2`. Specializing to the decimal case, we can
+/// simplify this further: `m1 >= m2 * 2^N2 * 10^-N1`. Since we can remove
+/// a power-of-two, this is `m1 >= m2 * 2^(N2 - N1) * 5^-N1`.
+/// Therefore, if `N2 - N1 > 0`, we have `m1 >= m2 * 2^(N2 - N1) * 5^-N1`;
+/// otherwise, we have `m1 * 2^(N1 - N2) >= m2 * 5^-N1`, where the resulting
+/// exponents are all positive.
+///
+/// This allows us to compare both floats using integers efficiently
+/// without any loss of precision.
+#[allow(clippy::comparison_chain)]
+pub fn negative_digit_comp<F: Float>(
+    bigmant: Bigint,
+    mut fp: ExtendedFloat,
+    exponent: i32,
+) -> ExtendedFloat {
+    // Ensure our preconditions are valid:
+    // 1. The significant digits are not shifted into place.
+    debug_assert!(fp.mant & (1 << 63) != 0);
+
+    // Get the significant digits and radix exponent for the real digits.
+    let mut real_digits = bigmant;
+    let real_exp = exponent;
+    debug_assert!(real_exp < 0);
+
+    // Round down our extended-precision float and calculate `b`.
+    let mut b = fp;
+    round::<F, _>(&mut b, round_down);
+    let b = extended_to_float::<F>(b);
+
+    // Get the significant digits and the binary exponent for `b+h`.
+    let theor = bh(b);
+    let mut theor_digits = Bigint::from_u64(theor.mant);
+    let theor_exp = theor.exp;
+
+    // We need to scale the real digits and `b+h` digits to be the same
+    // order. We currently have `real_exp`, in `radix`, that needs to be
+    // shifted to `theor_digits` (since it is negative), and `theor_exp`
+    // to either `theor_digits` or `real_digits` as a power of 2 (since it
+    // may be positive or negative). Try to remove as many powers of 2
+    // as possible. All values are relative to `theor_digits`, that is,
+    // reflect the power you need to multiply `theor_digits` by.
+    //
+    // Since both sides of the comparison can have a power of two
+    // factored out, do so here.
+    //
+    // Example: 10^-10, 2^-10 -> ( 0, 10, 0)
+    // Example: 10^-10, 2^-15 -> (-5, 10, 0)
+    // Example: 10^-10, 2^-5  -> ( 5, 10, 0)
+    // Example: 10^-10, 2^5   -> (15, 10, 0)
+    let binary_exp = theor_exp - real_exp;
+    let halfradix_exp = -real_exp;
+    if halfradix_exp != 0 {
+        theor_digits.pow(5, halfradix_exp as u32).unwrap();
+    }
+    if binary_exp > 0 {
+        theor_digits.pow(2, binary_exp as u32).unwrap();
+    } else if binary_exp < 0 {
+        real_digits.pow(2, (-binary_exp) as u32).unwrap();
+    }
+
+    // Compare our theoretical and real digits and round nearest, tie even.
+    let ord = real_digits.data.cmp(&theor_digits.data);
+    round::<F, _>(&mut fp, |f, s| {
+        round_nearest_tie_even(f, s, |is_odd, _, _| {
+            // We can ignore `is_halfway` and `is_above`, since those were
+            // calculated using less-significant digits.
+            match ord {
+                cmp::Ordering::Greater => true,
+                cmp::Ordering::Less => false,
+                cmp::Ordering::Equal if is_odd => true,
+                cmp::Ordering::Equal => false,
+            }
+        });
+    });
+    fp
+}
+
+/// Add a digit to the temporary value.
+macro_rules! add_digit {
+    ($c:ident, $value:ident, $counter:ident, $count:ident) => {{
+        let digit = $c - b'0';
+        $value *= 10 as Limb;
+        $value += digit as Limb;
+
+        // Increment our counters.
+        $counter += 1;
+        $count += 1;
+    }};
+}
+
+/// Add a temporary value to our mantissa.
+macro_rules! add_temporary {
+    // Multiply by the small power and add the native value.
+    (@mul $result:ident, $power:expr, $value:expr) => {
+        $result.data.mul_small($power).unwrap();
+        $result.data.add_small($value).unwrap();
+    };
+
+    // # Safety
+    //
+    // Safe if `counter <= step`, or smaller than the table size.
+    ($format:ident, $result:ident, $counter:ident, $value:ident) => {
+        if $counter != 0 {
+            // SAFETY: safe, since `counter <= step`, or smaller than the table size.
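+            // Illustrative note: with `counter == 3` and `value == 123`,
+            // this computes `result = result * 10^3 + 123`, folding the
+            // three buffered digits into the big integer in one step.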
+            let small_power = unsafe { f64::int_pow_fast_path($counter, 10) };
+            add_temporary!(@mul $result, small_power as Limb, $value);
+            $counter = 0;
+            $value = 0;
+        }
+    };
+
+    // Add a temporary where we won't read the counter results internally.
+    //
+    // # Safety
+    //
+    // Safe if `counter <= step`, or smaller than the table size.
+    (@end $format:ident, $result:ident, $counter:ident, $value:ident) => {
+        if $counter != 0 {
+            // SAFETY: safe, since `counter <= step`, or smaller than the table size.
+            let small_power = unsafe { f64::int_pow_fast_path($counter, 10) };
+            add_temporary!(@mul $result, small_power as Limb, $value);
+        }
+    };
+
+    // Add the maximum native value.
+    (@max $format:ident, $result:ident, $counter:ident, $value:ident, $max:ident) => {
+        add_temporary!(@mul $result, $max, $value);
+        $counter = 0;
+        $value = 0;
+    };
+}
+
+/// Round-up a truncated value.
+macro_rules! round_up_truncated {
+    ($format:ident, $result:ident, $count:ident) => {{
+        // Need to round-up.
+        // Can't just add 1, since this can accidentally round-up
+        // values to a halfway point, which can cause invalid results.
+        add_temporary!(@mul $result, 10, 1);
+        $count += 1;
+    }};
+}
+
+/// Check and round-up the fraction if any non-zero digits exist.
+macro_rules! round_up_nonzero {
+    ($format:ident, $iter:expr, $result:ident, $count:ident) => {{
+        for &digit in $iter {
+            if digit != b'0' {
+                round_up_truncated!($format, $result, $count);
+                return ($result, $count);
+            }
+        }
+    }};
+}
+
+/// Parse the full mantissa into a big integer.
+///
+/// Returns the parsed mantissa and the number of digits in the mantissa.
+/// `max_digits` is the maximum number of digits plus one.
+pub fn parse_mantissa<'a, Iter1, Iter2>(
+    mut integer: Iter1,
+    mut fraction: Iter2,
+    max_digits: usize,
+) -> (Bigint, usize)
+where
+    Iter1: Iterator<Item = &'a u8> + Clone,
+    Iter2: Iterator<Item = &'a u8> + Clone,
+{
+    // Iteratively process all the data in the mantissa.
+    // We do this via small, intermediate values: once we reach
+    // the maximum number of digits we can process without overflow,
+    // we add the temporary to the big integer.
+    let mut counter: usize = 0;
+    let mut count: usize = 0;
+    let mut value: Limb = 0;
+    let mut result = Bigint::new();
+
+    // Now use our pre-computed small powers iteratively.
+    // This is calculated as `⌊log(2^BITS - 1, 10)⌋`.
+    let step: usize = if LIMB_BITS == 32 {
+        9
+    } else {
+        19
+    };
+    let max_native = (10 as Limb).pow(step as u32);
+
+    // Process the integer digits.
+    'integer: loop {
+        // Parse a digit at a time, until we reach step.
+        while counter < step && count < max_digits {
+            if let Some(&c) = integer.next() {
+                add_digit!(c, value, counter, count);
+            } else {
+                break 'integer;
+            }
+        }
+
+        // Check if we've exhausted our max digits.
+        if count == max_digits {
+            // Need to check if we're truncated, and round-up accordingly.
+            // SAFETY: safe since `counter <= step`.
+            add_temporary!(@end format, result, counter, value);
+            round_up_nonzero!(format, integer, result, count);
+            round_up_nonzero!(format, fraction, result, count);
+            return (result, count);
+        } else {
+            // Add our temporary from the loop.
+            // SAFETY: safe since `counter <= step`.
+            add_temporary!(@max format, result, counter, value, max_native);
+        }
+    }
+
+    // Skip leading fraction zeros.
+    // Required to get an accurate count.
+    if count == 0 {
+        for &c in &mut fraction {
+            if c != b'0' {
+                add_digit!(c, value, counter, count);
+                break;
+            }
+        }
+    }
+
+    // Process the fraction digits.
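+    // Illustrative note: this mirrors the integer loop above, buffering up
+    // to `step` digits (19 for 64-bit limbs, since `10^19 - 1` fits in a
+    // u64) in `value` before folding them into `result`.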
+    'fraction: loop {
+        // Parse a digit at a time, until we reach step.
+        while counter < step && count < max_digits {
+            if let Some(&c) = fraction.next() {
+                add_digit!(c, value, counter, count);
+            } else {
+                break 'fraction;
+            }
+        }
+
+        // Check if we've exhausted our max digits.
+        if count == max_digits {
+            // SAFETY: safe since `counter <= step`.
+            add_temporary!(@end format, result, counter, value);
+            round_up_nonzero!(format, fraction, result, count);
+            return (result, count);
+        } else {
+            // Add our temporary from the loop.
+            // SAFETY: safe since `counter <= step`.
+            add_temporary!(@max format, result, counter, value, max_native);
+        }
+    }
+
+    // Add any leftover digits: we will always have a remainder here,
+    // unless we never entered a loop or `counter % step` is 0.
+    // SAFETY: safe since `counter <= step`.
+    add_temporary!(@end format, result, counter, value);
+
+    (result, count)
+}
+
+// SCALING
+// -------
+
+/// Calculate the scientific exponent from a `Number` value.
+/// Calculating it here, rather than during parsing, avoids
+/// slowdowns for the faster algorithms.
+#[inline]
+pub fn scientific_exponent(num: &Number) -> i32 {
+    // Use power reduction to make this faster.
+    let mut mantissa = num.mantissa;
+    let mut exponent = num.exponent;
+    while mantissa >= 10000 {
+        mantissa /= 10000;
+        exponent += 4;
+    }
+    while mantissa >= 100 {
+        mantissa /= 100;
+        exponent += 2;
+    }
+    while mantissa >= 10 {
+        mantissa /= 10;
+        exponent += 1;
+    }
+    exponent as i32
+}
+
+/// Calculate `b` from a representation of `b` as a float.
+#[inline]
+pub fn b<F: Float>(float: F) -> ExtendedFloat {
+    ExtendedFloat {
+        mant: float.mantissa(),
+        exp: float.exponent(),
+    }
+}
+
+/// Calculate `b+h` from a representation of `b` as a float.
+#[inline]
+pub fn bh<F: Float>(float: F) -> ExtendedFloat {
+    let fp = b(float);
+    ExtendedFloat {
+        mant: (fp.mant << 1) + 1,
+        exp: fp.exp - 1,
+    }
+}
diff --git a/third_party/rust/minimal-lexical/src/stackvec.rs b/third_party/rust/minimal-lexical/src/stackvec.rs
new file mode 100644
index 0000000000..d9bc259555
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/stackvec.rs
@@ -0,0 +1,308 @@
+//! Simple stack-allocated vector.
+
+#![cfg(not(feature = "alloc"))]
+#![doc(hidden)]
+
+use crate::bigint;
+use core::{cmp, mem, ops, ptr, slice};
+
+/// Simple stack vector implementation.
+#[derive(Clone)]
+pub struct StackVec {
+    /// The raw buffer for the elements.
+    data: [mem::MaybeUninit<bigint::Limb>; bigint::BIGINT_LIMBS],
+    /// The number of elements in the array (we never need more than u16::MAX).
+    length: u16,
+}
+
+#[allow(clippy::new_without_default)]
+impl StackVec {
+    /// Construct an empty vector.
+    #[inline]
+    pub const fn new() -> Self {
+        Self {
+            length: 0,
+            data: [mem::MaybeUninit::uninit(); bigint::BIGINT_LIMBS],
+        }
+    }
+
+    /// Construct a vector from an existing slice.
+    #[inline]
+    pub fn try_from(x: &[bigint::Limb]) -> Option<Self> {
+        let mut vec = Self::new();
+        vec.try_extend(x)?;
+        Some(vec)
+    }
+
+    /// Sets the length of a vector.
+    ///
+    /// This will explicitly set the size of the vector, without actually
+    /// modifying its buffers, so it is up to the caller to ensure that the
+    /// vector is actually the specified size.
+    ///
+    /// # Safety
+    ///
+    /// Safe as long as `len` is no larger than `BIGINT_LIMBS`.
+    #[inline]
+    pub unsafe fn set_len(&mut self, len: usize) {
+        // The literal is `u16::MAX`, spelled out for older rustc versions.
+        debug_assert!(len <= 0xffff);
+        debug_assert!(len <= bigint::BIGINT_LIMBS);
+        self.length = len as u16;
+    }
+
+    /// The number of elements stored in the vector.
+    #[inline]
+    pub const fn len(&self) -> usize {
+        self.length as usize
+    }
+
+    /// If the vector is empty.
+    #[inline]
+    pub const fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// The number of items the vector can hold.
+    #[inline]
+    pub const fn capacity(&self) -> usize {
+        bigint::BIGINT_LIMBS as usize
+    }
+
+    /// Append an item to the vector, without bounds checking.
+    ///
+    /// # Safety
+    ///
+    /// Safe if `self.len() < self.capacity()`.
+    #[inline]
+    pub unsafe fn push_unchecked(&mut self, value: bigint::Limb) {
+        debug_assert!(self.len() < self.capacity());
+        // SAFETY: safe, since the length is less than the capacity.
+        unsafe {
+            ptr::write(self.as_mut_ptr().add(self.len()), value);
+            self.length += 1;
+        }
+    }
+
+    /// Append an item to the vector.
+    #[inline]
+    pub fn try_push(&mut self, value: bigint::Limb) -> Option<()> {
+        if self.len() < self.capacity() {
+            // SAFETY: safe, since the length is less than the capacity.
+            unsafe { self.push_unchecked(value) };
+            Some(())
+        } else {
+            None
+        }
+    }
+
+    /// Remove an item from the end of a vector, without bounds checking.
+    ///
+    /// # Safety
+    ///
+    /// Safe if `self.len() > 0`.
+    #[inline]
+    pub unsafe fn pop_unchecked(&mut self) -> bigint::Limb {
+        debug_assert!(!self.is_empty());
+        // SAFETY: safe if `self.length > 0`.
+        // We have a trivial drop and copy, so this is safe.
+        self.length -= 1;
+        unsafe { ptr::read(self.as_mut_ptr().add(self.len())) }
+    }
+
+    /// Remove an item from the end of the vector and return it, or None if empty.
+    #[inline]
+    pub fn pop(&mut self) -> Option<bigint::Limb> {
+        if self.is_empty() {
+            None
+        } else {
+            // SAFETY: safe, since `self.len() > 0`.
+            unsafe { Some(self.pop_unchecked()) }
+        }
+    }
+
+    /// Add items from a slice to the vector, without bounds checking.
+    ///
+    /// # Safety
+    ///
+    /// Safe if `self.len() + slc.len() <= self.capacity()`.
+    #[inline]
+    pub unsafe fn extend_unchecked(&mut self, slc: &[bigint::Limb]) {
+        let index = self.len();
+        let new_len = index + slc.len();
+        debug_assert!(self.len() + slc.len() <= self.capacity());
+        let src = slc.as_ptr();
+        // SAFETY: safe if `self.len() + slc.len() <= self.capacity()`.
+        unsafe {
+            let dst = self.as_mut_ptr().add(index);
+            ptr::copy_nonoverlapping(src, dst, slc.len());
+            self.set_len(new_len);
+        }
+    }
+
+    /// Copy elements from a slice and append them to the vector.
+    #[inline]
+    pub fn try_extend(&mut self, slc: &[bigint::Limb]) -> Option<()> {
+        if self.len() + slc.len() <= self.capacity() {
+            // SAFETY: safe, since `self.len() + slc.len() <= self.capacity()`.
+            unsafe { self.extend_unchecked(slc) };
+            Some(())
+        } else {
+            None
+        }
+    }
+
+    /// Truncate vector to new length, dropping any items after `len`.
+    ///
+    /// # Safety
+    ///
+    /// Safe as long as `len <= self.capacity()`.
+    unsafe fn truncate_unchecked(&mut self, len: usize) {
+        debug_assert!(len <= self.capacity());
+        self.length = len as u16;
+    }
+
+    /// Resize the buffer, without bounds checking.
+    ///
+    /// # Safety
+    ///
+    /// Safe as long as `len <= self.capacity()`.
+    #[inline]
+    pub unsafe fn resize_unchecked(&mut self, len: usize, value: bigint::Limb) {
+        debug_assert!(len <= self.capacity());
+        let old_len = self.len();
+        if len > old_len {
+            // We have a trivial drop, so there's no worry here.
+            // Just, don't set the length until all values have been written,
+            // so we don't accidentally read uninitialized memory.
+
+            // SAFETY: safe if `len <= self.capacity()`.
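+            // Illustrative note: growing from length 2 to 5 writes `value`
+            // into slots 2, 3, and 4, and only then bumps `length` to 5,
+            // so the vector never claims elements that were not written.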
+            let count = len - old_len;
+            for index in 0..count {
+                unsafe {
+                    let dst = self.as_mut_ptr().add(old_len + index);
+                    ptr::write(dst, value);
+                }
+            }
+            self.length = len as u16;
+        } else {
+            // SAFETY: safe since `len <= self.len()`.
+            unsafe { self.truncate_unchecked(len) };
+        }
+    }
+
+    /// Try to resize the buffer.
+    ///
+    /// If the new length is smaller than the current length, truncate
+    /// the input. If it's larger, then append elements to the buffer.
+    #[inline]
+    pub fn try_resize(&mut self, len: usize, value: bigint::Limb) -> Option<()> {
+        if len > self.capacity() {
+            None
+        } else {
+            // SAFETY: safe, since `len <= self.capacity()`.
+            unsafe { self.resize_unchecked(len, value) };
+            Some(())
+        }
+    }
+
+    // HI
+
+    /// Get the high 64 bits from the vector.
+    #[inline(always)]
+    pub fn hi64(&self) -> (u64, bool) {
+        bigint::hi64(self)
+    }
+
+    // FROM
+
+    /// Create StackVec from u64 value.
+    #[inline(always)]
+    pub fn from_u64(x: u64) -> Self {
+        bigint::from_u64(x)
+    }
+
+    // MATH
+
+    /// Normalize the integer, so any leading zero values are removed.
+    #[inline]
+    pub fn normalize(&mut self) {
+        bigint::normalize(self)
+    }
+
+    /// Get if the big integer is normalized.
+    #[inline]
+    pub fn is_normalized(&self) -> bool {
+        bigint::is_normalized(self)
+    }
+
+    /// AddAssign small integer.
+    #[inline]
+    pub fn add_small(&mut self, y: bigint::Limb) -> Option<()> {
+        bigint::small_add(self, y)
+    }
+
+    /// MulAssign small integer.
+    #[inline]
+    pub fn mul_small(&mut self, y: bigint::Limb) -> Option<()> {
+        bigint::small_mul(self, y)
+    }
+}
+
+impl PartialEq for StackVec {
+    #[inline]
+    #[allow(clippy::op_ref)]
+    fn eq(&self, other: &Self) -> bool {
+        use core::ops::Deref;
+        self.len() == other.len() && self.deref() == other.deref()
+    }
+}
+
+impl Eq for StackVec {
+}
+
+impl cmp::PartialOrd for StackVec {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        Some(bigint::compare(self, other))
+    }
+}
+
+impl cmp::Ord for StackVec {
+    #[inline]
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        bigint::compare(self, other)
+    }
+}
+
+impl ops::Deref for StackVec {
+    type Target = [bigint::Limb];
+    #[inline]
+    fn deref(&self) -> &[bigint::Limb] {
+        // SAFETY: safe since `self.data[..self.len()]` must be initialized
+        // and `self.len() <= self.capacity()`.
+        unsafe {
+            let ptr = self.data.as_ptr() as *const bigint::Limb;
+            slice::from_raw_parts(ptr, self.len())
+        }
+    }
+}
+
+impl ops::DerefMut for StackVec {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut [bigint::Limb] {
+        // SAFETY: safe since `self.data[..self.len()]` must be initialized
+        // and `self.len() <= self.capacity()`.
+        unsafe {
+            let ptr = self.data.as_mut_ptr() as *mut bigint::Limb;
+            slice::from_raw_parts_mut(ptr, self.len())
+        }
+    }
+}
+
+impl ops::MulAssign<&[bigint::Limb]> for StackVec {
+    #[inline]
+    fn mul_assign(&mut self, rhs: &[bigint::Limb]) {
+        bigint::large_mul(self, rhs).unwrap();
+    }
+}
diff --git a/third_party/rust/minimal-lexical/src/table.rs b/third_party/rust/minimal-lexical/src/table.rs
new file mode 100644
index 0000000000..7b1367e326
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/table.rs
@@ -0,0 +1,11 @@
+//! Pre-computed tables for parsing float strings.
+
+#![doc(hidden)]
+
+// Re-export all the feature-specific files.
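+// With the `compact` feature, only the small Bellerophon tables are
+// compiled in; otherwise, the larger Lemire and small-power tables are used.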
+#[cfg(feature = "compact")] +pub use crate::table_bellerophon::*; +#[cfg(not(feature = "compact"))] +pub use crate::table_lemire::*; +#[cfg(not(feature = "compact"))] +pub use crate::table_small::*; diff --git a/third_party/rust/minimal-lexical/src/table_bellerophon.rs b/third_party/rust/minimal-lexical/src/table_bellerophon.rs new file mode 100644 index 0000000000..f85f8e6fb3 --- /dev/null +++ b/third_party/rust/minimal-lexical/src/table_bellerophon.rs @@ -0,0 +1,119 @@ +//! Cached exponents for basen values with 80-bit extended floats. +//! +//! Exact versions of base**n as an extended-precision float, with both +//! large and small powers. Use the large powers to minimize the amount +//! of compounded error. This is used in the Bellerophon algorithm. +//! +//! These values were calculated using Python, using the arbitrary-precision +//! integer to calculate exact extended-representation of each value. +//! These values are all normalized. +//! +//! DO NOT MODIFY: Generated by `etc/bellerophon_table.py` + +#![cfg(feature = "compact")] +#![doc(hidden)] + +use crate::bellerophon::BellerophonPowers; + +// HIGH LEVEL +// ---------- + +pub const BASE10_POWERS: BellerophonPowers = BellerophonPowers { + small: &BASE10_SMALL_MANTISSA, + large: &BASE10_LARGE_MANTISSA, + small_int: &BASE10_SMALL_INT_POWERS, + step: BASE10_STEP, + bias: BASE10_BIAS, + log2: BASE10_LOG2_MULT, + log2_shift: BASE10_LOG2_SHIFT, +}; + +// LOW-LEVEL +// --------- + +const BASE10_SMALL_MANTISSA: [u64; 10] = [ + 9223372036854775808, // 10^0 + 11529215046068469760, // 10^1 + 14411518807585587200, // 10^2 + 18014398509481984000, // 10^3 + 11258999068426240000, // 10^4 + 14073748835532800000, // 10^5 + 17592186044416000000, // 10^6 + 10995116277760000000, // 10^7 + 13743895347200000000, // 10^8 + 17179869184000000000, // 10^9 +]; +const BASE10_LARGE_MANTISSA: [u64; 66] = [ + 11555125961253852697, // 10^-350 + 13451937075301367670, // 10^-340 + 15660115838168849784, // 10^-330 + 18230774251475056848, // 10^-320 + 10611707258198326947, // 10^-310 + 12353653155963782858, // 10^-300 + 14381545078898527261, // 10^-290 + 16742321987285426889, // 10^-280 + 9745314011399999080, // 10^-270 + 11345038669416679861, // 10^-260 + 13207363278391631158, // 10^-250 + 15375394465392026070, // 10^-240 + 17899314949046850752, // 10^-230 + 10418772551374772303, // 10^-220 + 12129047596099288555, // 10^-210 + 14120069793541087484, // 10^-200 + 16437924692338667210, // 10^-190 + 9568131466127621947, // 10^-180 + 11138771039116687545, // 10^-170 + 12967236152753102995, // 10^-160 + 15095849699286165408, // 10^-150 + 17573882009934360870, // 10^-140 + 10229345649675443343, // 10^-130 + 11908525658859223294, // 10^-120 + 13863348470604074297, // 10^-110 + 16139061738043178685, // 10^-100 + 9394170331095332911, // 10^-90 + 10936253623915059621, // 10^-80 + 12731474852090538039, // 10^-70 + 14821387422376473014, // 10^-60 + 17254365866976409468, // 10^-50 + 10043362776618689222, // 10^-40 + 11692013098647223345, // 10^-30 + 13611294676837538538, // 10^-20 + 15845632502852867518, // 10^-10 + 9223372036854775808, // 10^0 + 10737418240000000000, // 10^10 + 12500000000000000000, // 10^20 + 14551915228366851806, // 10^30 + 16940658945086006781, // 10^40 + 9860761315262647567, // 10^50 + 11479437019748901445, // 10^60 + 13363823550460978230, // 10^70 + 15557538194652854267, // 10^80 + 18111358157653424735, // 10^90 + 10542197943230523224, // 10^100 + 12272733663244316382, // 10^110 + 14287342391028437277, // 10^120 + 16632655625031838749, // 10^130 + 
9681479787123295682,   // 10^140
+    11270725851789228247,  // 10^150
+    13120851772591970218,  // 10^160
+    15274681817498023410,  // 10^170
+    17782069995880619867,  // 10^180
+    10350527006597618960,  // 10^190
+    12049599325514420588,  // 10^200
+    14027579833653779454,  // 10^210
+    16330252207878254650,  // 10^220
+    9505457831475799117,   // 10^230
+    11065809325636130661,  // 10^240
+    12882297539194266616,  // 10^250
+    14996968138956309548,  // 10^260
+    17458768723248864463,  // 10^270
+    10162340898095201970,  // 10^280
+    11830521861667747109,  // 10^290
+    13772540099066387756,  // 10^300
+];
+const BASE10_SMALL_INT_POWERS: [u64; 10] =
+    [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000];
+const BASE10_STEP: i32 = 10;
+const BASE10_BIAS: i32 = 350;
+const BASE10_LOG2_MULT: i64 = 217706;
+const BASE10_LOG2_SHIFT: i32 = 16;
diff --git a/third_party/rust/minimal-lexical/src/table_lemire.rs b/third_party/rust/minimal-lexical/src/table_lemire.rs
new file mode 100644
index 0000000000..110e1dab2b
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/table_lemire.rs
@@ -0,0 +1,676 @@
+//! Pre-computed tables of powers-of-5 for extended-precision representations.
+//!
+//! These tables enable fast scaling of the significant digits
+//! of a float to the decimal exponent, with minimal rounding
+//! errors, in a 128 or 192-bit representation.
+//!
+//! DO NOT MODIFY: Generated by `etc/lemire_table.py`
+//!
+//! This is adapted from the Rust implementation, based on the fast-float-rust
+//! implementation, and is similarly subject to an Apache2.0/MIT license.
+
+#![doc(hidden)]
+#![cfg(not(feature = "compact"))]
+
+pub const SMALLEST_POWER_OF_FIVE: i32 = -342;
+pub const LARGEST_POWER_OF_FIVE: i32 = 308;
+pub const N_POWERS_OF_FIVE: usize = (LARGEST_POWER_OF_FIVE - SMALLEST_POWER_OF_FIVE + 1) as usize;
+
+// Use `static` rather than `const` to avoid long compile times: with `const`,
+// the Rust compiler can end up compiling the entire table multiple times, and
+// then emit code multiple times, even if it's stripped out in the final binary.
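+//
+// Illustrative note: in Lemire's algorithm, an entry is looked up as
+// `POWER_OF_FIVE_128[(q - SMALLEST_POWER_OF_FIVE) as usize]`, and the
+// decimal mantissa `w` is scaled with a 128-bit product such as
+// `w as u128 * hi as u128`, falling back to the low word only when
+// more precision is needed.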
+#[rustfmt::skip] +pub static POWER_OF_FIVE_128: [(u64, u64); N_POWERS_OF_FIVE] = [ + (0xeef453d6923bd65a, 0x113faa2906a13b3f), // 5^-342 + (0x9558b4661b6565f8, 0x4ac7ca59a424c507), // 5^-341 + (0xbaaee17fa23ebf76, 0x5d79bcf00d2df649), // 5^-340 + (0xe95a99df8ace6f53, 0xf4d82c2c107973dc), // 5^-339 + (0x91d8a02bb6c10594, 0x79071b9b8a4be869), // 5^-338 + (0xb64ec836a47146f9, 0x9748e2826cdee284), // 5^-337 + (0xe3e27a444d8d98b7, 0xfd1b1b2308169b25), // 5^-336 + (0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7), // 5^-335 + (0xb208ef855c969f4f, 0xbdbd2d335e51a935), // 5^-334 + (0xde8b2b66b3bc4723, 0xad2c788035e61382), // 5^-333 + (0x8b16fb203055ac76, 0x4c3bcb5021afcc31), // 5^-332 + (0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d), // 5^-331 + (0xd953e8624b85dd78, 0xd71d6dad34a2af0d), // 5^-330 + (0x87d4713d6f33aa6b, 0x8672648c40e5ad68), // 5^-329 + (0xa9c98d8ccb009506, 0x680efdaf511f18c2), // 5^-328 + (0xd43bf0effdc0ba48, 0x212bd1b2566def2), // 5^-327 + (0x84a57695fe98746d, 0x14bb630f7604b57), // 5^-326 + (0xa5ced43b7e3e9188, 0x419ea3bd35385e2d), // 5^-325 + (0xcf42894a5dce35ea, 0x52064cac828675b9), // 5^-324 + (0x818995ce7aa0e1b2, 0x7343efebd1940993), // 5^-323 + (0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8), // 5^-322 + (0xca66fa129f9b60a6, 0xd41a26e077774ef6), // 5^-321 + (0xfd00b897478238d0, 0x8920b098955522b4), // 5^-320 + (0x9e20735e8cb16382, 0x55b46e5f5d5535b0), // 5^-319 + (0xc5a890362fddbc62, 0xeb2189f734aa831d), // 5^-318 + (0xf712b443bbd52b7b, 0xa5e9ec7501d523e4), // 5^-317 + (0x9a6bb0aa55653b2d, 0x47b233c92125366e), // 5^-316 + (0xc1069cd4eabe89f8, 0x999ec0bb696e840a), // 5^-315 + (0xf148440a256e2c76, 0xc00670ea43ca250d), // 5^-314 + (0x96cd2a865764dbca, 0x380406926a5e5728), // 5^-313 + (0xbc807527ed3e12bc, 0xc605083704f5ecf2), // 5^-312 + (0xeba09271e88d976b, 0xf7864a44c633682e), // 5^-311 + (0x93445b8731587ea3, 0x7ab3ee6afbe0211d), // 5^-310 + (0xb8157268fdae9e4c, 0x5960ea05bad82964), // 5^-309 + (0xe61acf033d1a45df, 0x6fb92487298e33bd), // 5^-308 + (0x8fd0c16206306bab, 0xa5d3b6d479f8e056), // 5^-307 + (0xb3c4f1ba87bc8696, 0x8f48a4899877186c), // 5^-306 + (0xe0b62e2929aba83c, 0x331acdabfe94de87), // 5^-305 + (0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14), // 5^-304 + (0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9), // 5^-303 + (0xdb71e91432b1a24a, 0xc9e82cd9f69d6150), // 5^-302 + (0x892731ac9faf056e, 0xbe311c083a225cd2), // 5^-301 + (0xab70fe17c79ac6ca, 0x6dbd630a48aaf406), // 5^-300 + (0xd64d3d9db981787d, 0x92cbbccdad5b108), // 5^-299 + (0x85f0468293f0eb4e, 0x25bbf56008c58ea5), // 5^-298 + (0xa76c582338ed2621, 0xaf2af2b80af6f24e), // 5^-297 + (0xd1476e2c07286faa, 0x1af5af660db4aee1), // 5^-296 + (0x82cca4db847945ca, 0x50d98d9fc890ed4d), // 5^-295 + (0xa37fce126597973c, 0xe50ff107bab528a0), // 5^-294 + (0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8), // 5^-293 + (0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a), // 5^-292 + (0x9faacf3df73609b1, 0x77b191618c54e9ac), // 5^-291 + (0xc795830d75038c1d, 0xd59df5b9ef6a2417), // 5^-290 + (0xf97ae3d0d2446f25, 0x4b0573286b44ad1d), // 5^-289 + (0x9becce62836ac577, 0x4ee367f9430aec32), // 5^-288 + (0xc2e801fb244576d5, 0x229c41f793cda73f), // 5^-287 + (0xf3a20279ed56d48a, 0x6b43527578c1110f), // 5^-286 + (0x9845418c345644d6, 0x830a13896b78aaa9), // 5^-285 + (0xbe5691ef416bd60c, 0x23cc986bc656d553), // 5^-284 + (0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8), // 5^-283 + (0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9), // 5^-282 + (0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53), // 5^-281 + (0xe858ad248f5c22c9, 0xd1b3400f8f9cff68), // 5^-280 + (0x91376c36d99995be, 0x23100809b9c21fa1), // 5^-279 + 
(0xb58547448ffffb2d, 0xabd40a0c2832a78a), // 5^-278 + (0xe2e69915b3fff9f9, 0x16c90c8f323f516c), // 5^-277 + (0x8dd01fad907ffc3b, 0xae3da7d97f6792e3), // 5^-276 + (0xb1442798f49ffb4a, 0x99cd11cfdf41779c), // 5^-275 + (0xdd95317f31c7fa1d, 0x40405643d711d583), // 5^-274 + (0x8a7d3eef7f1cfc52, 0x482835ea666b2572), // 5^-273 + (0xad1c8eab5ee43b66, 0xda3243650005eecf), // 5^-272 + (0xd863b256369d4a40, 0x90bed43e40076a82), // 5^-271 + (0x873e4f75e2224e68, 0x5a7744a6e804a291), // 5^-270 + (0xa90de3535aaae202, 0x711515d0a205cb36), // 5^-269 + (0xd3515c2831559a83, 0xd5a5b44ca873e03), // 5^-268 + (0x8412d9991ed58091, 0xe858790afe9486c2), // 5^-267 + (0xa5178fff668ae0b6, 0x626e974dbe39a872), // 5^-266 + (0xce5d73ff402d98e3, 0xfb0a3d212dc8128f), // 5^-265 + (0x80fa687f881c7f8e, 0x7ce66634bc9d0b99), // 5^-264 + (0xa139029f6a239f72, 0x1c1fffc1ebc44e80), // 5^-263 + (0xc987434744ac874e, 0xa327ffb266b56220), // 5^-262 + (0xfbe9141915d7a922, 0x4bf1ff9f0062baa8), // 5^-261 + (0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9), // 5^-260 + (0xc4ce17b399107c22, 0xcb550fb4384d21d3), // 5^-259 + (0xf6019da07f549b2b, 0x7e2a53a146606a48), // 5^-258 + (0x99c102844f94e0fb, 0x2eda7444cbfc426d), // 5^-257 + (0xc0314325637a1939, 0xfa911155fefb5308), // 5^-256 + (0xf03d93eebc589f88, 0x793555ab7eba27ca), // 5^-255 + (0x96267c7535b763b5, 0x4bc1558b2f3458de), // 5^-254 + (0xbbb01b9283253ca2, 0x9eb1aaedfb016f16), // 5^-253 + (0xea9c227723ee8bcb, 0x465e15a979c1cadc), // 5^-252 + (0x92a1958a7675175f, 0xbfacd89ec191ec9), // 5^-251 + (0xb749faed14125d36, 0xcef980ec671f667b), // 5^-250 + (0xe51c79a85916f484, 0x82b7e12780e7401a), // 5^-249 + (0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810), // 5^-248 + (0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15), // 5^-247 + (0xdfbdcece67006ac9, 0x67a791e093e1d49a), // 5^-246 + (0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0), // 5^-245 + (0xaecc49914078536d, 0x58fae9f773886e18), // 5^-244 + (0xda7f5bf590966848, 0xaf39a475506a899e), // 5^-243 + (0x888f99797a5e012d, 0x6d8406c952429603), // 5^-242 + (0xaab37fd7d8f58178, 0xc8e5087ba6d33b83), // 5^-241 + (0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64), // 5^-240 + (0x855c3be0a17fcd26, 0x5cf2eea09a55067f), // 5^-239 + (0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e), // 5^-238 + (0xd0601d8efc57b08b, 0xf13b94daf124da26), // 5^-237 + (0x823c12795db6ce57, 0x76c53d08d6b70858), // 5^-236 + (0xa2cb1717b52481ed, 0x54768c4b0c64ca6e), // 5^-235 + (0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09), // 5^-234 + (0xfe5d54150b090b02, 0xd3f93b35435d7c4c), // 5^-233 + (0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf), // 5^-232 + (0xc6b8e9b0709f109a, 0x359ab6419ca1091b), // 5^-231 + (0xf867241c8cc6d4c0, 0xc30163d203c94b62), // 5^-230 + (0x9b407691d7fc44f8, 0x79e0de63425dcf1d), // 5^-229 + (0xc21094364dfb5636, 0x985915fc12f542e4), // 5^-228 + (0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d), // 5^-227 + (0x979cf3ca6cec5b5a, 0xa705992ceecf9c42), // 5^-226 + (0xbd8430bd08277231, 0x50c6ff782a838353), // 5^-225 + (0xece53cec4a314ebd, 0xa4f8bf5635246428), // 5^-224 + (0x940f4613ae5ed136, 0x871b7795e136be99), // 5^-223 + (0xb913179899f68584, 0x28e2557b59846e3f), // 5^-222 + (0xe757dd7ec07426e5, 0x331aeada2fe589cf), // 5^-221 + (0x9096ea6f3848984f, 0x3ff0d2c85def7621), // 5^-220 + (0xb4bca50b065abe63, 0xfed077a756b53a9), // 5^-219 + (0xe1ebce4dc7f16dfb, 0xd3e8495912c62894), // 5^-218 + (0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c), // 5^-217 + (0xb080392cc4349dec, 0xbd8d794d96aacfb3), // 5^-216 + (0xdca04777f541c567, 0xecf0d7a0fc5583a0), // 5^-215 + (0x89e42caaf9491b60, 0xf41686c49db57244), // 5^-214 + (0xac5d37d5b79b6239, 0x311c2875c522ced5), // 
5^-213 + (0xd77485cb25823ac7, 0x7d633293366b828b), // 5^-212 + (0x86a8d39ef77164bc, 0xae5dff9c02033197), // 5^-211 + (0xa8530886b54dbdeb, 0xd9f57f830283fdfc), // 5^-210 + (0xd267caa862a12d66, 0xd072df63c324fd7b), // 5^-209 + (0x8380dea93da4bc60, 0x4247cb9e59f71e6d), // 5^-208 + (0xa46116538d0deb78, 0x52d9be85f074e608), // 5^-207 + (0xcd795be870516656, 0x67902e276c921f8b), // 5^-206 + (0x806bd9714632dff6, 0xba1cd8a3db53b6), // 5^-205 + (0xa086cfcd97bf97f3, 0x80e8a40eccd228a4), // 5^-204 + (0xc8a883c0fdaf7df0, 0x6122cd128006b2cd), // 5^-203 + (0xfad2a4b13d1b5d6c, 0x796b805720085f81), // 5^-202 + (0x9cc3a6eec6311a63, 0xcbe3303674053bb0), // 5^-201 + (0xc3f490aa77bd60fc, 0xbedbfc4411068a9c), // 5^-200 + (0xf4f1b4d515acb93b, 0xee92fb5515482d44), // 5^-199 + (0x991711052d8bf3c5, 0x751bdd152d4d1c4a), // 5^-198 + (0xbf5cd54678eef0b6, 0xd262d45a78a0635d), // 5^-197 + (0xef340a98172aace4, 0x86fb897116c87c34), // 5^-196 + (0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0), // 5^-195 + (0xbae0a846d2195712, 0x8974836059cca109), // 5^-194 + (0xe998d258869facd7, 0x2bd1a438703fc94b), // 5^-193 + (0x91ff83775423cc06, 0x7b6306a34627ddcf), // 5^-192 + (0xb67f6455292cbf08, 0x1a3bc84c17b1d542), // 5^-191 + (0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93), // 5^-190 + (0x8e938662882af53e, 0x547eb47b7282ee9c), // 5^-189 + (0xb23867fb2a35b28d, 0xe99e619a4f23aa43), // 5^-188 + (0xdec681f9f4c31f31, 0x6405fa00e2ec94d4), // 5^-187 + (0x8b3c113c38f9f37e, 0xde83bc408dd3dd04), // 5^-186 + (0xae0b158b4738705e, 0x9624ab50b148d445), // 5^-185 + (0xd98ddaee19068c76, 0x3badd624dd9b0957), // 5^-184 + (0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6), // 5^-183 + (0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c), // 5^-182 + (0xd47487cc8470652b, 0x7647c3200069671f), // 5^-181 + (0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073), // 5^-180 + (0xa5fb0a17c777cf09, 0xf468107100525890), // 5^-179 + (0xcf79cc9db955c2cc, 0x7182148d4066eeb4), // 5^-178 + (0x81ac1fe293d599bf, 0xc6f14cd848405530), // 5^-177 + (0xa21727db38cb002f, 0xb8ada00e5a506a7c), // 5^-176 + (0xca9cf1d206fdc03b, 0xa6d90811f0e4851c), // 5^-175 + (0xfd442e4688bd304a, 0x908f4a166d1da663), // 5^-174 + (0x9e4a9cec15763e2e, 0x9a598e4e043287fe), // 5^-173 + (0xc5dd44271ad3cdba, 0x40eff1e1853f29fd), // 5^-172 + (0xf7549530e188c128, 0xd12bee59e68ef47c), // 5^-171 + (0x9a94dd3e8cf578b9, 0x82bb74f8301958ce), // 5^-170 + (0xc13a148e3032d6e7, 0xe36a52363c1faf01), // 5^-169 + (0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1), // 5^-168 + (0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9), // 5^-167 + (0xbcb2b812db11a5de, 0x7415d448f6b6f0e7), // 5^-166 + (0xebdf661791d60f56, 0x111b495b3464ad21), // 5^-165 + (0x936b9fcebb25c995, 0xcab10dd900beec34), // 5^-164 + (0xb84687c269ef3bfb, 0x3d5d514f40eea742), // 5^-163 + (0xe65829b3046b0afa, 0xcb4a5a3112a5112), // 5^-162 + (0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab), // 5^-161 + (0xb3f4e093db73a093, 0x59ed216765690f56), // 5^-160 + (0xe0f218b8d25088b8, 0x306869c13ec3532c), // 5^-159 + (0x8c974f7383725573, 0x1e414218c73a13fb), // 5^-158 + (0xafbd2350644eeacf, 0xe5d1929ef90898fa), // 5^-157 + (0xdbac6c247d62a583, 0xdf45f746b74abf39), // 5^-156 + (0x894bc396ce5da772, 0x6b8bba8c328eb783), // 5^-155 + (0xab9eb47c81f5114f, 0x66ea92f3f326564), // 5^-154 + (0xd686619ba27255a2, 0xc80a537b0efefebd), // 5^-153 + (0x8613fd0145877585, 0xbd06742ce95f5f36), // 5^-152 + (0xa798fc4196e952e7, 0x2c48113823b73704), // 5^-151 + (0xd17f3b51fca3a7a0, 0xf75a15862ca504c5), // 5^-150 + (0x82ef85133de648c4, 0x9a984d73dbe722fb), // 5^-149 + (0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba), // 5^-148 + (0xcc963fee10b7d1b3, 
0x318df905079926a8), // 5^-147 + (0xffbbcfe994e5c61f, 0xfdf17746497f7052), // 5^-146 + (0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633), // 5^-145 + (0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0), // 5^-144 + (0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0), // 5^-143 + (0x9c1661a651213e2d, 0x6bea10ca65c084e), // 5^-142 + (0xc31bfa0fe5698db8, 0x486e494fcff30a62), // 5^-141 + (0xf3e2f893dec3f126, 0x5a89dba3c3efccfa), // 5^-140 + (0x986ddb5c6b3a76b7, 0xf89629465a75e01c), // 5^-139 + (0xbe89523386091465, 0xf6bbb397f1135823), // 5^-138 + (0xee2ba6c0678b597f, 0x746aa07ded582e2c), // 5^-137 + (0x94db483840b717ef, 0xa8c2a44eb4571cdc), // 5^-136 + (0xba121a4650e4ddeb, 0x92f34d62616ce413), // 5^-135 + (0xe896a0d7e51e1566, 0x77b020baf9c81d17), // 5^-134 + (0x915e2486ef32cd60, 0xace1474dc1d122e), // 5^-133 + (0xb5b5ada8aaff80b8, 0xd819992132456ba), // 5^-132 + (0xe3231912d5bf60e6, 0x10e1fff697ed6c69), // 5^-131 + (0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1), // 5^-130 + (0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2), // 5^-129 + (0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde), // 5^-128 + (0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b), // 5^-127 + (0xad4ab7112eb3929d, 0x86c16c98d2c953c6), // 5^-126 + (0xd89d64d57a607744, 0xe871c7bf077ba8b7), // 5^-125 + (0x87625f056c7c4a8b, 0x11471cd764ad4972), // 5^-124 + (0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf), // 5^-123 + (0xd389b47879823479, 0x4aff1d108d4ec2c3), // 5^-122 + (0x843610cb4bf160cb, 0xcedf722a585139ba), // 5^-121 + (0xa54394fe1eedb8fe, 0xc2974eb4ee658828), // 5^-120 + (0xce947a3da6a9273e, 0x733d226229feea32), // 5^-119 + (0x811ccc668829b887, 0x806357d5a3f525f), // 5^-118 + (0xa163ff802a3426a8, 0xca07c2dcb0cf26f7), // 5^-117 + (0xc9bcff6034c13052, 0xfc89b393dd02f0b5), // 5^-116 + (0xfc2c3f3841f17c67, 0xbbac2078d443ace2), // 5^-115 + (0x9d9ba7832936edc0, 0xd54b944b84aa4c0d), // 5^-114 + (0xc5029163f384a931, 0xa9e795e65d4df11), // 5^-113 + (0xf64335bcf065d37d, 0x4d4617b5ff4a16d5), // 5^-112 + (0x99ea0196163fa42e, 0x504bced1bf8e4e45), // 5^-111 + (0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6), // 5^-110 + (0xf07da27a82c37088, 0x5d767327bb4e5a4c), // 5^-109 + (0x964e858c91ba2655, 0x3a6a07f8d510f86f), // 5^-108 + (0xbbe226efb628afea, 0x890489f70a55368b), // 5^-107 + (0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e), // 5^-106 + (0x92c8ae6b464fc96f, 0x3b0b8bc90012929d), // 5^-105 + (0xb77ada0617e3bbcb, 0x9ce6ebb40173744), // 5^-104 + (0xe55990879ddcaabd, 0xcc420a6a101d0515), // 5^-103 + (0x8f57fa54c2a9eab6, 0x9fa946824a12232d), // 5^-102 + (0xb32df8e9f3546564, 0x47939822dc96abf9), // 5^-101 + (0xdff9772470297ebd, 0x59787e2b93bc56f7), // 5^-100 + (0x8bfbea76c619ef36, 0x57eb4edb3c55b65a), // 5^-99 + (0xaefae51477a06b03, 0xede622920b6b23f1), // 5^-98 + (0xdab99e59958885c4, 0xe95fab368e45eced), // 5^-97 + (0x88b402f7fd75539b, 0x11dbcb0218ebb414), // 5^-96 + (0xaae103b5fcd2a881, 0xd652bdc29f26a119), // 5^-95 + (0xd59944a37c0752a2, 0x4be76d3346f0495f), // 5^-94 + (0x857fcae62d8493a5, 0x6f70a4400c562ddb), // 5^-93 + (0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952), // 5^-92 + (0xd097ad07a71f26b2, 0x7e2000a41346a7a7), // 5^-91 + (0x825ecc24c873782f, 0x8ed400668c0c28c8), // 5^-90 + (0xa2f67f2dfa90563b, 0x728900802f0f32fa), // 5^-89 + (0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9), // 5^-88 + (0xfea126b7d78186bc, 0xe2f610c84987bfa8), // 5^-87 + (0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9), // 5^-86 + (0xc6ede63fa05d3143, 0x91503d1c79720dbb), // 5^-85 + (0xf8a95fcf88747d94, 0x75a44c6397ce912a), // 5^-84 + (0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba), // 5^-83 + (0xc24452da229b021b, 0xfbe85badce996168), // 5^-82 + (0xf2d56790ab41c2a2, 
0xfae27299423fb9c3), // 5^-81 + (0x97c560ba6b0919a5, 0xdccd879fc967d41a), // 5^-80 + (0xbdb6b8e905cb600f, 0x5400e987bbc1c920), // 5^-79 + (0xed246723473e3813, 0x290123e9aab23b68), // 5^-78 + (0x9436c0760c86e30b, 0xf9a0b6720aaf6521), // 5^-77 + (0xb94470938fa89bce, 0xf808e40e8d5b3e69), // 5^-76 + (0xe7958cb87392c2c2, 0xb60b1d1230b20e04), // 5^-75 + (0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2), // 5^-74 + (0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3), // 5^-73 + (0xe2280b6c20dd5232, 0x25c6da63c38de1b0), // 5^-72 + (0x8d590723948a535f, 0x579c487e5a38ad0e), // 5^-71 + (0xb0af48ec79ace837, 0x2d835a9df0c6d851), // 5^-70 + (0xdcdb1b2798182244, 0xf8e431456cf88e65), // 5^-69 + (0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff), // 5^-68 + (0xac8b2d36eed2dac5, 0xe272467e3d222f3f), // 5^-67 + (0xd7adf884aa879177, 0x5b0ed81dcc6abb0f), // 5^-66 + (0x86ccbb52ea94baea, 0x98e947129fc2b4e9), // 5^-65 + (0xa87fea27a539e9a5, 0x3f2398d747b36224), // 5^-64 + (0xd29fe4b18e88640e, 0x8eec7f0d19a03aad), // 5^-63 + (0x83a3eeeef9153e89, 0x1953cf68300424ac), // 5^-62 + (0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7), // 5^-61 + (0xcdb02555653131b6, 0x3792f412cb06794d), // 5^-60 + (0x808e17555f3ebf11, 0xe2bbd88bbee40bd0), // 5^-59 + (0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4), // 5^-58 + (0xc8de047564d20a8b, 0xf245825a5a445275), // 5^-57 + (0xfb158592be068d2e, 0xeed6e2f0f0d56712), // 5^-56 + (0x9ced737bb6c4183d, 0x55464dd69685606b), // 5^-55 + (0xc428d05aa4751e4c, 0xaa97e14c3c26b886), // 5^-54 + (0xf53304714d9265df, 0xd53dd99f4b3066a8), // 5^-53 + (0x993fe2c6d07b7fab, 0xe546a8038efe4029), // 5^-52 + (0xbf8fdb78849a5f96, 0xde98520472bdd033), // 5^-51 + (0xef73d256a5c0f77c, 0x963e66858f6d4440), // 5^-50 + (0x95a8637627989aad, 0xdde7001379a44aa8), // 5^-49 + (0xbb127c53b17ec159, 0x5560c018580d5d52), // 5^-48 + (0xe9d71b689dde71af, 0xaab8f01e6e10b4a6), // 5^-47 + (0x9226712162ab070d, 0xcab3961304ca70e8), // 5^-46 + (0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22), // 5^-45 + (0xe45c10c42a2b3b05, 0x8cb89a7db77c506a), // 5^-44 + (0x8eb98a7a9a5b04e3, 0x77f3608e92adb242), // 5^-43 + (0xb267ed1940f1c61c, 0x55f038b237591ed3), // 5^-42 + (0xdf01e85f912e37a3, 0x6b6c46dec52f6688), // 5^-41 + (0x8b61313bbabce2c6, 0x2323ac4b3b3da015), // 5^-40 + (0xae397d8aa96c1b77, 0xabec975e0a0d081a), // 5^-39 + (0xd9c7dced53c72255, 0x96e7bd358c904a21), // 5^-38 + (0x881cea14545c7575, 0x7e50d64177da2e54), // 5^-37 + (0xaa242499697392d2, 0xdde50bd1d5d0b9e9), // 5^-36 + (0xd4ad2dbfc3d07787, 0x955e4ec64b44e864), // 5^-35 + (0x84ec3c97da624ab4, 0xbd5af13bef0b113e), // 5^-34 + (0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e), // 5^-33 + (0xcfb11ead453994ba, 0x67de18eda5814af2), // 5^-32 + (0x81ceb32c4b43fcf4, 0x80eacf948770ced7), // 5^-31 + (0xa2425ff75e14fc31, 0xa1258379a94d028d), // 5^-30 + (0xcad2f7f5359a3b3e, 0x96ee45813a04330), // 5^-29 + (0xfd87b5f28300ca0d, 0x8bca9d6e188853fc), // 5^-28 + (0x9e74d1b791e07e48, 0x775ea264cf55347e), // 5^-27 + (0xc612062576589dda, 0x95364afe032a819e), // 5^-26 + (0xf79687aed3eec551, 0x3a83ddbd83f52205), // 5^-25 + (0x9abe14cd44753b52, 0xc4926a9672793543), // 5^-24 + (0xc16d9a0095928a27, 0x75b7053c0f178294), // 5^-23 + (0xf1c90080baf72cb1, 0x5324c68b12dd6339), // 5^-22 + (0x971da05074da7bee, 0xd3f6fc16ebca5e04), // 5^-21 + (0xbce5086492111aea, 0x88f4bb1ca6bcf585), // 5^-20 + (0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6), // 5^-19 + (0x9392ee8e921d5d07, 0x3aff322e62439fd0), // 5^-18 + (0xb877aa3236a4b449, 0x9befeb9fad487c3), // 5^-17 + (0xe69594bec44de15b, 0x4c2ebe687989a9b4), // 5^-16 + (0x901d7cf73ab0acd9, 0xf9d37014bf60a11), // 5^-15 + (0xb424dc35095cd80f, 
0x538484c19ef38c95), // 5^-14 + (0xe12e13424bb40e13, 0x2865a5f206b06fba), // 5^-13 + (0x8cbccc096f5088cb, 0xf93f87b7442e45d4), // 5^-12 + (0xafebff0bcb24aafe, 0xf78f69a51539d749), // 5^-11 + (0xdbe6fecebdedd5be, 0xb573440e5a884d1c), // 5^-10 + (0x89705f4136b4a597, 0x31680a88f8953031), // 5^-9 + (0xabcc77118461cefc, 0xfdc20d2b36ba7c3e), // 5^-8 + (0xd6bf94d5e57a42bc, 0x3d32907604691b4d), // 5^-7 + (0x8637bd05af6c69b5, 0xa63f9a49c2c1b110), // 5^-6 + (0xa7c5ac471b478423, 0xfcf80dc33721d54), // 5^-5 + (0xd1b71758e219652b, 0xd3c36113404ea4a9), // 5^-4 + (0x83126e978d4fdf3b, 0x645a1cac083126ea), // 5^-3 + (0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4), // 5^-2 + (0xcccccccccccccccc, 0xcccccccccccccccd), // 5^-1 + (0x8000000000000000, 0x0), // 5^0 + (0xa000000000000000, 0x0), // 5^1 + (0xc800000000000000, 0x0), // 5^2 + (0xfa00000000000000, 0x0), // 5^3 + (0x9c40000000000000, 0x0), // 5^4 + (0xc350000000000000, 0x0), // 5^5 + (0xf424000000000000, 0x0), // 5^6 + (0x9896800000000000, 0x0), // 5^7 + (0xbebc200000000000, 0x0), // 5^8 + (0xee6b280000000000, 0x0), // 5^9 + (0x9502f90000000000, 0x0), // 5^10 + (0xba43b74000000000, 0x0), // 5^11 + (0xe8d4a51000000000, 0x0), // 5^12 + (0x9184e72a00000000, 0x0), // 5^13 + (0xb5e620f480000000, 0x0), // 5^14 + (0xe35fa931a0000000, 0x0), // 5^15 + (0x8e1bc9bf04000000, 0x0), // 5^16 + (0xb1a2bc2ec5000000, 0x0), // 5^17 + (0xde0b6b3a76400000, 0x0), // 5^18 + (0x8ac7230489e80000, 0x0), // 5^19 + (0xad78ebc5ac620000, 0x0), // 5^20 + (0xd8d726b7177a8000, 0x0), // 5^21 + (0x878678326eac9000, 0x0), // 5^22 + (0xa968163f0a57b400, 0x0), // 5^23 + (0xd3c21bcecceda100, 0x0), // 5^24 + (0x84595161401484a0, 0x0), // 5^25 + (0xa56fa5b99019a5c8, 0x0), // 5^26 + (0xcecb8f27f4200f3a, 0x0), // 5^27 + (0x813f3978f8940984, 0x4000000000000000), // 5^28 + (0xa18f07d736b90be5, 0x5000000000000000), // 5^29 + (0xc9f2c9cd04674ede, 0xa400000000000000), // 5^30 + (0xfc6f7c4045812296, 0x4d00000000000000), // 5^31 + (0x9dc5ada82b70b59d, 0xf020000000000000), // 5^32 + (0xc5371912364ce305, 0x6c28000000000000), // 5^33 + (0xf684df56c3e01bc6, 0xc732000000000000), // 5^34 + (0x9a130b963a6c115c, 0x3c7f400000000000), // 5^35 + (0xc097ce7bc90715b3, 0x4b9f100000000000), // 5^36 + (0xf0bdc21abb48db20, 0x1e86d40000000000), // 5^37 + (0x96769950b50d88f4, 0x1314448000000000), // 5^38 + (0xbc143fa4e250eb31, 0x17d955a000000000), // 5^39 + (0xeb194f8e1ae525fd, 0x5dcfab0800000000), // 5^40 + (0x92efd1b8d0cf37be, 0x5aa1cae500000000), // 5^41 + (0xb7abc627050305ad, 0xf14a3d9e40000000), // 5^42 + (0xe596b7b0c643c719, 0x6d9ccd05d0000000), // 5^43 + (0x8f7e32ce7bea5c6f, 0xe4820023a2000000), // 5^44 + (0xb35dbf821ae4f38b, 0xdda2802c8a800000), // 5^45 + (0xe0352f62a19e306e, 0xd50b2037ad200000), // 5^46 + (0x8c213d9da502de45, 0x4526f422cc340000), // 5^47 + (0xaf298d050e4395d6, 0x9670b12b7f410000), // 5^48 + (0xdaf3f04651d47b4c, 0x3c0cdd765f114000), // 5^49 + (0x88d8762bf324cd0f, 0xa5880a69fb6ac800), // 5^50 + (0xab0e93b6efee0053, 0x8eea0d047a457a00), // 5^51 + (0xd5d238a4abe98068, 0x72a4904598d6d880), // 5^52 + (0x85a36366eb71f041, 0x47a6da2b7f864750), // 5^53 + (0xa70c3c40a64e6c51, 0x999090b65f67d924), // 5^54 + (0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d), // 5^55 + (0x82818f1281ed449f, 0xbff8f10e7a8921a4), // 5^56 + (0xa321f2d7226895c7, 0xaff72d52192b6a0d), // 5^57 + (0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490), // 5^58 + (0xfee50b7025c36a08, 0x2f236d04753d5b4), // 5^59 + (0x9f4f2726179a2245, 0x1d762422c946590), // 5^60 + (0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5), // 5^61 + (0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2), // 5^62 + 
(0x9b934c3b330c8577, 0x63cc55f49f88eb2f), // 5^63 + (0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb), // 5^64 + (0xf316271c7fc3908a, 0x8bef464e3945ef7a), // 5^65 + (0x97edd871cfda3a56, 0x97758bf0e3cbb5ac), // 5^66 + (0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317), // 5^67 + (0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd), // 5^68 + (0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a), // 5^69 + (0xb975d6b6ee39e436, 0xb3e2fd538e122b44), // 5^70 + (0xe7d34c64a9c85d44, 0x60dbbca87196b616), // 5^71 + (0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd), // 5^72 + (0xb51d13aea4a488dd, 0x6babab6398bdbe41), // 5^73 + (0xe264589a4dcdab14, 0xc696963c7eed2dd1), // 5^74 + (0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2), // 5^75 + (0xb0de65388cc8ada8, 0x3b25a55f43294bcb), // 5^76 + (0xdd15fe86affad912, 0x49ef0eb713f39ebe), // 5^77 + (0x8a2dbf142dfcc7ab, 0x6e3569326c784337), // 5^78 + (0xacb92ed9397bf996, 0x49c2c37f07965404), // 5^79 + (0xd7e77a8f87daf7fb, 0xdc33745ec97be906), // 5^80 + (0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3), // 5^81 + (0xa8acd7c0222311bc, 0xc40832ea0d68ce0c), // 5^82 + (0xd2d80db02aabd62b, 0xf50a3fa490c30190), // 5^83 + (0x83c7088e1aab65db, 0x792667c6da79e0fa), // 5^84 + (0xa4b8cab1a1563f52, 0x577001b891185938), // 5^85 + (0xcde6fd5e09abcf26, 0xed4c0226b55e6f86), // 5^86 + (0x80b05e5ac60b6178, 0x544f8158315b05b4), // 5^87 + (0xa0dc75f1778e39d6, 0x696361ae3db1c721), // 5^88 + (0xc913936dd571c84c, 0x3bc3a19cd1e38e9), // 5^89 + (0xfb5878494ace3a5f, 0x4ab48a04065c723), // 5^90 + (0x9d174b2dcec0e47b, 0x62eb0d64283f9c76), // 5^91 + (0xc45d1df942711d9a, 0x3ba5d0bd324f8394), // 5^92 + (0xf5746577930d6500, 0xca8f44ec7ee36479), // 5^93 + (0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb), // 5^94 + (0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e), // 5^95 + (0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e), // 5^96 + (0x95d04aee3b80ece5, 0xbba1f1d158724a12), // 5^97 + (0xbb445da9ca61281f, 0x2a8a6e45ae8edc97), // 5^98 + (0xea1575143cf97226, 0xf52d09d71a3293bd), // 5^99 + (0x924d692ca61be758, 0x593c2626705f9c56), // 5^100 + (0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c), // 5^101 + (0xe498f455c38b997a, 0xb6dfb9c0f956447), // 5^102 + (0x8edf98b59a373fec, 0x4724bd4189bd5eac), // 5^103 + (0xb2977ee300c50fe7, 0x58edec91ec2cb657), // 5^104 + (0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed), // 5^105 + (0x8b865b215899f46c, 0xbd79e0d20082ee74), // 5^106 + (0xae67f1e9aec07187, 0xecd8590680a3aa11), // 5^107 + (0xda01ee641a708de9, 0xe80e6f4820cc9495), // 5^108 + (0x884134fe908658b2, 0x3109058d147fdcdd), // 5^109 + (0xaa51823e34a7eede, 0xbd4b46f0599fd415), // 5^110 + (0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a), // 5^111 + (0x850fadc09923329e, 0x3e2cf6bc604ddb0), // 5^112 + (0xa6539930bf6bff45, 0x84db8346b786151c), // 5^113 + (0xcfe87f7cef46ff16, 0xe612641865679a63), // 5^114 + (0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e), // 5^115 + (0xa26da3999aef7749, 0xe3be5e330f38f09d), // 5^116 + (0xcb090c8001ab551c, 0x5cadf5bfd3072cc5), // 5^117 + (0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6), // 5^118 + (0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa), // 5^119 + (0xc646d63501a1511d, 0xb281e1fd541501b8), // 5^120 + (0xf7d88bc24209a565, 0x1f225a7ca91a4226), // 5^121 + (0x9ae757596946075f, 0x3375788de9b06958), // 5^122 + (0xc1a12d2fc3978937, 0x52d6b1641c83ae), // 5^123 + (0xf209787bb47d6b84, 0xc0678c5dbd23a49a), // 5^124 + (0x9745eb4d50ce6332, 0xf840b7ba963646e0), // 5^125 + (0xbd176620a501fbff, 0xb650e5a93bc3d898), // 5^126 + (0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe), // 5^127 + (0x93ba47c980e98cdf, 0xc66f336c36b10137), // 5^128 + (0xb8a8d9bbe123f017, 0xb80b0047445d4184), // 5^129 + (0xe6d3102ad96cec1d, 0xa60dc059157491e5), // 
5^130 + (0x9043ea1ac7e41392, 0x87c89837ad68db2f), // 5^131 + (0xb454e4a179dd1877, 0x29babe4598c311fb), // 5^132 + (0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a), // 5^133 + (0x8ce2529e2734bb1d, 0x1899e4a65f58660c), // 5^134 + (0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f), // 5^135 + (0xdc21a1171d42645d, 0x76707543f4fa1f73), // 5^136 + (0x899504ae72497eba, 0x6a06494a791c53a8), // 5^137 + (0xabfa45da0edbde69, 0x487db9d17636892), // 5^138 + (0xd6f8d7509292d603, 0x45a9d2845d3c42b6), // 5^139 + (0x865b86925b9bc5c2, 0xb8a2392ba45a9b2), // 5^140 + (0xa7f26836f282b732, 0x8e6cac7768d7141e), // 5^141 + (0xd1ef0244af2364ff, 0x3207d795430cd926), // 5^142 + (0x8335616aed761f1f, 0x7f44e6bd49e807b8), // 5^143 + (0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6), // 5^144 + (0xcd036837130890a1, 0x36dba887c37a8c0f), // 5^145 + (0x802221226be55a64, 0xc2494954da2c9789), // 5^146 + (0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c), // 5^147 + (0xc83553c5c8965d3d, 0x6f92829494e5acc7), // 5^148 + (0xfa42a8b73abbf48c, 0xcb772339ba1f17f9), // 5^149 + (0x9c69a97284b578d7, 0xff2a760414536efb), // 5^150 + (0xc38413cf25e2d70d, 0xfef5138519684aba), // 5^151 + (0xf46518c2ef5b8cd1, 0x7eb258665fc25d69), // 5^152 + (0x98bf2f79d5993802, 0xef2f773ffbd97a61), // 5^153 + (0xbeeefb584aff8603, 0xaafb550ffacfd8fa), // 5^154 + (0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38), // 5^155 + (0x952ab45cfa97a0b2, 0xdd945a747bf26183), // 5^156 + (0xba756174393d88df, 0x94f971119aeef9e4), // 5^157 + (0xe912b9d1478ceb17, 0x7a37cd5601aab85d), // 5^158 + (0x91abb422ccb812ee, 0xac62e055c10ab33a), // 5^159 + (0xb616a12b7fe617aa, 0x577b986b314d6009), // 5^160 + (0xe39c49765fdf9d94, 0xed5a7e85fda0b80b), // 5^161 + (0x8e41ade9fbebc27d, 0x14588f13be847307), // 5^162 + (0xb1d219647ae6b31c, 0x596eb2d8ae258fc8), // 5^163 + (0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb), // 5^164 + (0x8aec23d680043bee, 0x25de7bb9480d5854), // 5^165 + (0xada72ccc20054ae9, 0xaf561aa79a10ae6a), // 5^166 + (0xd910f7ff28069da4, 0x1b2ba1518094da04), // 5^167 + (0x87aa9aff79042286, 0x90fb44d2f05d0842), // 5^168 + (0xa99541bf57452b28, 0x353a1607ac744a53), // 5^169 + (0xd3fa922f2d1675f2, 0x42889b8997915ce8), // 5^170 + (0x847c9b5d7c2e09b7, 0x69956135febada11), // 5^171 + (0xa59bc234db398c25, 0x43fab9837e699095), // 5^172 + (0xcf02b2c21207ef2e, 0x94f967e45e03f4bb), // 5^173 + (0x8161afb94b44f57d, 0x1d1be0eebac278f5), // 5^174 + (0xa1ba1ba79e1632dc, 0x6462d92a69731732), // 5^175 + (0xca28a291859bbf93, 0x7d7b8f7503cfdcfe), // 5^176 + (0xfcb2cb35e702af78, 0x5cda735244c3d43e), // 5^177 + (0x9defbf01b061adab, 0x3a0888136afa64a7), // 5^178 + (0xc56baec21c7a1916, 0x88aaa1845b8fdd0), // 5^179 + (0xf6c69a72a3989f5b, 0x8aad549e57273d45), // 5^180 + (0x9a3c2087a63f6399, 0x36ac54e2f678864b), // 5^181 + (0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd), // 5^182 + (0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5), // 5^183 + (0x969eb7c47859e743, 0x9f644ae5a4b1b325), // 5^184 + (0xbc4665b596706114, 0x873d5d9f0dde1fee), // 5^185 + (0xeb57ff22fc0c7959, 0xa90cb506d155a7ea), // 5^186 + (0x9316ff75dd87cbd8, 0x9a7f12442d588f2), // 5^187 + (0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f), // 5^188 + (0xe5d3ef282a242e81, 0x8f1668c8a86da5fa), // 5^189 + (0x8fa475791a569d10, 0xf96e017d694487bc), // 5^190 + (0xb38d92d760ec4455, 0x37c981dcc395a9ac), // 5^191 + (0xe070f78d3927556a, 0x85bbe253f47b1417), // 5^192 + (0x8c469ab843b89562, 0x93956d7478ccec8e), // 5^193 + (0xaf58416654a6babb, 0x387ac8d1970027b2), // 5^194 + (0xdb2e51bfe9d0696a, 0x6997b05fcc0319e), // 5^195 + (0x88fcf317f22241e2, 0x441fece3bdf81f03), // 5^196 + (0xab3c2fddeeaad25a, 0xd527e81cad7626c3), // 5^197 + 
(0xd60b3bd56a5586f1, 0x8a71e223d8d3b074), // 5^198 + (0x85c7056562757456, 0xf6872d5667844e49), // 5^199 + (0xa738c6bebb12d16c, 0xb428f8ac016561db), // 5^200 + (0xd106f86e69d785c7, 0xe13336d701beba52), // 5^201 + (0x82a45b450226b39c, 0xecc0024661173473), // 5^202 + (0xa34d721642b06084, 0x27f002d7f95d0190), // 5^203 + (0xcc20ce9bd35c78a5, 0x31ec038df7b441f4), // 5^204 + (0xff290242c83396ce, 0x7e67047175a15271), // 5^205 + (0x9f79a169bd203e41, 0xf0062c6e984d386), // 5^206 + (0xc75809c42c684dd1, 0x52c07b78a3e60868), // 5^207 + (0xf92e0c3537826145, 0xa7709a56ccdf8a82), // 5^208 + (0x9bbcc7a142b17ccb, 0x88a66076400bb691), // 5^209 + (0xc2abf989935ddbfe, 0x6acff893d00ea435), // 5^210 + (0xf356f7ebf83552fe, 0x583f6b8c4124d43), // 5^211 + (0x98165af37b2153de, 0xc3727a337a8b704a), // 5^212 + (0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c), // 5^213 + (0xeda2ee1c7064130c, 0x1162def06f79df73), // 5^214 + (0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8), // 5^215 + (0xb9a74a0637ce2ee1, 0x6d953e2bd7173692), // 5^216 + (0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437), // 5^217 + (0x910ab1d4db9914a0, 0x1d9c9892400a22a2), // 5^218 + (0xb54d5e4a127f59c8, 0x2503beb6d00cab4b), // 5^219 + (0xe2a0b5dc971f303a, 0x2e44ae64840fd61d), // 5^220 + (0x8da471a9de737e24, 0x5ceaecfed289e5d2), // 5^221 + (0xb10d8e1456105dad, 0x7425a83e872c5f47), // 5^222 + (0xdd50f1996b947518, 0xd12f124e28f77719), // 5^223 + (0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f), // 5^224 + (0xace73cbfdc0bfb7b, 0x636cc64d1001550b), // 5^225 + (0xd8210befd30efa5a, 0x3c47f7e05401aa4e), // 5^226 + (0x8714a775e3e95c78, 0x65acfaec34810a71), // 5^227 + (0xa8d9d1535ce3b396, 0x7f1839a741a14d0d), // 5^228 + (0xd31045a8341ca07c, 0x1ede48111209a050), // 5^229 + (0x83ea2b892091e44d, 0x934aed0aab460432), // 5^230 + (0xa4e4b66b68b65d60, 0xf81da84d5617853f), // 5^231 + (0xce1de40642e3f4b9, 0x36251260ab9d668e), // 5^232 + (0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019), // 5^233 + (0xa1075a24e4421730, 0xb24cf65b8612f81f), // 5^234 + (0xc94930ae1d529cfc, 0xdee033f26797b627), // 5^235 + (0xfb9b7cd9a4a7443c, 0x169840ef017da3b1), // 5^236 + (0x9d412e0806e88aa5, 0x8e1f289560ee864e), // 5^237 + (0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2), // 5^238 + (0xf5b5d7ec8acb58a2, 0xae10af696774b1db), // 5^239 + (0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29), // 5^240 + (0xbff610b0cc6edd3f, 0x17fd090a58d32af3), // 5^241 + (0xeff394dcff8a948e, 0xddfc4b4cef07f5b0), // 5^242 + (0x95f83d0a1fb69cd9, 0x4abdaf101564f98e), // 5^243 + (0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1), // 5^244 + (0xea53df5fd18d5513, 0x84c86189216dc5ed), // 5^245 + (0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4), // 5^246 + (0xb7118682dbb66a77, 0x3fbc8c33221dc2a1), // 5^247 + (0xe4d5e82392a40515, 0xfabaf3feaa5334a), // 5^248 + (0x8f05b1163ba6832d, 0x29cb4d87f2a7400e), // 5^249 + (0xb2c71d5bca9023f8, 0x743e20e9ef511012), // 5^250 + (0xdf78e4b2bd342cf6, 0x914da9246b255416), // 5^251 + (0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e), // 5^252 + (0xae9672aba3d0c320, 0xa184ac2473b529b1), // 5^253 + (0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e), // 5^254 + (0x8865899617fb1871, 0x7e2fa67c7a658892), // 5^255 + (0xaa7eebfb9df9de8d, 0xddbb901b98feeab7), // 5^256 + (0xd51ea6fa85785631, 0x552a74227f3ea565), // 5^257 + (0x8533285c936b35de, 0xd53a88958f87275f), // 5^258 + (0xa67ff273b8460356, 0x8a892abaf368f137), // 5^259 + (0xd01fef10a657842c, 0x2d2b7569b0432d85), // 5^260 + (0x8213f56a67f6b29b, 0x9c3b29620e29fc73), // 5^261 + (0xa298f2c501f45f42, 0x8349f3ba91b47b8f), // 5^262 + (0xcb3f2f7642717713, 0x241c70a936219a73), // 5^263 + (0xfe0efb53d30dd4d7, 0xed238cd383aa0110), // 5^264 + 
+    (0x9ec95d1463e8a506, 0xf4363804324a40aa), // 5^265
+    (0xc67bb4597ce2ce48, 0xb143c6053edcd0d5), // 5^266
+    (0xf81aa16fdc1b81da, 0xdd94b7868e94050a), // 5^267
+    (0x9b10a4e5e9913128, 0xca7cf2b4191c8326), // 5^268
+    (0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0), // 5^269
+    (0xf24a01a73cf2dccf, 0xbc633b39673c8cec), // 5^270
+    (0x976e41088617ca01, 0xd5be0503e085d813), // 5^271
+    (0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18), // 5^272
+    (0xec9c459d51852ba2, 0xddf8e7d60ed1219e), // 5^273
+    (0x93e1ab8252f33b45, 0xcabb90e5c942b503), // 5^274
+    (0xb8da1662e7b00a17, 0x3d6a751f3b936243), // 5^275
+    (0xe7109bfba19c0c9d, 0xcc512670a783ad4), // 5^276
+    (0x906a617d450187e2, 0x27fb2b80668b24c5), // 5^277
+    (0xb484f9dc9641e9da, 0xb1f9f660802dedf6), // 5^278
+    (0xe1a63853bbd26451, 0x5e7873f8a0396973), // 5^279
+    (0x8d07e33455637eb2, 0xdb0b487b6423e1e8), // 5^280
+    (0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62), // 5^281
+    (0xdc5c5301c56b75f7, 0x7641a140cc7810fb), // 5^282
+    (0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d), // 5^283
+    (0xac2820d9623bf429, 0x546345fa9fbdcd44), // 5^284
+    (0xd732290fbacaf133, 0xa97c177947ad4095), // 5^285
+    (0x867f59a9d4bed6c0, 0x49ed8eabcccc485d), // 5^286
+    (0xa81f301449ee8c70, 0x5c68f256bfff5a74), // 5^287
+    (0xd226fc195c6a2f8c, 0x73832eec6fff3111), // 5^288
+    (0x83585d8fd9c25db7, 0xc831fd53c5ff7eab), // 5^289
+    (0xa42e74f3d032f525, 0xba3e7ca8b77f5e55), // 5^290
+    (0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb), // 5^291
+    (0x80444b5e7aa7cf85, 0x7980d163cf5b81b3), // 5^292
+    (0xa0555e361951c366, 0xd7e105bcc332621f), // 5^293
+    (0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7), // 5^294
+    (0xfa856334878fc150, 0xb14f98f6f0feb951), // 5^295
+    (0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3), // 5^296
+    (0xc3b8358109e84f07, 0xa862f80ec4700c8), // 5^297
+    (0xf4a642e14c6262c8, 0xcd27bb612758c0fa), // 5^298
+    (0x98e7e9cccfbd7dbd, 0x8038d51cb897789c), // 5^299
+    (0xbf21e44003acdd2c, 0xe0470a63e6bd56c3), // 5^300
+    (0xeeea5d5004981478, 0x1858ccfce06cac74), // 5^301
+    (0x95527a5202df0ccb, 0xf37801e0c43ebc8), // 5^302
+    (0xbaa718e68396cffd, 0xd30560258f54e6ba), // 5^303
+    (0xe950df20247c83fd, 0x47c6b82ef32a2069), // 5^304
+    (0x91d28b7416cdd27e, 0x4cdc331d57fa5441), // 5^305
+    (0xb6472e511c81471d, 0xe0133fe4adf8e952), // 5^306
+    (0xe3d8f9e563a198e5, 0x58180fddd97723a6), // 5^307
+    (0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648), // 5^308
+];
diff --git a/third_party/rust/minimal-lexical/src/table_small.rs b/third_party/rust/minimal-lexical/src/table_small.rs
new file mode 100644
index 0000000000..9da69916fb
--- /dev/null
+++ b/third_party/rust/minimal-lexical/src/table_small.rs
@@ -0,0 +1,90 @@
+//! Pre-computed small tables for parsing decimal strings.
+
+#![doc(hidden)]
+#![cfg(not(feature = "compact"))]
+
+/// Pre-computed, small powers-of-5.
+pub const SMALL_INT_POW5: [u64; 28] = [
+    1,
+    5,
+    25,
+    125,
+    625,
+    3125,
+    15625,
+    78125,
+    390625,
+    1953125,
+    9765625,
+    48828125,
+    244140625,
+    1220703125,
+    6103515625,
+    30517578125,
+    152587890625,
+    762939453125,
+    3814697265625,
+    19073486328125,
+    95367431640625,
+    476837158203125,
+    2384185791015625,
+    11920928955078125,
+    59604644775390625,
+    298023223876953125,
+    1490116119384765625,
+    7450580596923828125,
+];
+
+/// Pre-computed, small powers-of-10.
+pub const SMALL_INT_POW10: [u64; 20] = [
+    1,
+    10,
+    100,
+    1000,
+    10000,
+    100000,
+    1000000,
+    10000000,
+    100000000,
+    1000000000,
+    10000000000,
+    100000000000,
+    1000000000000,
+    10000000000000,
+    100000000000000,
+    1000000000000000,
+    10000000000000000,
+    100000000000000000,
+    1000000000000000000,
+    10000000000000000000,
+];
+
+/// Pre-computed, small powers-of-10.
+pub const SMALL_F32_POW10: [f32; 16] =
+    [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
+
+/// Pre-computed, small powers-of-10.
+pub const SMALL_F64_POW10: [f64; 32] = [
+    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
+    1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+];
+
+/// Pre-computed large power-of-5 for 32-bit limbs.
+#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
+pub const LARGE_POW5: [u32; 10] = [
+    4279965485, 329373468, 4020270615, 2137533757, 4287402176, 1057042919, 1071430142, 2440757623,
+    381945767, 46164893,
+];
+
+/// Pre-computed large power-of-5 for 64-bit limbs.
+#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
+pub const LARGE_POW5: [u64; 5] = [
+    1414648277510068013,
+    9180637584431281687,
+    4539964771860779200,
+    10482974169319127550,
+    198276706040285095,
+];
+
+/// Step for large power-of-5 for 32-bit limbs.
+pub const LARGE_POW5_STEP: u32 = 135;
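A note on the float tables above: 10^10 and 10^22 are the largest powers of ten exactly representable in an f32 and an f64 respectively (5^11 exceeds the 24-bit significand, and 5^23 the 53-bit one), which is why SMALL_F32_POW10 and SMALL_F64_POW10 are zero-padded past those indices, presumably out to a power-of-two length so lookups can be masked rather than bounds-checked. LARGE_POW5 likewise appears to hold 5^135, i.e. 5^LARGE_POW5_STEP, split into limbs, so the big-integer path can scale its exponent in steps of 135 per multiplication. Below is a minimal sketch of the kind of fast path SMALL_F64_POW10 enables; parse_fast_path and its signature are illustrative assumptions, not minimal-lexical's actual API:

    // Sketch only: relies on the `SMALL_F64_POW10` table defined above.
    fn parse_fast_path(mantissa: u64, exponent: i32) -> Option<f64> {
        // The mantissa must be exactly representable in an f64's 53-bit
        // significand, otherwise the conversion below already rounds.
        if mantissa > (1u64 << 53) {
            return None;
        }
        let m = mantissa as f64;
        match exponent {
            0 => Some(m),
            // Both operands are exact, so the single rounding performed by
            // the hardware multiply yields the correctly rounded result.
            1..=22 => Some(m * SMALL_F64_POW10[exponent as usize]),
            // Division by an exact power of ten also rounds exactly once.
            -22..=-1 => Some(m / SMALL_F64_POW10[(-exponent) as usize]),
            _ => None,
        }
    }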