From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/compiler_builtins/src/float/div.rs | 467 ++++++++++++++++++++++++++++++ 1 file changed, 467 insertions(+) create mode 100644 vendor/compiler_builtins/src/float/div.rs (limited to 'vendor/compiler_builtins/src/float/div.rs') diff --git a/vendor/compiler_builtins/src/float/div.rs b/vendor/compiler_builtins/src/float/div.rs new file mode 100644 index 000000000..528a8368d --- /dev/null +++ b/vendor/compiler_builtins/src/float/div.rs @@ -0,0 +1,467 @@ +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] + +use float::Float; +use int::{CastInto, DInt, HInt, Int}; + +fn div32(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + // let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + + #[inline(always)] + fn negate_u32(a: u32) -> u32 { + (::wrapping_neg(a as i32)) as u32 + } + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_repr(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_repr(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_repr(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_repr(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_repr(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_repr(inf_rep | quotient_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale -= exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + let mut quotient_exponent: i32 = CastInto::::cast(a_exponent) + .wrapping_sub(CastInto::::cast(b_exponent)) + .wrapping_add(scale); + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + let q31b = CastInto::::cast(b_significand << 8.cast()); + let mut reciprocal = (0x7504f333u32).wrapping_sub(q31b); + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + + let mut correction: u32 = + negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + + // Exhaustive testing shows that the error in reciprocal after three steps + // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our + // expectations. We bump the reciprocal by a tiny value to force the error + // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to + // be specific). This also causes 1/1 to give a sensible approximation + // instead of zero (due to overflow). + reciprocal = reciprocal.wrapping_sub(2); + + // The numerical reciprocal is accurate to within 2^-28, lies in the + // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller + // than the true reciprocal of b. Multiplying a by this reciprocal thus + // gives a numerical q = a/b in Q24 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) + // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // from the fact that we truncate the product, and the 2^27 term + // is the error in the reciprocal of b scaled by the maximum + // possible value of a. As a consequence of this error bound, + // either q or nextafter(q) is the correctly rounded + let mut quotient = (a_significand << 1).widen_mul(reciprocal.cast()).hi(); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + let residual = if quotient < (implicit_bit << 1) { + quotient_exponent = quotient_exponent.wrapping_sub(1); + (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + } else { + quotient >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + }; + + let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + + if written_exponent >= max_exponent as i32 { + // If we have overflowed the exponent, return infinity. + return F::from_repr(inf_rep | quotient_sign); + } else if written_exponent < 1 { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return F::from_repr(quotient_sign); + } else { + let round = ((residual << 1) > b_significand) as u32; + // Clear the implicit bits + let mut abs_result = quotient & significand_mask; + // Insert the exponent + abs_result |= written_exponent.cast() << significand_bits; + // Round + abs_result = abs_result.wrapping_add(round.cast()); + // Insert the sign and return + return F::from_repr(abs_result | quotient_sign); + } +} + +fn div64(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, + u64: CastInto, + F::Int: CastInto, + i64: CastInto, + F::Int: CastInto, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + // let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + // let exponent_bits = F::EXPONENT_BITS; + + #[inline(always)] + fn negate_u32(a: u32) -> u32 { + (::wrapping_neg(a as i32)) as u32 + } + + #[inline(always)] + fn negate_u64(a: u64) -> u64 { + (::wrapping_neg(a as i64)) as u64 + } + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_repr(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_repr(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_repr(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_repr(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_repr(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_repr(inf_rep | quotient_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale -= exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + let mut quotient_exponent: i32 = CastInto::::cast(a_exponent) + .wrapping_sub(CastInto::::cast(b_exponent)) + .wrapping_add(scale); + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + let q31b = CastInto::::cast(b_significand >> 21.cast()); + let mut recip32 = (0x7504f333u32).wrapping_sub(q31b); + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + + let mut correction32: u32 = + negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + + // recip32 might have overflowed to exactly zero in the preceeding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip32 downward by one bit. + recip32 = recip32.wrapping_sub(1); + + // We need to perform one more iteration to get us to 56 binary digits; + // The last iteration needs to happen with extra precision. + let q63blo = CastInto::::cast(b_significand << 11.cast()); + + let correction: u64 = negate_u64( + (recip32 as u64) + .wrapping_mul(q31b as u64) + .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32), + ); + let c_hi = (correction >> 32) as u32; + let c_lo = correction as u32; + let mut reciprocal: u64 = (recip32 as u64) + .wrapping_mul(c_hi as u64) + .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32); + + // We already adjusted the 32-bit estimate, now we need to adjust the final + // 64-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal = reciprocal.wrapping_sub(2); + + // The numerical reciprocal is accurate to within 2^-56, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q53 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-53 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + // let mut quotient: F::Int = ((((reciprocal as u64) + // .wrapping_mul(CastInto::::cast(a_significand << 1) as u64)) + // >> 32) as u32) + // .cast(); + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + let mut quotient = (a_significand << 2).widen_mul(reciprocal.cast()).hi(); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + let residual = if quotient < (implicit_bit << 1) { + quotient_exponent = quotient_exponent.wrapping_sub(1); + (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + } else { + quotient >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + }; + + let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + + if written_exponent >= max_exponent as i32 { + // If we have overflowed the exponent, return infinity. + return F::from_repr(inf_rep | quotient_sign); + } else if written_exponent < 1 { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return F::from_repr(quotient_sign); + } else { + let round = ((residual << 1) > b_significand) as u32; + // Clear the implicit bits + let mut abs_result = quotient & significand_mask; + // Insert the exponent + abs_result |= written_exponent.cast() << significand_bits; + // Round + abs_result = abs_result.wrapping_add(round.cast()); + // Insert the sign and return + return F::from_repr(abs_result | quotient_sign); + } +} + +intrinsics! { + #[arm_aeabi_alias = __aeabi_fdiv] + pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div32(a, b) + } + + #[arm_aeabi_alias = __aeabi_ddiv] + pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div64(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { + a / b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { + a / b + } +} -- cgit v1.2.3