use crate::ieee; use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd}; use core::cmp::Ordering; use core::fmt; use core::ops::Neg; #[must_use] #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] pub struct DoubleFloat(F, F); pub type DoubleDouble = DoubleFloat; // These are legacy semantics for the Fallback, inaccrurate implementation of // IBM double-double, if the accurate DoubleDouble doesn't handle the // operation. It's equivalent to having an IEEE number with consecutive 106 // bits of mantissa and 11 bits of exponent. // // It's not equivalent to IBM double-double. For example, a legit IBM // double-double, 1 + epsilon: // // 1 + epsilon = 1 + (1 >> 1076) // // is not representable by a consecutive 106 bits of mantissa. // // Currently, these semantics are used in the following way: // // DoubleDouble -> (Double, Double) -> // DoubleDouble's Fallback -> IEEE operations // // FIXME: Implement all operations in DoubleDouble, and delete these // semantics. // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. pub struct FallbackS(F); type Fallback = ieee::IeeeFloat>; impl ieee::Semantics for FallbackS { // Forbid any conversion to/from bits. const BITS: usize = 0; const EXP_BITS: usize = 0; const PRECISION: usize = F::PRECISION * 2; const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt; } // Convert number to F. To avoid spurious underflows, we re- // normalize against the F exponent range first, and only *then* // truncate the mantissa. The result of that second conversion // may be inexact, but should never underflow. // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. pub struct FallbackExtendedS(F); type FallbackExtended = ieee::IeeeFloat>; impl ieee::Semantics for FallbackExtendedS { // Forbid any conversion to/from bits. const BITS: usize = 0; const EXP_BITS: usize = 0; const PRECISION: usize = Fallback::::PRECISION; const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt; } impl From> for DoubleFloat where F: FloatConvert>, FallbackExtended: FloatConvert, { fn from(x: Fallback) -> Self { let mut status; let mut loses_info = false; let extended: FallbackExtended = unpack!(status=, x.convert(&mut loses_info)); assert_eq!((status, loses_info), (Status::OK, false)); let a = unpack!(status=, extended.convert(&mut loses_info)); assert_eq!(status - Status::INEXACT, Status::OK); // If conversion was exact or resulted in a special case, we're done; // just set the second double to zero. Otherwise, re-convert back to // the extended format and compute the difference. This now should // convert exactly to double. let b = if a.is_finite_non_zero() && loses_info { let u: FallbackExtended = unpack!(status=, a.convert(&mut loses_info)); assert_eq!((status, loses_info), (Status::OK, false)); let v = unpack!(status=, extended - u); assert_eq!(status, Status::OK); let v = unpack!(status=, v.convert(&mut loses_info)); assert_eq!((status, loses_info), (Status::OK, false)); v } else { F::ZERO }; DoubleFloat(a, b) } } impl> From> for Fallback { fn from(DoubleFloat(a, b): DoubleFloat) -> Self { let mut status; let mut loses_info = false; // Get the first F and convert to our format. let a = unpack!(status=, a.convert(&mut loses_info)); assert_eq!((status, loses_info), (Status::OK, false)); // Unless we have a special case, add in second F. if a.is_finite_non_zero() { let b = unpack!(status=, b.convert(&mut loses_info)); assert_eq!((status, loses_info), (Status::OK, false)); (a + b).value } else { a } } } float_common_impls!(DoubleFloat); impl Neg for DoubleFloat { type Output = Self; fn neg(self) -> Self { if self.1.is_finite_non_zero() { DoubleFloat(-self.0, -self.1) } else { DoubleFloat(-self.0, self.1) } } } impl>> fmt::Display for DoubleFloat { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&Fallback::from(*self), f) } } impl>> Float for DoubleFloat where Self: From>, { const BITS: usize = F::BITS * 2; const PRECISION: usize = Fallback::::PRECISION; const MAX_EXP: ExpInt = Fallback::::MAX_EXP; const MIN_EXP: ExpInt = Fallback::::MIN_EXP; const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO); const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO); // FIXME(eddyb) remove when qnan becomes const fn. const NAN: Self = DoubleFloat(F::NAN, F::ZERO); fn qnan(payload: Option) -> Self { DoubleFloat(F::qnan(payload), F::ZERO) } fn snan(payload: Option) -> Self { DoubleFloat(F::snan(payload), F::ZERO) } fn largest() -> Self { let status; let mut r = DoubleFloat(F::largest(), F::largest()); r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1)); r.1 = unpack!(status=, r.1.next_down()); assert_eq!(status, Status::OK); r } const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO); fn smallest_normalized() -> Self { DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO) } // Implement addition, subtraction, multiplication and division based on: // "Software for Doubled-Precision Floating-Point Computations", // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd { match (self.category(), rhs.category()) { (Category::Infinity, Category::Infinity) => { if self.is_negative() != rhs.is_negative() { Status::INVALID_OP.and(Self::NAN.copy_sign(self)) } else { Status::OK.and(self) } } (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self), (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs), (Category::Normal, Category::Normal) => { let mut status = Status::OK; let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1); let mut z = a; z = unpack!(status|=, z.add_r(c, round)); if !z.is_finite() { if !z.is_infinite() { return status.and(DoubleFloat(z, F::ZERO)); } status = Status::OK; let a_cmp_c = a.cmp_abs_normal(c); z = cc; z = unpack!(status|=, z.add_r(aa, round)); if a_cmp_c == Ordering::Greater { // z = cc + aa + c + a; z = unpack!(status|=, z.add_r(c, round)); z = unpack!(status|=, z.add_r(a, round)); } else { // z = cc + aa + a + c; z = unpack!(status|=, z.add_r(a, round)); z = unpack!(status|=, z.add_r(c, round)); } if !z.is_finite() { return status.and(DoubleFloat(z, F::ZERO)); } self.0 = z; let mut zz = aa; zz = unpack!(status|=, zz.add_r(cc, round)); if a_cmp_c == Ordering::Greater { // self.1 = a - z + c + zz; self.1 = a; self.1 = unpack!(status|=, self.1.sub_r(z, round)); self.1 = unpack!(status|=, self.1.add_r(c, round)); self.1 = unpack!(status|=, self.1.add_r(zz, round)); } else { // self.1 = c - z + a + zz; self.1 = c; self.1 = unpack!(status|=, self.1.sub_r(z, round)); self.1 = unpack!(status|=, self.1.add_r(a, round)); self.1 = unpack!(status|=, self.1.add_r(zz, round)); } } else { // q = a - z; let mut q = a; q = unpack!(status|=, q.sub_r(z, round)); // zz = q + c + (a - (q + z)) + aa + cc; // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. let mut zz = q; zz = unpack!(status|=, zz.add_r(c, round)); q = unpack!(status|=, q.add_r(z, round)); q = unpack!(status|=, q.sub_r(a, round)); q = -q; zz = unpack!(status|=, zz.add_r(q, round)); zz = unpack!(status|=, zz.add_r(aa, round)); zz = unpack!(status|=, zz.add_r(cc, round)); if zz.is_zero() && !zz.is_negative() { return Status::OK.and(DoubleFloat(z, F::ZERO)); } self.0 = z; self.0 = unpack!(status|=, self.0.add_r(zz, round)); if !self.0.is_finite() { self.1 = F::ZERO; return status.and(self); } self.1 = z; self.1 = unpack!(status|=, self.1.sub_r(self.0, round)); self.1 = unpack!(status|=, self.1.add_r(zz, round)); } status.and(self) } } } fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd { // Interesting observation: For special categories, finding the lowest // common ancestor of the following layered graph gives the correct // return category: // // NaN // / \ // Zero Inf // \ / // Normal // // e.g. NaN * NaN = NaN // Zero * Inf = NaN // Normal * Zero = Zero // Normal * Inf = Inf match (self.category(), rhs.category()) { (Category::NaN, _) => Status::OK.and(self), (_, Category::NaN) => Status::OK.and(rhs), (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN), (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self), (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs), (Category::Normal, Category::Normal) => { let mut status = Status::OK; let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1); // t = a * c let mut t = a; t = unpack!(status|=, t.mul_r(c, round)); if !t.is_finite_non_zero() { return status.and(DoubleFloat(t, F::ZERO)); } // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). let mut tau = a; tau = unpack!(status|=, tau.mul_add_r(c, -t, round)); // v = a * d let mut v = a; v = unpack!(status|=, v.mul_r(d, round)); // w = b * c let mut w = b; w = unpack!(status|=, w.mul_r(c, round)); v = unpack!(status|=, v.add_r(w, round)); // tau += v + w tau = unpack!(status|=, tau.add_r(v, round)); // u = t + tau let mut u = t; u = unpack!(status|=, u.add_r(tau, round)); self.0 = u; if !u.is_finite() { self.1 = F::ZERO; } else { // self.1 = (t - u) + tau t = unpack!(status|=, t.sub_r(u, round)); t = unpack!(status|=, t.add_r(tau, round)); self.1 = t; } status.and(self) } } } fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd { Fallback::from(self) .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round) .map(Self::from) } fn div_r(self, rhs: Self, round: Round) -> StatusAnd { Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from) } fn ieee_rem(self, rhs: Self) -> StatusAnd { Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from) } fn c_fmod(self, rhs: Self) -> StatusAnd { Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from) } fn round_to_integral(self, round: Round) -> StatusAnd { Fallback::from(self).round_to_integral(round).map(Self::from) } fn next_up(self) -> StatusAnd { Fallback::from(self).next_up().map(Self::from) } fn from_bits(input: u128) -> Self { let (a, b) = (input, input >> F::BITS); DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1))) } fn from_u128_r(input: u128, round: Round) -> StatusAnd { Fallback::from_u128_r(input, round).map(Self::from) } fn from_str_r(s: &str, round: Round) -> Result, ParseError> { Fallback::from_str_r(s, round).map(|r| r.map(Self::from)) } fn to_bits(self) -> u128 { self.0.to_bits() | (self.1.to_bits() << F::BITS) } fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd { Fallback::from(self).to_u128_r(width, round, is_exact) } fn cmp_abs_normal(self, rhs: Self) -> Ordering { self.0.cmp_abs_normal(rhs.0).then_with(|| { let result = self.1.cmp_abs_normal(rhs.1); if result != Ordering::Equal { let against = self.0.is_negative() ^ self.1.is_negative(); let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative(); (!against) .cmp(&!rhs_against) .then_with(|| if against { result.reverse() } else { result }) } else { result } }) } fn bitwise_eq(self, rhs: Self) -> bool { self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1) } fn is_negative(self) -> bool { self.0.is_negative() } fn is_denormal(self) -> bool { self.category() == Category::Normal && (self.0.is_denormal() || self.0.is_denormal() || // (double)(Hi + Lo) == Hi defines a normal number. self.0 != (self.0 + self.1).value) } fn is_signaling(self) -> bool { self.0.is_signaling() } fn category(self) -> Category { self.0.category() } fn is_integer(self) -> bool { self.0.is_integer() && self.1.is_integer() } fn get_exact_inverse(self) -> Option { Fallback::from(self).get_exact_inverse().map(Self::from) } fn ilogb(self) -> ExpInt { self.0.ilogb() } fn scalbn_r(self, exp: ExpInt, round: Round) -> Self { DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round)) } fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self { let a = self.0.frexp_r(exp, round); let mut b = self.1; if self.category() == Category::Normal { b = b.scalbn_r(-*exp, round); } DoubleFloat(a, b) } } // HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat` // has private fields, and it's not worth it to make them public just for testing. #[test] fn is_integer() { let double_from_f64 = |f: f64| ieee::Double::from_bits(f.to_bits().into()); assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer()); assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer()); assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer()); }