diff options
Diffstat (limited to '')
-rw-r--r-- | compiler/rustc_apfloat/src/ieee.rs | 2758 | ||||
-rw-r--r-- | compiler/rustc_apfloat/src/lib.rs | 693 | ||||
-rw-r--r-- | compiler/rustc_apfloat/src/ppc.rs | 434 |
3 files changed, 3885 insertions, 0 deletions
diff --git a/compiler/rustc_apfloat/src/ieee.rs b/compiler/rustc_apfloat/src/ieee.rs new file mode 100644 index 000000000..3db8adb2a --- /dev/null +++ b/compiler/rustc_apfloat/src/ieee.rs @@ -0,0 +1,2758 @@ +use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}; +use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd}; + +use core::cmp::{self, Ordering}; +use core::convert::TryFrom; +use core::fmt::{self, Write}; +use core::marker::PhantomData; +use core::mem; +use core::ops::Neg; +use smallvec::{smallvec, SmallVec}; + +#[must_use] +pub struct IeeeFloat<S> { + /// Absolute significand value (including the integer bit). + sig: [Limb; 1], + + /// The signed unbiased exponent of the value. + exp: ExpInt, + + /// What kind of floating point number this is. + category: Category, + + /// Sign bit of the number. + sign: bool, + + marker: PhantomData<S>, +} + +/// Fundamental unit of big integer arithmetic, but also +/// large to store the largest significands by itself. +type Limb = u128; +const LIMB_BITS: usize = 128; +fn limbs_for_bits(bits: usize) -> usize { + (bits + LIMB_BITS - 1) / LIMB_BITS +} + +/// Enum that represents what fraction of the LSB truncated bits of an fp number +/// represent. +/// +/// This essentially combines the roles of guard and sticky bits. +#[must_use] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +enum Loss { + // Example of truncated bits: + ExactlyZero, // 000000 + LessThanHalf, // 0xxxxx x's not all zero + ExactlyHalf, // 100000 + MoreThanHalf, // 1xxxxx x's not all zero +} + +/// Represents floating point arithmetic semantics. +pub trait Semantics: Sized { + /// Total number of bits in the in-memory format. + const BITS: usize; + + /// Number of bits in the significand. This includes the integer bit. + const PRECISION: usize; + + /// The largest E such that 2<sup>E</sup> is representable; this matches the + /// definition of IEEE 754. + const MAX_EXP: ExpInt; + + /// The smallest E such that 2<sup>E</sup> is a normalized number; this + /// matches the definition of IEEE 754. + const MIN_EXP: ExpInt = -Self::MAX_EXP + 1; + + /// The significand bit that marks NaN as quiet. + const QNAN_BIT: usize = Self::PRECISION - 2; + + /// The significand bitpattern to mark a NaN as quiet. + /// NOTE: for X87DoubleExtended we need to set two bits instead of 2. + const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT; + + fn from_bits(bits: u128) -> IeeeFloat<Self> { + assert!(Self::BITS > Self::PRECISION); + + let sign = bits & (1 << (Self::BITS - 1)); + let exponent = (bits & !sign) >> (Self::PRECISION - 1); + let mut r = IeeeFloat { + sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], + // Convert the exponent from its bias representation to a signed integer. + exp: (exponent as ExpInt) - Self::MAX_EXP, + category: Category::Zero, + sign: sign != 0, + marker: PhantomData, + }; + + if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { + // Exponent, significand meaningless. + r.category = Category::Zero; + } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] { + // Exponent, significand meaningless. + r.category = Category::Infinity; + } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] { + // Sign, exponent, significand meaningless. + r.category = Category::NaN; + } else { + r.category = Category::Normal; + if r.exp == Self::MIN_EXP - 1 { + // Denormal. + r.exp = Self::MIN_EXP; + } else { + // Set integer bit. + sig::set_bit(&mut r.sig, Self::PRECISION - 1); + } + } + + r + } + + fn to_bits(x: IeeeFloat<Self>) -> u128 { + assert!(Self::BITS > Self::PRECISION); + + // Split integer bit from significand. + let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); + let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1); + let exponent = match x.category { + Category::Normal => { + if x.exp == Self::MIN_EXP && !integer_bit { + // Denormal. + Self::MIN_EXP - 1 + } else { + x.exp + } + } + Category::Zero => { + // FIXME(eddyb) Maybe we should guarantee an invariant instead? + significand = 0; + Self::MIN_EXP - 1 + } + Category::Infinity => { + // FIXME(eddyb) Maybe we should guarantee an invariant instead? + significand = 0; + Self::MAX_EXP + 1 + } + Category::NaN => Self::MAX_EXP + 1, + }; + + // Convert the exponent from a signed integer to its bias representation. + let exponent = (exponent + Self::MAX_EXP) as u128; + + ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand + } +} + +impl<S> Copy for IeeeFloat<S> {} +impl<S> Clone for IeeeFloat<S> { + fn clone(&self) -> Self { + *self + } +} + +macro_rules! ieee_semantics { + ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => { + $(pub struct $sem;)* + $(pub type $name = IeeeFloat<$sem>;)* + $(impl Semantics for $sem { + const BITS: usize = $bits; + const PRECISION: usize = ($bits - 1 - $exp_bits) + 1; + const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1; + })* + } +} + +ieee_semantics! { + Half = HalfS(16:5), + Single = SingleS(32:8), + Double = DoubleS(64:11), + Quad = QuadS(128:15) +} + +pub struct X87DoubleExtendedS; +pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>; +impl Semantics for X87DoubleExtendedS { + const BITS: usize = 80; + const PRECISION: usize = 64; + const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1; + + /// For x87 extended precision, we want to make a NaN, not a + /// pseudo-NaN. Maybe we should expose the ability to make + /// pseudo-NaNs? + const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT; + + /// Integer bit is explicit in this format. Intel hardware (387 and later) + /// does not support these bit patterns: + /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") + /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") + /// exponent = 0, integer bit 1 ("pseudodenormal") + /// exponent != 0 nor all 1's, integer bit 0 ("unnormal") + /// At the moment, the first two are treated as NaNs, the second two as Normal. + fn from_bits(bits: u128) -> IeeeFloat<Self> { + let sign = bits & (1 << (Self::BITS - 1)); + let exponent = (bits & !sign) >> Self::PRECISION; + let mut r = IeeeFloat { + sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], + // Convert the exponent from its bias representation to a signed integer. + exp: (exponent as ExpInt) - Self::MAX_EXP, + category: Category::Zero, + sign: sign != 0, + marker: PhantomData, + }; + + if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { + // Exponent, significand meaningless. + r.category = Category::Zero; + } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] { + // Exponent, significand meaningless. + r.category = Category::Infinity; + } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] { + // Sign, exponent, significand meaningless. + r.category = Category::NaN; + } else { + r.category = Category::Normal; + if r.exp == Self::MIN_EXP - 1 { + // Denormal. + r.exp = Self::MIN_EXP; + } + } + + r + } + + fn to_bits(x: IeeeFloat<Self>) -> u128 { + // Get integer bit from significand. + let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); + let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1); + let exponent = match x.category { + Category::Normal => { + if x.exp == Self::MIN_EXP && !integer_bit { + // Denormal. + Self::MIN_EXP - 1 + } else { + x.exp + } + } + Category::Zero => { + // FIXME(eddyb) Maybe we should guarantee an invariant instead? + significand = 0; + Self::MIN_EXP - 1 + } + Category::Infinity => { + // FIXME(eddyb) Maybe we should guarantee an invariant instead? + significand = 1 << (Self::PRECISION - 1); + Self::MAX_EXP + 1 + } + Category::NaN => Self::MAX_EXP + 1, + }; + + // Convert the exponent from a signed integer to its bias representation. + let exponent = (exponent + Self::MAX_EXP) as u128; + + ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand + } +} + +float_common_impls!(IeeeFloat<S>); + +impl<S: Semantics> PartialEq for IeeeFloat<S> { + fn eq(&self, rhs: &Self) -> bool { + self.partial_cmp(rhs) == Some(Ordering::Equal) + } +} + +impl<S: Semantics> PartialOrd for IeeeFloat<S> { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + match (self.category, rhs.category) { + (Category::NaN, _) | (_, Category::NaN) => None, + + (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))), + + (Category::Zero, Category::Zero) => Some(Ordering::Equal), + + (Category::Infinity, _) | (Category::Normal, Category::Zero) => { + Some((!self.sign).cmp(&self.sign)) + } + + (_, Category::Infinity) | (Category::Zero, Category::Normal) => { + Some(rhs.sign.cmp(&(!rhs.sign))) + } + + (Category::Normal, Category::Normal) => { + // Two normal numbers. Do they have the same sign? + Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| { + // Compare absolute values; invert result if negative. + let result = self.cmp_abs_normal(*rhs); + + if self.sign { result.reverse() } else { result } + })) + } + } + } +} + +impl<S> Neg for IeeeFloat<S> { + type Output = Self; + fn neg(mut self) -> Self { + self.sign = !self.sign; + self + } +} + +/// Prints this value as a decimal string. +/// +/// \param precision The maximum number of digits of +/// precision to output. If there are fewer digits available, +/// zero padding will not be used unless the value is +/// integral and small enough to be expressed in +/// precision digits. 0 means to use the natural +/// precision of the number. +/// \param width The maximum number of zeros to +/// consider inserting before falling back to scientific +/// notation. 0 means to always use scientific notation. +/// +/// \param alternate Indicate whether to remove the trailing zero in +/// fraction part or not. Also setting this parameter to true forces +/// producing of output more similar to default printf behavior. +/// Specifically the lower e is used as exponent delimiter and exponent +/// always contains no less than two digits. +/// +/// Number precision width Result +/// ------ --------- ----- ------ +/// 1.01E+4 5 2 10100 +/// 1.01E+4 4 2 1.01E+4 +/// 1.01E+4 5 1 1.01E+4 +/// 1.01E-2 5 2 0.0101 +/// 1.01E-2 4 2 0.0101 +/// 1.01E-2 4 1 1.01E-2 +impl<S: Semantics> fmt::Display for IeeeFloat<S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let width = f.width().unwrap_or(3); + let alternate = f.alternate(); + + match self.category { + Category::Infinity => { + if self.sign { + return f.write_str("-Inf"); + } else { + return f.write_str("+Inf"); + } + } + + Category::NaN => return f.write_str("NaN"), + + Category::Zero => { + if self.sign { + f.write_char('-')?; + } + + if width == 0 { + if alternate { + f.write_str("0.0")?; + if let Some(n) = f.precision() { + for _ in 1..n { + f.write_char('0')?; + } + } + f.write_str("e+00")?; + } else { + f.write_str("0.0E+0")?; + } + } else { + f.write_char('0')?; + } + return Ok(()); + } + + Category::Normal => {} + } + + if self.sign { + f.write_char('-')?; + } + + // We use enough digits so the number can be round-tripped back to an + // APFloat. The formula comes from "How to Print Floating-Point Numbers + // Accurately" by Steele and White. + // FIXME: Using a formula based purely on the precision is conservative; + // we can print fewer digits depending on the actual value being printed. + + // precision = 2 + floor(S::PRECISION / lg_2(10)) + let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196); + + // Decompose the number into an APInt and an exponent. + let mut exp = self.exp - (S::PRECISION as ExpInt - 1); + let mut sig = vec![self.sig[0]]; + + // Ignore trailing binary zeros. + let trailing_zeros = sig[0].trailing_zeros(); + let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize); + + // Change the exponent from 2^e to 10^e. + if exp == 0 { + // Nothing to do. + } else if exp > 0 { + // Just shift left. + let shift = exp as usize; + sig.resize(limbs_for_bits(S::PRECISION + shift), 0); + sig::shift_left(&mut sig, &mut exp, shift); + } else { + // exp < 0 + let mut texp = -exp as usize; + + // We transform this using the identity: + // (N)(2^-e) == (N)(5^e)(10^-e) + + // Multiply significand by 5^e. + // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) + let mut sig_scratch = vec![]; + let mut p5 = vec![]; + let mut p5_scratch = vec![]; + while texp != 0 { + if p5.is_empty() { + p5.push(5); + } else { + p5_scratch.resize(p5.len() * 2, 0); + let _: Loss = + sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); + while p5_scratch.last() == Some(&0) { + p5_scratch.pop(); + } + mem::swap(&mut p5, &mut p5_scratch); + } + if texp & 1 != 0 { + sig_scratch.resize(sig.len() + p5.len(), 0); + let _: Loss = sig::mul( + &mut sig_scratch, + &mut 0, + &sig, + &p5, + (sig.len() + p5.len()) * LIMB_BITS, + ); + while sig_scratch.last() == Some(&0) { + sig_scratch.pop(); + } + mem::swap(&mut sig, &mut sig_scratch); + } + texp >>= 1; + } + } + + // Fill the buffer. + let mut buffer = vec![]; + + // Ignore digits from the significand until it is no more + // precise than is required for the desired precision. + // 196/59 is a very slight overestimate of lg_2(10). + let required = (precision * 196 + 58) / 59; + let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196; + let mut in_trail = true; + while !sig.is_empty() { + // Perform short division by 10 to extract the rightmost digit. + // rem <- sig % 10 + // sig <- sig / 10 + let mut rem = 0; + + // Use 64-bit division and remainder, with 32-bit chunks from sig. + sig::each_chunk(&mut sig, 32, |chunk| { + let chunk = chunk as u32; + let combined = ((rem as u64) << 32) | (chunk as u64); + rem = (combined % 10) as u8; + (combined / 10) as u32 as Limb + }); + + // Reduce the significand to avoid wasting time dividing 0's. + while sig.last() == Some(&0) { + sig.pop(); + } + + let digit = rem; + + // Ignore digits we don't need. + if discard_digits > 0 { + discard_digits -= 1; + exp += 1; + continue; + } + + // Drop trailing zeros. + if in_trail && digit == 0 { + exp += 1; + } else { + in_trail = false; + buffer.push(b'0' + digit); + } + } + + assert!(!buffer.is_empty(), "no characters in buffer!"); + + // Drop down to precision. + // FIXME: don't do more precise calculations above than are required. + if buffer.len() > precision { + // The most significant figures are the last ones in the buffer. + let mut first_sig = buffer.len() - precision; + + // Round. + // FIXME: this probably shouldn't use 'round half up'. + + // Rounding down is just a truncation, except we also want to drop + // trailing zeros from the new result. + if buffer[first_sig - 1] < b'5' { + while first_sig < buffer.len() && buffer[first_sig] == b'0' { + first_sig += 1; + } + } else { + // Rounding up requires a decimal add-with-carry. If we continue + // the carry, the newly-introduced zeros will just be truncated. + for x in &mut buffer[first_sig..] { + if *x == b'9' { + first_sig += 1; + } else { + *x += 1; + break; + } + } + } + + exp += first_sig as ExpInt; + buffer.drain(..first_sig); + + // If we carried through, we have exactly one digit of precision. + if buffer.is_empty() { + buffer.push(b'1'); + } + } + + let digits = buffer.len(); + + // Check whether we should use scientific notation. + let scientific = if width == 0 { + true + } else if exp >= 0 { + // 765e3 --> 765000 + // ^^^ + // But we shouldn't make the number look more precise than it is. + exp as usize > width || digits + exp as usize > precision + } else { + // Power of the most significant digit. + let msd = exp + (digits - 1) as ExpInt; + if msd >= 0 { + // 765e-2 == 7.65 + false + } else { + // 765e-5 == 0.00765 + // ^ ^^ + -msd as usize > width + } + }; + + // Scientific formatting is pretty straightforward. + if scientific { + exp += digits as ExpInt - 1; + + f.write_char(buffer[digits - 1] as char)?; + f.write_char('.')?; + let truncate_zero = !alternate; + if digits == 1 && truncate_zero { + f.write_char('0')?; + } else { + for &d in buffer[..digits - 1].iter().rev() { + f.write_char(d as char)?; + } + } + // Fill with zeros up to precision. + if !truncate_zero && precision > digits - 1 { + for _ in 0..=precision - digits { + f.write_char('0')?; + } + } + // For alternate we use lower 'e'. + f.write_char(if alternate { 'e' } else { 'E' })?; + + // Exponent always at least two digits if we do not truncate zeros. + if truncate_zero { + write!(f, "{:+}", exp)?; + } else { + write!(f, "{:+03}", exp)?; + } + + return Ok(()); + } + + // Non-scientific, positive exponents. + if exp >= 0 { + for &d in buffer.iter().rev() { + f.write_char(d as char)?; + } + for _ in 0..exp { + f.write_char('0')?; + } + return Ok(()); + } + + // Non-scientific, negative exponents. + let unit_place = -exp as usize; + if unit_place < digits { + for &d in buffer[unit_place..].iter().rev() { + f.write_char(d as char)?; + } + f.write_char('.')?; + for &d in buffer[..unit_place].iter().rev() { + f.write_char(d as char)?; + } + } else { + f.write_str("0.")?; + for _ in digits..unit_place { + f.write_char('0')?; + } + for &d in buffer.iter().rev() { + f.write_char(d as char)?; + } + } + + Ok(()) + } +} + +impl<S: Semantics> fmt::Debug for IeeeFloat<S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}({:?} | {}{:?} * 2^{})", + self, + self.category, + if self.sign { "-" } else { "+" }, + self.sig, + self.exp + ) + } +} + +impl<S: Semantics> Float for IeeeFloat<S> { + const BITS: usize = S::BITS; + const PRECISION: usize = S::PRECISION; + const MAX_EXP: ExpInt = S::MAX_EXP; + const MIN_EXP: ExpInt = S::MIN_EXP; + + const ZERO: Self = IeeeFloat { + sig: [0], + exp: S::MIN_EXP - 1, + category: Category::Zero, + sign: false, + marker: PhantomData, + }; + + const INFINITY: Self = IeeeFloat { + sig: [0], + exp: S::MAX_EXP + 1, + category: Category::Infinity, + sign: false, + marker: PhantomData, + }; + + // FIXME(eddyb) remove when qnan becomes const fn. + const NAN: Self = IeeeFloat { + sig: [S::QNAN_SIGNIFICAND], + exp: S::MAX_EXP + 1, + category: Category::NaN, + sign: false, + marker: PhantomData, + }; + + fn qnan(payload: Option<u128>) -> Self { + IeeeFloat { + sig: [S::QNAN_SIGNIFICAND + | payload.map_or(0, |payload| { + // Zero out the excess bits of the significand. + payload & ((1 << S::QNAN_BIT) - 1) + })], + exp: S::MAX_EXP + 1, + category: Category::NaN, + sign: false, + marker: PhantomData, + } + } + + fn snan(payload: Option<u128>) -> Self { + let mut snan = Self::qnan(payload); + + // We always have to clear the QNaN bit to make it an SNaN. + sig::clear_bit(&mut snan.sig, S::QNAN_BIT); + + // If there are no bits set in the payload, we have to set + // *something* to make it a NaN instead of an infinity; + // conventionally, this is the next bit down from the QNaN bit. + if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 { + sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1); + } + + snan + } + + fn largest() -> Self { + // We want (in interchange format): + // exponent = 1..10 + // significand = 1..1 + IeeeFloat { + sig: [(1 << S::PRECISION) - 1], + exp: S::MAX_EXP, + category: Category::Normal, + sign: false, + marker: PhantomData, + } + } + + // We want (in interchange format): + // exponent = 0..0 + // significand = 0..01 + const SMALLEST: Self = IeeeFloat { + sig: [1], + exp: S::MIN_EXP, + category: Category::Normal, + sign: false, + marker: PhantomData, + }; + + fn smallest_normalized() -> Self { + // We want (in interchange format): + // exponent = 0..0 + // significand = 10..0 + IeeeFloat { + sig: [1 << (S::PRECISION - 1)], + exp: S::MIN_EXP, + category: Category::Normal, + sign: false, + marker: PhantomData, + } + } + + fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { + let status = match (self.category, rhs.category) { + (Category::Infinity, Category::Infinity) => { + // Differently signed infinities can only be validly + // subtracted. + if self.sign != rhs.sign { + self = Self::NAN; + Status::INVALID_OP + } else { + Status::OK + } + } + + // Sign may depend on rounding mode; handled below. + (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => { + Status::OK + } + + (Category::Zero, _) | (_, Category::NaN | Category::Infinity) => { + self = rhs; + Status::OK + } + + // This return code means it was not a simple case. + (Category::Normal, Category::Normal) => { + let loss = sig::add_or_sub( + &mut self.sig, + &mut self.exp, + &mut self.sign, + &mut [rhs.sig[0]], + rhs.exp, + rhs.sign, + ); + let status; + self = unpack!(status=, self.normalize(round, loss)); + + // Can only be zero if we lost no fraction. + assert!(self.category != Category::Zero || loss == Loss::ExactlyZero); + + status + } + }; + + // If two numbers add (exactly) to zero, IEEE 754 decrees it is a + // positive zero unless rounding to minus infinity, except that + // adding two like-signed zeroes gives that zero. + if self.category == Category::Zero + && (rhs.category != Category::Zero || self.sign != rhs.sign) + { + self.sign = round == Round::TowardNegative; + } + + status.and(self) + } + + fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { + self.sign ^= rhs.sign; + + match (self.category, rhs.category) { + (Category::NaN, _) => { + self.sign = false; + Status::OK.and(self) + } + + (_, Category::NaN) => { + self.sign = false; + self.category = Category::NaN; + self.sig = rhs.sig; + Status::OK.and(self) + } + + (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => { + Status::INVALID_OP.and(Self::NAN) + } + + (_, Category::Infinity) | (Category::Infinity, _) => { + self.category = Category::Infinity; + Status::OK.and(self) + } + + (Category::Zero, _) | (_, Category::Zero) => { + self.category = Category::Zero; + Status::OK.and(self) + } + + (Category::Normal, Category::Normal) => { + self.exp += rhs.exp; + let mut wide_sig = [0; 2]; + let loss = + sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION); + self.sig = [wide_sig[0]]; + let mut status; + self = unpack!(status=, self.normalize(round, loss)); + if loss != Loss::ExactlyZero { + status |= Status::INEXACT; + } + status.and(self) + } + } + } + + fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { + // If and only if all arguments are normal do we need to do an + // extended-precision calculation. + if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() { + let mut status; + self = unpack!(status=, self.mul_r(multiplicand, round)); + + // FS can only be Status::OK or Status::INVALID_OP. There is no more work + // to do in the latter case. The IEEE-754R standard says it is + // implementation-defined in this case whether, if ADDEND is a + // quiet NaN, we raise invalid op; this implementation does so. + // + // If we need to do the addition we can do so with normal + // precision. + if status == Status::OK { + self = unpack!(status=, self.add_r(addend, round)); + } + return status.and(self); + } + + // Post-multiplication sign, before addition. + self.sign ^= multiplicand.sign; + + // Allocate space for twice as many bits as the original significand, plus one + // extra bit for the addition to overflow into. + assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2); + let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]); + + let mut loss = Loss::ExactlyZero; + let mut omsb = sig::omsb(&wide_sig); + self.exp += multiplicand.exp; + + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // lhs = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are three significant bits at the left-hand side of the + // radix point: two for the multiplication, and an overflow bit for the + // addition (that will always be zero at this point). Move the radix point + // toward left by two bits, and adjust exponent accordingly. + self.exp += 2; + + if addend.is_non_zero() { + // Normalize our MSB to one below the top bit to allow for overflow. + let ext_precision = 2 * S::PRECISION + 1; + if omsb != ext_precision - 1 { + assert!(ext_precision > omsb); + sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb); + } + + // The intermediate result of the multiplication has "2 * S::PRECISION" + // significant bit; adjust the addend to be consistent with mul result. + let mut ext_addend_sig = [addend.sig[0], 0]; + + // Extend the addend significand to ext_precision - 1. This guarantees + // that the high bit of the significand is zero (same as wide_sig), + // so the addition will overflow (if it does overflow at all) into the top bit. + sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION); + loss = sig::add_or_sub( + &mut wide_sig, + &mut self.exp, + &mut self.sign, + &mut ext_addend_sig, + addend.exp + 1, + addend.sign, + ); + + omsb = sig::omsb(&wide_sig); + } + + // Convert the result having "2 * S::PRECISION" significant-bits back to the one + // having "S::PRECISION" significant-bits. First, move the radix point from + // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be + // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION". + self.exp -= S::PRECISION as ExpInt + 1; + + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e., "MSB . rest-significant-bits"). + if omsb > S::PRECISION { + let bits = omsb - S::PRECISION; + loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss); + } + + self.sig[0] = wide_sig[0]; + + let mut status; + self = unpack!(status=, self.normalize(round, loss)); + if loss != Loss::ExactlyZero { + status |= Status::INEXACT; + } + + // If two numbers add (exactly) to zero, IEEE 754 decrees it is a + // positive zero unless rounding to minus infinity, except that + // adding two like-signed zeroes gives that zero. + if self.category == Category::Zero + && !status.intersects(Status::UNDERFLOW) + && self.sign != addend.sign + { + self.sign = round == Round::TowardNegative; + } + + status.and(self) + } + + fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { + self.sign ^= rhs.sign; + + match (self.category, rhs.category) { + (Category::NaN, _) => { + self.sign = false; + Status::OK.and(self) + } + + (_, Category::NaN) => { + self.category = Category::NaN; + self.sig = rhs.sig; + self.sign = false; + Status::OK.and(self) + } + + (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => { + Status::INVALID_OP.and(Self::NAN) + } + + (Category::Infinity | Category::Zero, _) => Status::OK.and(self), + + (Category::Normal, Category::Infinity) => { + self.category = Category::Zero; + Status::OK.and(self) + } + + (Category::Normal, Category::Zero) => { + self.category = Category::Infinity; + Status::DIV_BY_ZERO.and(self) + } + + (Category::Normal, Category::Normal) => { + self.exp -= rhs.exp; + let dividend = self.sig[0]; + let loss = sig::div( + &mut self.sig, + &mut self.exp, + &mut [dividend], + &mut [rhs.sig[0]], + S::PRECISION, + ); + let mut status; + self = unpack!(status=, self.normalize(round, loss)); + if loss != Loss::ExactlyZero { + status |= Status::INEXACT; + } + status.and(self) + } + } + } + + fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> { + match (self.category, rhs.category) { + (Category::NaN, _) + | (Category::Zero, Category::Infinity | Category::Normal) + | (Category::Normal, Category::Infinity) => Status::OK.and(self), + + (_, Category::NaN) => { + self.sign = false; + self.category = Category::NaN; + self.sig = rhs.sig; + Status::OK.and(self) + } + + (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), + + (Category::Normal, Category::Normal) => { + while self.is_finite_non_zero() + && rhs.is_finite_non_zero() + && self.cmp_abs_normal(rhs) != Ordering::Less + { + let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb()); + if self.cmp_abs_normal(v) == Ordering::Less { + v = v.scalbn(-1); + } + v.sign = self.sign; + + let status; + self = unpack!(status=, self - v); + assert_eq!(status, Status::OK); + } + Status::OK.and(self) + } + } + } + + fn round_to_integral(self, round: Round) -> StatusAnd<Self> { + // If the exponent is large enough, we know that this value is already + // integral, and the arithmetic below would potentially cause it to saturate + // to +/-Inf. Bail out early instead. + if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt { + return Status::OK.and(self); + } + + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + // NOTE: When the input value is negative, we do subtraction followed by + // addition instead. + assert!(S::PRECISION <= 128); + let mut status; + let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1))); + let magic_const = magic_const.copy_sign(self); + + if status != Status::OK { + return status.and(self); + } + + let mut r = self; + r = unpack!(status=, r.add_r(magic_const, round)); + if status != Status::OK && status != Status::INEXACT { + return status.and(self); + } + + // Restore the input sign to handle 0.0/-0.0 cases correctly. + r.sub_r(magic_const, round).map(|r| r.copy_sign(self)) + } + + fn next_up(mut self) -> StatusAnd<Self> { + // Compute nextUp(x), handling each float category separately. + match self.category { + Category::Infinity => { + if self.sign { + // nextUp(-inf) = -largest + Status::OK.and(-Self::largest()) + } else { + // nextUp(+inf) = +inf + Status::OK.and(self) + } + } + Category::NaN => { + // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. + // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not + // change the payload. + if self.is_signaling() { + // For consistency, propagate the sign of the sNaN to the qNaN. + Status::INVALID_OP.and(Self::NAN.copy_sign(self)) + } else { + Status::OK.and(self) + } + } + Category::Zero => { + // nextUp(pm 0) = +smallest + Status::OK.and(Self::SMALLEST) + } + Category::Normal => { + // nextUp(-smallest) = -0 + if self.is_smallest() && self.sign { + return Status::OK.and(-Self::ZERO); + } + + // nextUp(largest) == INFINITY + if self.is_largest() && !self.sign { + return Status::OK.and(Self::INFINITY); + } + + // Excluding the integral bit. This allows us to test for binade boundaries. + let sig_mask = (1 << (S::PRECISION - 1)) - 1; + + // nextUp(normal) == normal + inc. + if self.sign { + // If we are negative, we need to decrement the significand. + + // We only cross a binade boundary that requires adjusting the exponent + // if: + // 1. exponent != S::MIN_EXP. This implies we are not in the + // smallest binade or are dealing with denormals. + // 2. Our significand excluding the integral bit is all zeros. + let crossing_binade_boundary = + self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0; + + // Decrement the significand. + // + // We always do this since: + // 1. If we are dealing with a non-binade decrement, by definition we + // just decrement the significand. + // 2. If we are dealing with a normal -> normal binade decrement, since + // we have an explicit integral bit the fact that all bits but the + // integral bit are zero implies that subtracting one will yield a + // significand with 0 integral bit and 1 in all other spots. Thus we + // must just adjust the exponent and set the integral bit to 1. + // 3. If we are dealing with a normal -> denormal binade decrement, + // since we set the integral bit to 0 when we represent denormals, we + // just decrement the significand. + sig::decrement(&mut self.sig); + + if crossing_binade_boundary { + // Our result is a normal number. Do the following: + // 1. Set the integral bit to 1. + // 2. Decrement the exponent. + sig::set_bit(&mut self.sig, S::PRECISION - 1); + self.exp -= 1; + } + } else { + // If we are positive, we need to increment the significand. + + // We only cross a binade boundary that requires adjusting the exponent if + // the input is not a denormal and all of said input's significand bits + // are set. If all of said conditions are true: clear the significand, set + // the integral bit to 1, and increment the exponent. If we have a + // denormal always increment since moving denormals and the numbers in the + // smallest normal binade have the same exponent in our representation. + let crossing_binade_boundary = + !self.is_denormal() && self.sig[0] & sig_mask == sig_mask; + + if crossing_binade_boundary { + self.sig = [0]; + sig::set_bit(&mut self.sig, S::PRECISION - 1); + assert_ne!( + self.exp, + S::MAX_EXP, + "We can not increment an exponent beyond the MAX_EXP \ + allowed by the given floating point semantics." + ); + self.exp += 1; + } else { + sig::increment(&mut self.sig); + } + } + Status::OK.and(self) + } + } + } + + fn from_bits(input: u128) -> Self { + // Dispatch to semantics. + S::from_bits(input) + } + + fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { + IeeeFloat { + sig: [input], + exp: S::PRECISION as ExpInt - 1, + category: Category::Normal, + sign: false, + marker: PhantomData, + } + .normalize(round, Loss::ExactlyZero) + } + + fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> { + if s.is_empty() { + return Err(ParseError("Invalid string length")); + } + + // Handle special cases. + match s { + "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)), + "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)), + "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)), + "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)), + _ => {} + } + + // Handle a leading minus sign. + let minus = s.starts_with('-'); + if minus || s.starts_with('+') { + s = &s[1..]; + if s.is_empty() { + return Err(ParseError("String has no digits")); + } + } + + // Adjust the rounding mode for the absolute value below. + if minus { + round = -round; + } + + let r = if s.starts_with("0x") || s.starts_with("0X") { + s = &s[2..]; + if s.is_empty() { + return Err(ParseError("Invalid string")); + } + Self::from_hexadecimal_string(s, round)? + } else { + Self::from_decimal_string(s, round)? + }; + + Ok(r.map(|r| if minus { -r } else { r })) + } + + fn to_bits(self) -> u128 { + // Dispatch to semantics. + S::to_bits(self) + } + + fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { + // The result of trying to convert a number too large. + let overflow = if self.sign { + // Negative numbers cannot be represented as unsigned. + 0 + } else { + // Largest unsigned integer of the given width. + !0 >> (128 - width) + }; + + *is_exact = false; + + match self.category { + Category::NaN => Status::INVALID_OP.and(0), + + Category::Infinity => Status::INVALID_OP.and(overflow), + + Category::Zero => { + // Negative zero can't be represented as an int. + *is_exact = !self.sign; + Status::OK.and(0) + } + + Category::Normal => { + let mut r = 0; + + // Step 1: place our absolute value, with any fraction truncated, in + // the destination. + let truncated_bits = if self.exp < 0 { + // Our absolute value is less than one; truncate everything. + // For exponent -1 the integer bit represents .5, look at that. + // For smaller exponents leftmost truncated bit is 0. + S::PRECISION - 1 + (-self.exp) as usize + } else { + // We want the most significant (exponent + 1) bits; the rest are + // truncated. + let bits = self.exp as usize + 1; + + // Hopelessly large in magnitude? + if bits > width { + return Status::INVALID_OP.and(overflow); + } + + if bits < S::PRECISION { + // We truncate (S::PRECISION - bits) bits. + r = self.sig[0] >> (S::PRECISION - bits); + S::PRECISION - bits + } else { + // We want at least as many bits as are available. + r = self.sig[0] << (bits - S::PRECISION); + 0 + } + }; + + // Step 2: work out any lost fraction, and increment the absolute + // value if we would round away from zero. + let mut loss = Loss::ExactlyZero; + if truncated_bits > 0 { + loss = Loss::through_truncation(&self.sig, truncated_bits); + if loss != Loss::ExactlyZero + && self.round_away_from_zero(round, loss, truncated_bits) + { + r = r.wrapping_add(1); + if r == 0 { + return Status::INVALID_OP.and(overflow); // Overflow. + } + } + } + + // Step 3: check if we fit in the destination. + if r > overflow { + return Status::INVALID_OP.and(overflow); + } + + if loss == Loss::ExactlyZero { + *is_exact = true; + Status::OK.and(r) + } else { + Status::INEXACT.and(r) + } + } + } + } + + fn cmp_abs_normal(self, rhs: Self) -> Ordering { + assert!(self.is_finite_non_zero()); + assert!(rhs.is_finite_non_zero()); + + // If exponents are equal, do an unsigned comparison of the significands. + self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig)) + } + + fn bitwise_eq(self, rhs: Self) -> bool { + if self.category != rhs.category || self.sign != rhs.sign { + return false; + } + + if self.category == Category::Zero || self.category == Category::Infinity { + return true; + } + + if self.is_finite_non_zero() && self.exp != rhs.exp { + return false; + } + + self.sig == rhs.sig + } + + fn is_negative(self) -> bool { + self.sign + } + + fn is_denormal(self) -> bool { + self.is_finite_non_zero() + && self.exp == S::MIN_EXP + && !sig::get_bit(&self.sig, S::PRECISION - 1) + } + + fn is_signaling(self) -> bool { + // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the + // first bit of the trailing significand being 0. + self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT) + } + + fn category(self) -> Category { + self.category + } + + fn get_exact_inverse(self) -> Option<Self> { + // Special floats and denormals have no exact inverse. + if !self.is_finite_non_zero() { + return None; + } + + // Check that the number is a power of two by making sure that only the + // integer bit is set in the significand. + if self.sig != [1 << (S::PRECISION - 1)] { + return None; + } + + // Get the inverse. + let mut reciprocal = Self::from_u128(1).value; + let status; + reciprocal = unpack!(status=, reciprocal / self); + if status != Status::OK { + return None; + } + + // Avoid multiplication with a denormal, it is not safe on all platforms and + // may be slower than a normal division. + if reciprocal.is_denormal() { + return None; + } + + assert!(reciprocal.is_finite_non_zero()); + assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]); + + Some(reciprocal) + } + + fn ilogb(mut self) -> ExpInt { + if self.is_nan() { + return IEK_NAN; + } + if self.is_zero() { + return IEK_ZERO; + } + if self.is_infinite() { + return IEK_INF; + } + if !self.is_denormal() { + return self.exp; + } + + let sig_bits = (S::PRECISION - 1) as ExpInt; + self.exp += sig_bits; + self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value; + self.exp - sig_bits + } + + fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self { + // If exp is wildly out-of-scale, simply adding it to self.exp will + // overflow; clamp it to a safe range before adding, but ensure that the range + // is large enough that the clamp does not change the result. The range we + // need to support is the difference between the largest possible exponent and + // the normalized exponent of half the smallest denormal. + + let sig_bits = (S::PRECISION - 1) as i32; + let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1; + + // Clamp to one past the range ends to let normalize handle overflow. + let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change); + self.exp = self.exp.saturating_add(exp_change as ExpInt); + self = self.normalize(round, Loss::ExactlyZero).value; + if self.is_nan() { + sig::set_bit(&mut self.sig, S::QNAN_BIT); + } + self + } + + fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self { + *exp = self.ilogb(); + + // Quiet signalling nans. + if *exp == IEK_NAN { + sig::set_bit(&mut self.sig, S::QNAN_BIT); + return self; + } + + if *exp == IEK_INF { + return self; + } + + // 1 is added because frexp is defined to return a normalized fraction in + // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). + if *exp == IEK_ZERO { + *exp = 0; + } else { + *exp += 1; + } + self.scalbn_r(-*exp, round) + } +} + +impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> { + fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> { + let mut r = IeeeFloat { + sig: self.sig, + exp: self.exp, + category: self.category, + sign: self.sign, + marker: PhantomData, + }; + + // x86 has some unusual NaNs which cannot be represented in any other + // format; note them here. + fn is_x87_double_extended<S: Semantics>() -> bool { + S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND + } + let x87_special_nan = is_x87_double_extended::<S>() + && !is_x87_double_extended::<T>() + && r.category == Category::NaN + && (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND; + + // If this is a truncation of a denormal number, and the target semantics + // has larger exponent range than the source semantics (this can happen + // when truncating from PowerPC double-double to double format), the + // right shift could lose result mantissa bits. Adjust exponent instead + // of performing excessive shift. + let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt; + if shift < 0 && r.is_finite_non_zero() { + let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt; + if r.exp + exp_change < T::MIN_EXP { + exp_change = T::MIN_EXP - r.exp; + } + if exp_change < shift { + exp_change = shift; + } + if exp_change < 0 { + shift -= exp_change; + r.exp += exp_change; + } + } + + // If this is a truncation, perform the shift. + let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) { + sig::shift_right(&mut r.sig, &mut 0, -shift as usize) + } else { + Loss::ExactlyZero + }; + + // If this is an extension, perform the shift. + if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) { + sig::shift_left(&mut r.sig, &mut 0, shift as usize); + } + + let status; + if r.is_finite_non_zero() { + r = unpack!(status=, r.normalize(round, loss)); + *loses_info = status != Status::OK; + } else if r.category == Category::NaN { + *loses_info = loss != Loss::ExactlyZero || x87_special_nan; + + // For x87 extended precision, we want to make a NaN, not a special NaN if + // the input wasn't special either. + if !x87_special_nan && is_x87_double_extended::<T>() { + sig::set_bit(&mut r.sig, T::PRECISION - 1); + } + + // Convert of sNaN creates qNaN and raises an exception (invalid op). + // This also guarantees that a sNaN does not become Inf on a truncation + // that loses all payload bits. + if self.is_signaling() { + // Quiet signaling NaN. + sig::set_bit(&mut r.sig, T::QNAN_BIT); + status = Status::INVALID_OP; + } else { + status = Status::OK; + } + } else { + *loses_info = false; + status = Status::OK; + } + + status.and(r) + } +} + +impl<S: Semantics> IeeeFloat<S> { + /// Handle positive overflow. We either return infinity or + /// the largest finite number. For negative overflow, + /// negate the `round` argument before calling. + fn overflow_result(round: Round) -> StatusAnd<Self> { + match round { + // Infinity? + Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => { + (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY) + } + // Otherwise we become the largest finite number. + Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()), + } + } + + /// Returns `true` if, when truncating the current number, with `bit` the + /// new LSB, with the given lost fraction and rounding mode, the result + /// would need to be rounded away from zero (i.e., by increasing the + /// signficand). This routine must work for `Category::Zero` of both signs, and + /// `Category::Normal` numbers. + fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool { + // NaNs and infinities should not have lost fractions. + assert!(self.is_finite_non_zero() || self.is_zero()); + + // Current callers never pass this so we don't handle it. + assert_ne!(loss, Loss::ExactlyZero); + + match round { + Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf, + Round::NearestTiesToEven => { + if loss == Loss::MoreThanHalf { + return true; + } + + // Our zeros don't have a significand to test. + if loss == Loss::ExactlyHalf && self.category != Category::Zero { + return sig::get_bit(&self.sig, bit); + } + + false + } + Round::TowardZero => false, + Round::TowardPositive => !self.sign, + Round::TowardNegative => self.sign, + } + } + + fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> { + if !self.is_finite_non_zero() { + return Status::OK.and(self); + } + + // Before rounding normalize the exponent of Category::Normal numbers. + let mut omsb = sig::omsb(&self.sig); + + if omsb > 0 { + // OMSB is numbered from 1. We want to place it in the integer + // bit numbered PRECISION if possible, with a compensating change in + // the exponent. + let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt); + + // If the resulting exponent is too high, overflow according to + // the rounding mode. + if final_exp > S::MAX_EXP { + let round = if self.sign { -round } else { round }; + return Self::overflow_result(round).map(|r| r.copy_sign(self)); + } + + // Subnormal numbers have exponent MIN_EXP, and their MSB + // is forced based on that. + if final_exp < S::MIN_EXP { + final_exp = S::MIN_EXP; + } + + // Shifting left is easy as we don't lose precision. + if final_exp < self.exp { + assert_eq!(loss, Loss::ExactlyZero); + + let exp_change = (self.exp - final_exp) as usize; + sig::shift_left(&mut self.sig, &mut self.exp, exp_change); + + return Status::OK.and(self); + } + + // Shift right and capture any new lost fraction. + if final_exp > self.exp { + let exp_change = (final_exp - self.exp) as usize; + loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss); + + // Keep OMSB up-to-date. + omsb = omsb.saturating_sub(exp_change); + } + } + + // Now round the number according to round given the lost + // fraction. + + // As specified in IEEE 754, since we do not trap we do not report + // underflow for exact results. + if loss == Loss::ExactlyZero { + // Canonicalize zeros. + if omsb == 0 { + self.category = Category::Zero; + } + + return Status::OK.and(self); + } + + // Increment the significand if we're rounding away from zero. + if self.round_away_from_zero(round, loss, 0) { + if omsb == 0 { + self.exp = S::MIN_EXP; + } + + // We should never overflow. + assert_eq!(sig::increment(&mut self.sig), 0); + omsb = sig::omsb(&self.sig); + + // Did the significand increment overflow? + if omsb == S::PRECISION + 1 { + // Renormalize by incrementing the exponent and shifting our + // significand right one. However if we already have the + // maximum exponent we overflow to infinity. + if self.exp == S::MAX_EXP { + self.category = Category::Infinity; + + return (Status::OVERFLOW | Status::INEXACT).and(self); + } + + let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1); + + return Status::INEXACT.and(self); + } + } + + // The normal case - we were and are not denormal, and any + // significand increment above didn't overflow. + if omsb == S::PRECISION { + return Status::INEXACT.and(self); + } + + // We have a non-zero denormal. + assert!(omsb < S::PRECISION); + + // Canonicalize zeros. + if omsb == 0 { + self.category = Category::Zero; + } + + // The Category::Zero case is a denormal that underflowed to zero. + (Status::UNDERFLOW | Status::INEXACT).and(self) + } + + fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { + let mut r = IeeeFloat { + sig: [0], + exp: 0, + category: Category::Normal, + sign: false, + marker: PhantomData, + }; + + let mut any_digits = false; + let mut has_exp = false; + let mut bit_pos = LIMB_BITS as isize; + let mut loss = None; + + // Without leading or trailing zeros, irrespective of the dot. + let mut first_sig_digit = None; + let mut dot = s.len(); + + for (p, c) in s.char_indices() { + // Skip leading zeros and any (hexa)decimal point. + if c == '.' { + if dot != s.len() { + return Err(ParseError("String contains multiple dots")); + } + dot = p; + } else if let Some(hex_value) = c.to_digit(16) { + any_digits = true; + + if first_sig_digit.is_none() { + if hex_value == 0 { + continue; + } + first_sig_digit = Some(p); + } + + // Store the number while we have space. + bit_pos -= 4; + if bit_pos >= 0 { + r.sig[0] |= (hex_value as Limb) << bit_pos; + // If zero or one-half (the hexadecimal digit 8) are followed + // by non-zero, they're a little more than zero or one-half. + } else if let Some(ref mut loss) = loss { + if hex_value != 0 { + if *loss == Loss::ExactlyZero { + *loss = Loss::LessThanHalf; + } + if *loss == Loss::ExactlyHalf { + *loss = Loss::MoreThanHalf; + } + } + } else { + loss = Some(match hex_value { + 0 => Loss::ExactlyZero, + 1..=7 => Loss::LessThanHalf, + 8 => Loss::ExactlyHalf, + 9..=15 => Loss::MoreThanHalf, + _ => unreachable!(), + }); + } + } else if c == 'p' || c == 'P' { + if !any_digits { + return Err(ParseError("Significand has no digits")); + } + + if dot == s.len() { + dot = p; + } + + let mut chars = s[p + 1..].chars().peekable(); + + // Adjust for the given exponent. + let exp_minus = chars.peek() == Some(&'-'); + if exp_minus || chars.peek() == Some(&'+') { + chars.next(); + } + + for c in chars { + if let Some(value) = c.to_digit(10) { + has_exp = true; + r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt); + } else { + return Err(ParseError("Invalid character in exponent")); + } + } + if !has_exp { + return Err(ParseError("Exponent has no digits")); + } + + if exp_minus { + r.exp = -r.exp; + } + + break; + } else { + return Err(ParseError("Invalid character in significand")); + } + } + if !any_digits { + return Err(ParseError("Significand has no digits")); + } + + // Hex floats require an exponent but not a hexadecimal point. + if !has_exp { + return Err(ParseError("Hex strings require an exponent")); + } + + // Ignore the exponent if we are zero. + let first_sig_digit = match first_sig_digit { + Some(p) => p, + None => return Ok(Status::OK.and(Self::ZERO)), + }; + + // Calculate the exponent adjustment implicit in the number of + // significant digits and adjust for writing the significand starting + // at the most significant nibble. + let exp_adjustment = if dot > first_sig_digit { + ExpInt::try_from(dot - first_sig_digit).unwrap() + } else { + -ExpInt::try_from(first_sig_digit - dot - 1).unwrap() + }; + let exp_adjustment = exp_adjustment + .saturating_mul(4) + .saturating_sub(1) + .saturating_add(S::PRECISION as ExpInt) + .saturating_sub(LIMB_BITS as ExpInt); + r.exp = r.exp.saturating_add(exp_adjustment); + + Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero))) + } + + fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { + // Given a normal decimal floating point number of the form + // + // dddd.dddd[eE][+-]ddd + // + // where the decimal point and exponent are optional, fill out the + // variables below. Exponent is appropriate if the significand is + // treated as an integer, and normalized_exp if the significand + // is taken to have the decimal point after a single leading + // non-zero digit. + // + // If the value is zero, first_sig_digit is None. + + let mut any_digits = false; + let mut dec_exp = 0i32; + + // Without leading or trailing zeros, irrespective of the dot. + let mut first_sig_digit = None; + let mut last_sig_digit = 0; + let mut dot = s.len(); + + for (p, c) in s.char_indices() { + if c == '.' { + if dot != s.len() { + return Err(ParseError("String contains multiple dots")); + } + dot = p; + } else if let Some(dec_value) = c.to_digit(10) { + any_digits = true; + + if dec_value != 0 { + if first_sig_digit.is_none() { + first_sig_digit = Some(p); + } + last_sig_digit = p; + } + } else if c == 'e' || c == 'E' { + if !any_digits { + return Err(ParseError("Significand has no digits")); + } + + if dot == s.len() { + dot = p; + } + + let mut chars = s[p + 1..].chars().peekable(); + + // Adjust for the given exponent. + let exp_minus = chars.peek() == Some(&'-'); + if exp_minus || chars.peek() == Some(&'+') { + chars.next(); + } + + any_digits = false; + for c in chars { + if let Some(value) = c.to_digit(10) { + any_digits = true; + dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32); + } else { + return Err(ParseError("Invalid character in exponent")); + } + } + if !any_digits { + return Err(ParseError("Exponent has no digits")); + } + + if exp_minus { + dec_exp = -dec_exp; + } + + break; + } else { + return Err(ParseError("Invalid character in significand")); + } + } + if !any_digits { + return Err(ParseError("Significand has no digits")); + } + + // Test if we have a zero number allowing for non-zero exponents. + let first_sig_digit = match first_sig_digit { + Some(p) => p, + None => return Ok(Status::OK.and(Self::ZERO)), + }; + + // Adjust the exponents for any decimal point. + if dot > last_sig_digit { + dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32); + } else { + dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32); + } + let significand_digits = last_sig_digit - first_sig_digit + 1 + - (dot > first_sig_digit && dot < last_sig_digit) as usize; + let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1); + + // Handle the cases where exponents are obviously too large or too + // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp + // definitely overflows if + // + // (dec_exp - 1) * L >= MAX_EXP + // + // and definitely underflows to zero where + // + // (dec_exp + 1) * L <= MIN_EXP - PRECISION + // + // With integer arithmetic the tightest bounds for L are + // + // 93/28 < L < 196/59 [ numerator <= 256 ] + // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] + + // Check for MAX_EXP. + if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 { + // Overflow and round. + return Ok(Self::overflow_result(round)); + } + + // Check for MIN_EXP. + if normalized_exp.saturating_add(1).saturating_mul(28738) + <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32) + { + // Underflow to zero and round. + let r = + if round == Round::TowardPositive { IeeeFloat::SMALLEST } else { IeeeFloat::ZERO }; + return Ok((Status::UNDERFLOW | Status::INEXACT).and(r)); + } + + // A tight upper bound on number of bits required to hold an + // N-digit decimal integer is N * 196 / 59. Allocate enough space + // to hold the full significand, and an extra limb required by + // tcMultiplyPart. + let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59); + let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs); + + // Convert to binary efficiently - we do almost all multiplication + // in a Limb. When this would overflow do we do a single + // bignum multiplication, and then revert again to multiplication + // in a Limb. + let mut chars = s[first_sig_digit..=last_sig_digit].chars(); + loop { + let mut val = 0; + let mut multiplier = 1; + + loop { + let dec_value = match chars.next() { + Some('.') => continue, + Some(c) => c.to_digit(10).unwrap(), + None => break, + }; + + multiplier *= 10; + val = val * 10 + dec_value as Limb; + + // The maximum number that can be multiplied by ten with any + // digit added without overflowing a Limb. + if multiplier > (!0 - 9) / 10 { + break; + } + } + + // If we've consumed no digits, we're done. + if multiplier == 1 { + break; + } + + // Multiply out the current limb. + let mut carry = val; + for x in &mut dec_sig { + let [low, mut high] = sig::widening_mul(*x, multiplier); + + // Now add carry. + let (low, overflow) = low.overflowing_add(carry); + high += overflow as Limb; + + *x = low; + carry = high; + } + + // If we had carry, we need another limb (likely but not guaranteed). + if carry > 0 { + dec_sig.push(carry); + } + } + + // Calculate pow(5, abs(dec_exp)) into `pow5_full`. + // The *_calc Vec's are reused scratch space, as an optimization. + let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = { + let mut power = dec_exp.abs() as usize; + + const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125]; + + let mut p5_scratch = smallvec![]; + let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]]; + + let mut r_scratch = smallvec![]; + let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]]; + power >>= 3; + + while power > 0 { + // Calculate pow(5,pow(2,n+3)). + p5_scratch.resize(p5.len() * 2, 0); + let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); + while p5_scratch.last() == Some(&0) { + p5_scratch.pop(); + } + mem::swap(&mut p5, &mut p5_scratch); + + if power & 1 != 0 { + r_scratch.resize(r.len() + p5.len(), 0); + let _: Loss = + sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS); + while r_scratch.last() == Some(&0) { + r_scratch.pop(); + } + mem::swap(&mut r, &mut r_scratch); + } + + power >>= 1; + } + + (r, r_scratch, p5, p5_scratch) + }; + + // Attempt dec_sig * 10^dec_exp with increasing precision. + let mut attempt = 0; + loop { + let calc_precision = (LIMB_BITS << attempt) - 1; + attempt += 1; + + let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>, + limbs: &[Limb]| + -> StatusAnd<ExpInt> { + sig.resize(limbs_for_bits(calc_precision), 0); + let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision); + + // Before rounding normalize the exponent of Category::Normal numbers. + let mut omsb = sig::omsb(sig); + + assert_ne!(omsb, 0); + + // OMSB is numbered from 1. We want to place it in the integer + // bit numbered PRECISION if possible, with a compensating change in + // the exponent. + let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt); + + // Shifting left is easy as we don't lose precision. + if final_exp < exp { + assert_eq!(loss, Loss::ExactlyZero); + + let exp_change = (exp - final_exp) as usize; + sig::shift_left(sig, &mut exp, exp_change); + + return Status::OK.and(exp); + } + + // Shift right and capture any new lost fraction. + if final_exp > exp { + let exp_change = (final_exp - exp) as usize; + loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss); + + // Keep OMSB up-to-date. + omsb = omsb.saturating_sub(exp_change); + } + + assert_eq!(omsb, calc_precision); + + // Now round the number according to round given the lost + // fraction. + + // As specified in IEEE 754, since we do not trap we do not report + // underflow for exact results. + if loss == Loss::ExactlyZero { + return Status::OK.and(exp); + } + + // Increment the significand if we're rounding away from zero. + if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) { + // We should never overflow. + assert_eq!(sig::increment(sig), 0); + omsb = sig::omsb(sig); + + // Did the significand increment overflow? + if omsb == calc_precision + 1 { + let _: Loss = sig::shift_right(sig, &mut exp, 1); + + return Status::INEXACT.and(exp); + } + } + + // The normal case - we were and are not denormal, and any + // significand increment above didn't overflow. + Status::INEXACT.and(exp) + }; + + let status; + let mut exp = unpack!(status=, + calc_normal_from_limbs(&mut sig_calc, &dec_sig)); + let pow5_status; + let pow5_exp = unpack!(pow5_status=, + calc_normal_from_limbs(&mut pow5_calc, &pow5_full)); + + // Add dec_exp, as 10^n = 5^n * 2^n. + exp += dec_exp as ExpInt; + + let mut used_bits = S::PRECISION; + let mut truncated_bits = calc_precision - used_bits; + + let half_ulp_err1 = (status != Status::OK) as Limb; + let (calc_loss, half_ulp_err2); + if dec_exp >= 0 { + exp += pow5_exp; + + sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0); + calc_loss = sig::mul( + &mut sig_scratch_calc, + &mut exp, + &sig_calc, + &pow5_calc, + calc_precision, + ); + mem::swap(&mut sig_calc, &mut sig_scratch_calc); + + half_ulp_err2 = (pow5_status != Status::OK) as Limb; + } else { + exp -= pow5_exp; + + sig_scratch_calc.resize(sig_calc.len(), 0); + calc_loss = sig::div( + &mut sig_scratch_calc, + &mut exp, + &mut sig_calc, + &mut pow5_calc, + calc_precision, + ); + mem::swap(&mut sig_calc, &mut sig_scratch_calc); + + // Denormal numbers have less precision. + if exp < S::MIN_EXP { + truncated_bits += (S::MIN_EXP - exp) as usize; + used_bits = calc_precision.saturating_sub(truncated_bits); + } + // Extra half-ulp lost in reciprocal of exponent. + half_ulp_err2 = + 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb; + } + + // Both sig::mul and sig::div return the + // result with the integer bit set. + assert!(sig::get_bit(&sig_calc, calc_precision - 1)); + + // The error from the true value, in half-ulps, on multiplying two + // floating point numbers, which differ from the value they + // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less + // than the returned value. + // + // See "How to Read Floating Point Numbers Accurately" by William D Clinger. + assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8)); + + let inexact = (calc_loss != Loss::ExactlyZero) as Limb; + let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 { + inexact * 2 // <= inexact half-ulps. + } else { + inexact + 2 * (half_ulp_err1 + half_ulp_err2) + }; + + let ulps_from_boundary = { + let bits = calc_precision - used_bits - 1; + + let i = bits / LIMB_BITS; + let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS)); + let boundary = match round { + Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS), + _ => 0, + }; + if i == 0 { + let delta = limb.wrapping_sub(boundary); + cmp::min(delta, delta.wrapping_neg()) + } else if limb == boundary { + if !sig::is_all_zeros(&sig_calc[1..i]) { + !0 // A lot. + } else { + sig_calc[0] + } + } else if limb == boundary.wrapping_sub(1) { + if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) { + !0 // A lot. + } else { + sig_calc[0].wrapping_neg() + } + } else { + !0 // A lot. + } + }; + + // Are we guaranteed to round correctly if we truncate? + if ulps_from_boundary.saturating_mul(2) >= half_ulp_err { + let mut r = IeeeFloat { + sig: [0], + exp, + category: Category::Normal, + sign: false, + marker: PhantomData, + }; + sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits); + // If we extracted less bits above we must adjust our exponent + // to compensate for the implicit right shift. + r.exp += (S::PRECISION - used_bits) as ExpInt; + let loss = Loss::through_truncation(&sig_calc, truncated_bits); + return Ok(r.normalize(round, loss)); + } + } + } +} + +impl Loss { + /// Combine the effect of two lost fractions. + fn combine(self, less_significant: Loss) -> Loss { + let mut more_significant = self; + if less_significant != Loss::ExactlyZero { + if more_significant == Loss::ExactlyZero { + more_significant = Loss::LessThanHalf; + } else if more_significant == Loss::ExactlyHalf { + more_significant = Loss::MoreThanHalf; + } + } + + more_significant + } + + /// Returns the fraction lost were a bignum truncated losing the least + /// significant `bits` bits. + fn through_truncation(limbs: &[Limb], bits: usize) -> Loss { + if bits == 0 { + return Loss::ExactlyZero; + } + + let half_bit = bits - 1; + let half_limb = half_bit / LIMB_BITS; + let (half_limb, rest) = if half_limb < limbs.len() { + (limbs[half_limb], &limbs[..half_limb]) + } else { + (0, limbs) + }; + let half = 1 << (half_bit % LIMB_BITS); + let has_half = half_limb & half != 0; + let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest); + + match (has_half, has_rest) { + (false, false) => Loss::ExactlyZero, + (false, true) => Loss::LessThanHalf, + (true, false) => Loss::ExactlyHalf, + (true, true) => Loss::MoreThanHalf, + } + } +} + +/// Implementation details of IeeeFloat significands, such as big integer arithmetic. +/// As a rule of thumb, no functions in this module should dynamically allocate. +mod sig { + use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS}; + use core::cmp::Ordering; + use core::iter; + use core::mem; + + pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool { + limbs.iter().all(|&l| l == 0) + } + + /// One, not zero, based LSB. That is, returns 0 for a zeroed significand. + pub(super) fn olsb(limbs: &[Limb]) -> usize { + limbs + .iter() + .enumerate() + .find(|(_, &limb)| limb != 0) + .map_or(0, |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1) + } + + /// One, not zero, based MSB. That is, returns 0 for a zeroed significand. + pub(super) fn omsb(limbs: &[Limb]) -> usize { + limbs + .iter() + .enumerate() + .rfind(|(_, &limb)| limb != 0) + .map_or(0, |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize) + } + + /// Comparison (unsigned) of two significands. + pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering { + assert_eq!(a.len(), b.len()); + for (a, b) in a.iter().zip(b).rev() { + match a.cmp(b) { + Ordering::Equal => {} + o => return o, + } + } + + Ordering::Equal + } + + /// Extracts the given bit. + pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool { + limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0 + } + + /// Sets the given bit. + pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) { + limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS); + } + + /// Clear the given bit. + pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) { + limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS)); + } + + /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent. + pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) { + if bits > 0 { + // Our exponent should not underflow. + *exp = exp.checked_sub(bits as ExpInt).unwrap(); + + // Jump is the inter-limb jump; shift is the intra-limb shift. + let jump = bits / LIMB_BITS; + let shift = bits % LIMB_BITS; + + for i in (0..dst.len()).rev() { + let mut limb; + + if i < jump { + limb = 0; + } else { + // dst[i] comes from the two limbs src[i - jump] and, if we have + // an intra-limb shift, src[i - jump - 1]. + limb = dst[i - jump]; + if shift > 0 { + limb <<= shift; + if i > jump { + limb |= dst[i - jump - 1] >> (LIMB_BITS - shift); + } + } + } + + dst[i] = limb; + } + } + } + + /// Shifts `dst` right `bits` bits noting lost fraction. + pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss { + let loss = Loss::through_truncation(dst, bits); + + if bits > 0 { + // Our exponent should not overflow. + *exp = exp.checked_add(bits as ExpInt).unwrap(); + + // Jump is the inter-limb jump; shift is the intra-limb shift. + let jump = bits / LIMB_BITS; + let shift = bits % LIMB_BITS; + + // Perform the shift. This leaves the most significant `bits` bits + // of the result at zero. + for i in 0..dst.len() { + let mut limb; + + if i + jump >= dst.len() { + limb = 0; + } else { + limb = dst[i + jump]; + if shift > 0 { + limb >>= shift; + if i + jump + 1 < dst.len() { + limb |= dst[i + jump + 1] << (LIMB_BITS - shift); + } + } + } + + dst[i] = limb; + } + } + + loss + } + + /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB, + /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`. + /// All high bits above `src_bits` in `dst` are zero-filled. + pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) { + if src_bits == 0 { + return; + } + + let dst_limbs = limbs_for_bits(src_bits); + assert!(dst_limbs <= dst.len()); + + let src = &src[src_lsb / LIMB_BITS..]; + dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]); + + let shift = src_lsb % LIMB_BITS; + let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift); + + // We now have (dst_limbs * LIMB_BITS - shift) bits from `src` + // in `dst`. If this is less that src_bits, append the rest, else + // clear the high bits. + let n = dst_limbs * LIMB_BITS - shift; + if n < src_bits { + let mask = (1 << (src_bits - n)) - 1; + dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS); + } else if n > src_bits && src_bits % LIMB_BITS > 0 { + dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1; + } + + // Clear high limbs. + for x in &mut dst[dst_limbs..] { + *x = 0; + } + } + + /// We want the most significant PRECISION bits of `src`. There may not + /// be that many; extract what we can. + pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) { + let omsb = omsb(src); + + if precision <= omsb { + extract(dst, src, precision, omsb - precision); + (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1) + } else { + extract(dst, src, omsb, 0); + (Loss::ExactlyZero, precision as ExpInt - 1) + } + } + + /// For every consecutive chunk of `bits` bits from `limbs`, + /// going from most significant to the least significant bits, + /// call `f` to transform those bits and store the result back. + pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) { + assert_eq!(LIMB_BITS % bits, 0); + for limb in limbs.iter_mut().rev() { + let mut r = 0; + for i in (0..LIMB_BITS / bits).rev() { + r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits); + } + *limb = r; + } + } + + /// Increment in-place, return the carry flag. + pub(super) fn increment(dst: &mut [Limb]) -> Limb { + for x in dst { + *x = x.wrapping_add(1); + if *x != 0 { + return 0; + } + } + + 1 + } + + /// Decrement in-place, return the borrow flag. + pub(super) fn decrement(dst: &mut [Limb]) -> Limb { + for x in dst { + *x = x.wrapping_sub(1); + if *x != !0 { + return 0; + } + } + + 1 + } + + /// `a += b + c` where `c` is zero or one. Returns the carry flag. + pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { + assert!(c <= 1); + + for (a, &b) in iter::zip(a, b) { + let (r, overflow) = a.overflowing_add(b); + let (r, overflow2) = r.overflowing_add(c); + *a = r; + c = (overflow | overflow2) as Limb; + } + + c + } + + /// `a -= b + c` where `c` is zero or one. Returns the borrow flag. + pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { + assert!(c <= 1); + + for (a, &b) in iter::zip(a, b) { + let (r, overflow) = a.overflowing_sub(b); + let (r, overflow2) = r.overflowing_sub(c); + *a = r; + c = (overflow | overflow2) as Limb; + } + + c + } + + /// `a += b` or `a -= b`. Does not preserve `b`. + pub(super) fn add_or_sub( + a_sig: &mut [Limb], + a_exp: &mut ExpInt, + a_sign: &mut bool, + b_sig: &mut [Limb], + b_exp: ExpInt, + b_sign: bool, + ) -> Loss { + // Are we bigger exponent-wise than the RHS? + let bits = *a_exp - b_exp; + + // Determine if the operation on the absolute values is effectively + // an addition or subtraction. + // Subtraction is more subtle than one might naively expect. + if *a_sign ^ b_sign { + let (reverse, loss); + + if bits == 0 { + reverse = cmp(a_sig, b_sig) == Ordering::Less; + loss = Loss::ExactlyZero; + } else if bits > 0 { + loss = shift_right(b_sig, &mut 0, (bits - 1) as usize); + shift_left(a_sig, a_exp, 1); + reverse = false; + } else { + loss = shift_right(a_sig, a_exp, (-bits - 1) as usize); + shift_left(b_sig, &mut 0, 1); + reverse = true; + } + + let borrow = (loss != Loss::ExactlyZero) as Limb; + if reverse { + // The code above is intended to ensure that no borrow is necessary. + assert_eq!(sub(b_sig, a_sig, borrow), 0); + a_sig.copy_from_slice(b_sig); + *a_sign = !*a_sign; + } else { + // The code above is intended to ensure that no borrow is necessary. + assert_eq!(sub(a_sig, b_sig, borrow), 0); + } + + // Invert the lost fraction - it was on the RHS and subtracted. + match loss { + Loss::LessThanHalf => Loss::MoreThanHalf, + Loss::MoreThanHalf => Loss::LessThanHalf, + _ => loss, + } + } else { + let loss = if bits > 0 { + shift_right(b_sig, &mut 0, bits as usize) + } else { + shift_right(a_sig, a_exp, -bits as usize) + }; + // We have a guard bit; generating a carry cannot happen. + assert_eq!(add(a_sig, b_sig, 0), 0); + loss + } + } + + /// `[low, high] = a * b`. + /// + /// This cannot overflow, because + /// + /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)` + /// + /// which is less than n<sup>2</sup>. + pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] { + let mut wide = [0, 0]; + + if a == 0 || b == 0 { + return wide; + } + + const HALF_BITS: usize = LIMB_BITS / 2; + + let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1); + for i in 0..2 { + for j in 0..2 { + let mut x = [select(a, i) * select(b, j), 0]; + shift_left(&mut x, &mut 0, (i + j) * HALF_BITS); + assert_eq!(add(&mut wide, &x, 0), 0); + } + } + + wide + } + + /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction. + pub(super) fn mul<'a>( + dst: &mut [Limb], + exp: &mut ExpInt, + mut a: &'a [Limb], + mut b: &'a [Limb], + precision: usize, + ) -> Loss { + // Put the narrower number on the `a` for less loops below. + if a.len() > b.len() { + mem::swap(&mut a, &mut b); + } + + for x in &mut dst[..b.len()] { + *x = 0; + } + + for i in 0..a.len() { + let mut carry = 0; + for j in 0..b.len() { + let [low, mut high] = widening_mul(a[i], b[j]); + + // Now add carry. + let (low, overflow) = low.overflowing_add(carry); + high += overflow as Limb; + + // And now `dst[i + j]`, and store the new low part there. + let (low, overflow) = low.overflowing_add(dst[i + j]); + high += overflow as Limb; + + dst[i + j] = low; + carry = high; + } + dst[i + b.len()] = carry; + } + + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // a = a23 . a22 ... a0 * 2^e1 + // b = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are three significant bits at the left-hand side of the + // radix point: two for the multiplication, and an overflow bit for the + // addition (that will always be zero at this point). Move the radix point + // toward left by two bits, and adjust exponent accordingly. + *exp += 2; + + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // poision "2*precision - 1" to "precision - 1". The exponent need to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". + *exp -= precision as ExpInt + 1; + + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e., "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call IeeeFloat::normalize() if normalized value is + // expected. + let omsb = omsb(dst); + if omsb <= precision { Loss::ExactlyZero } else { shift_right(dst, exp, omsb - precision) } + } + + /// `quotient = dividend / divisor`. Returns the lost fraction. + /// Does not preserve `dividend` or `divisor`. + pub(super) fn div( + quotient: &mut [Limb], + exp: &mut ExpInt, + dividend: &mut [Limb], + divisor: &mut [Limb], + precision: usize, + ) -> Loss { + // Normalize the divisor. + let bits = precision - omsb(divisor); + shift_left(divisor, &mut 0, bits); + *exp += bits as ExpInt; + + // Normalize the dividend. + let bits = precision - omsb(dividend); + shift_left(dividend, exp, bits); + + // Division by 1. + let olsb_divisor = olsb(divisor); + if olsb_divisor == precision { + quotient.copy_from_slice(dividend); + return Loss::ExactlyZero; + } + + // Ensure the dividend >= divisor initially for the loop below. + // Incidentally, this means that the division loop below is + // guaranteed to set the integer bit to one. + if cmp(dividend, divisor) == Ordering::Less { + shift_left(dividend, exp, 1); + assert_ne!(cmp(dividend, divisor), Ordering::Less) + } + + // Helper for figuring out the lost fraction. + let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) { + Ordering::Greater => Loss::MoreThanHalf, + Ordering::Equal => Loss::ExactlyHalf, + Ordering::Less => { + if is_all_zeros(dividend) { + Loss::ExactlyZero + } else { + Loss::LessThanHalf + } + } + }; + + // Try to perform a (much faster) short division for small divisors. + let divisor_bits = precision - (olsb_divisor - 1); + macro_rules! try_short_div { + ($W:ty, $H:ty, $half:expr) => { + if divisor_bits * 2 <= $half { + // Extract the small divisor. + let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1); + let divisor = divisor[0] as $H as $W; + + // Shift the dividend to produce a quotient with the unit bit set. + let top_limb = *dividend.last().unwrap(); + let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H; + shift_left(dividend, &mut 0, divisor_bits - 1); + + // Apply short division in place on $H (of $half bits) chunks. + each_chunk(dividend, $half, |chunk| { + let chunk = chunk as $H; + let combined = ((rem as $W) << $half) | (chunk as $W); + rem = (combined % divisor) as $H; + (combined / divisor) as $H as Limb + }); + quotient.copy_from_slice(dividend); + + return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]); + } + }; + } + + try_short_div!(u32, u16, 16); + try_short_div!(u64, u32, 32); + try_short_div!(u128, u64, 64); + + // Zero the quotient before setting bits in it. + for x in &mut quotient[..limbs_for_bits(precision)] { + *x = 0; + } + + // Long division. + for bit in (0..precision).rev() { + if cmp(dividend, divisor) != Ordering::Less { + sub(dividend, divisor, 0); + set_bit(quotient, bit); + } + shift_left(dividend, &mut 0, 1); + } + + lost_fraction(dividend, divisor) + } +} diff --git a/compiler/rustc_apfloat/src/lib.rs b/compiler/rustc_apfloat/src/lib.rs new file mode 100644 index 000000000..cfc3d5b15 --- /dev/null +++ b/compiler/rustc_apfloat/src/lib.rs @@ -0,0 +1,693 @@ +//! Port of LLVM's APFloat software floating-point implementation from the +//! following C++ sources (please update commit hash when backporting): +//! <https://github.com/llvm-mirror/llvm/tree/23efab2bbd424ed13495a420ad8641cb2c6c28f9> +//! +//! * `include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits +//! * `lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules +//! * `unittests/ADT/APFloatTest.cpp` -> `tests` directory +//! +//! The port contains no unsafe code, global state, or side-effects in general, +//! and the only allocations are in the conversion to/from decimal strings. +//! +//! Most of the API and the testcases are intact in some form or another, +//! with some ergonomic changes, such as idiomatic short names, returning +//! new values instead of mutating the receiver, and having separate method +//! variants that take a non-default rounding mode (with the suffix `_r`). +//! Comments have been preserved where possible, only slightly adapted. +//! +//! Instead of keeping a pointer to a configuration struct and inspecting it +//! dynamically on every operation, types (e.g., `ieee::Double`), traits +//! (e.g., `ieee::Semantics`) and associated constants are employed for +//! increased type safety and performance. +//! +//! On-heap bigints are replaced everywhere (except in decimal conversion), +//! with short arrays of `type Limb = u128` elements (instead of `u64`), +//! This allows fitting the largest supported significands in one integer +//! (`ieee::Quad` and `ppc::Fallback` use slightly less than 128 bits). +//! All of the functions in the `ieee::sig` module operate on slices. +//! +//! # Note +//! +//! This API is completely unstable and subject to change. + +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![no_std] +#![forbid(unsafe_code)] + +#[macro_use] +extern crate alloc; + +use core::cmp::Ordering; +use core::fmt; +use core::ops::{Add, Div, Mul, Neg, Rem, Sub}; +use core::ops::{AddAssign, DivAssign, MulAssign, RemAssign, SubAssign}; +use core::str::FromStr; + +bitflags::bitflags! { + /// IEEE-754R 7: Default exception handling. + /// + /// UNDERFLOW or OVERFLOW are always returned or-ed with INEXACT. + #[must_use] + pub struct Status: u8 { + const OK = 0x00; + const INVALID_OP = 0x01; + const DIV_BY_ZERO = 0x02; + const OVERFLOW = 0x04; + const UNDERFLOW = 0x08; + const INEXACT = 0x10; + } +} + +#[must_use] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct StatusAnd<T> { + pub status: Status, + pub value: T, +} + +impl Status { + pub fn and<T>(self, value: T) -> StatusAnd<T> { + StatusAnd { status: self, value } + } +} + +impl<T> StatusAnd<T> { + pub fn map<F: FnOnce(T) -> U, U>(self, f: F) -> StatusAnd<U> { + StatusAnd { status: self.status, value: f(self.value) } + } +} + +#[macro_export] +macro_rules! unpack { + ($status:ident|=, $e:expr) => { + match $e { + $crate::StatusAnd { status, value } => { + $status |= status; + value + } + } + }; + ($status:ident=, $e:expr) => { + match $e { + $crate::StatusAnd { status, value } => { + $status = status; + value + } + } + }; +} + +/// Category of internally-represented number. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Category { + Infinity, + NaN, + Normal, + Zero, +} + +/// IEEE-754R 4.3: Rounding-direction attributes. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Round { + NearestTiesToEven, + TowardPositive, + TowardNegative, + TowardZero, + NearestTiesToAway, +} + +impl Neg for Round { + type Output = Round; + fn neg(self) -> Round { + match self { + Round::TowardPositive => Round::TowardNegative, + Round::TowardNegative => Round::TowardPositive, + Round::NearestTiesToEven | Round::TowardZero | Round::NearestTiesToAway => self, + } + } +} + +/// A signed type to represent a floating point number's unbiased exponent. +pub type ExpInt = i16; + +// \c ilogb error results. +pub const IEK_INF: ExpInt = ExpInt::MAX; +pub const IEK_NAN: ExpInt = ExpInt::MIN; +pub const IEK_ZERO: ExpInt = ExpInt::MIN + 1; + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct ParseError(pub &'static str); + +/// A self-contained host- and target-independent arbitrary-precision +/// floating-point software implementation. +/// +/// `apfloat` uses significand bignum integer arithmetic as provided by functions +/// in the `ieee::sig`. +/// +/// Written for clarity rather than speed, in particular with a view to use in +/// the front-end of a cross compiler so that target arithmetic can be correctly +/// performed on the host. Performance should nonetheless be reasonable, +/// particularly for its intended use. It may be useful as a base +/// implementation for a run-time library during development of a faster +/// target-specific one. +/// +/// All 5 rounding modes in the IEEE-754R draft are handled correctly for all +/// implemented operations. Currently implemented operations are add, subtract, +/// multiply, divide, fused-multiply-add, conversion-to-float, +/// conversion-to-integer and conversion-from-integer. New rounding modes +/// (e.g., away from zero) can be added with three or four lines of code. +/// +/// Four formats are built-in: IEEE single precision, double precision, +/// quadruple precision, and x87 80-bit extended double (when operating with +/// full extended precision). Adding a new format that obeys IEEE semantics +/// only requires adding two lines of code: a declaration and definition of the +/// format. +/// +/// All operations return the status of that operation as an exception bit-mask, +/// so multiple operations can be done consecutively with their results or-ed +/// together. The returned status can be useful for compiler diagnostics; e.g., +/// inexact, underflow and overflow can be easily diagnosed on constant folding, +/// and compiler optimizers can determine what exceptions would be raised by +/// folding operations and optimize, or perhaps not optimize, accordingly. +/// +/// At present, underflow tininess is detected after rounding; it should be +/// straight forward to add support for the before-rounding case too. +/// +/// The library reads hexadecimal floating point numbers as per C99, and +/// correctly rounds if necessary according to the specified rounding mode. +/// Syntax is required to have been validated by the caller. +/// +/// It also reads decimal floating point numbers and correctly rounds according +/// to the specified rounding mode. +/// +/// Non-zero finite numbers are represented internally as a sign bit, a 16-bit +/// signed exponent, and the significand as an array of integer limbs. After +/// normalization of a number of precision P the exponent is within the range of +/// the format, and if the number is not denormal the P-th bit of the +/// significand is set as an explicit integer bit. For denormals the most +/// significant bit is shifted right so that the exponent is maintained at the +/// format's minimum, so that the smallest denormal has just the least +/// significant bit of the significand set. The sign of zeros and infinities +/// is significant; the exponent and significand of such numbers is not stored, +/// but has a known implicit (deterministic) value: 0 for the significands, 0 +/// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and +/// significand are deterministic, although not really meaningful, and preserved +/// in non-conversion operations. The exponent is implicitly all 1 bits. +/// +/// `apfloat` does not provide any exception handling beyond default exception +/// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause +/// by encoding Signaling NaNs with the first bit of its trailing significand +/// as 0. +/// +/// Future work +/// =========== +/// +/// Some features that may or may not be worth adding: +/// +/// Optional ability to detect underflow tininess before rounding. +/// +/// New formats: x87 in single and double precision mode (IEEE apart from +/// extended exponent range) (hard). +/// +/// New operations: sqrt, nexttoward. +/// +pub trait Float: + Copy + + Default + + FromStr<Err = ParseError> + + PartialOrd + + fmt::Display + + Neg<Output = Self> + + AddAssign + + SubAssign + + MulAssign + + DivAssign + + RemAssign + + Add<Output = StatusAnd<Self>> + + Sub<Output = StatusAnd<Self>> + + Mul<Output = StatusAnd<Self>> + + Div<Output = StatusAnd<Self>> + + Rem<Output = StatusAnd<Self>> +{ + /// Total number of bits in the in-memory format. + const BITS: usize; + + /// Number of bits in the significand. This includes the integer bit. + const PRECISION: usize; + + /// The largest E such that 2<sup>E</sup> is representable; this matches the + /// definition of IEEE 754. + const MAX_EXP: ExpInt; + + /// The smallest E such that 2<sup>E</sup> is a normalized number; this + /// matches the definition of IEEE 754. + const MIN_EXP: ExpInt; + + /// Positive Zero. + const ZERO: Self; + + /// Positive Infinity. + const INFINITY: Self; + + /// NaN (Not a Number). + // FIXME(eddyb) provide a default when qnan becomes const fn. + const NAN: Self; + + /// Factory for QNaN values. + // FIXME(eddyb) should be const fn. + fn qnan(payload: Option<u128>) -> Self; + + /// Factory for SNaN values. + // FIXME(eddyb) should be const fn. + fn snan(payload: Option<u128>) -> Self; + + /// Largest finite number. + // FIXME(eddyb) should be const (but FloatPair::largest is nontrivial). + fn largest() -> Self; + + /// Smallest (by magnitude) finite number. + /// Might be denormalized, which implies a relative loss of precision. + const SMALLEST: Self; + + /// Smallest (by magnitude) normalized finite number. + // FIXME(eddyb) should be const (but FloatPair::smallest_normalized is nontrivial). + fn smallest_normalized() -> Self; + + // Arithmetic + + fn add_r(self, rhs: Self, round: Round) -> StatusAnd<Self>; + fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { + self.add_r(-rhs, round) + } + fn mul_r(self, rhs: Self, round: Round) -> StatusAnd<Self>; + fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self>; + fn mul_add(self, multiplicand: Self, addend: Self) -> StatusAnd<Self> { + self.mul_add_r(multiplicand, addend, Round::NearestTiesToEven) + } + fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self>; + /// IEEE remainder. + // This is not currently correct in all cases. + fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { + let mut v = self; + + let status; + v = unpack!(status=, v / rhs); + if status == Status::DIV_BY_ZERO { + return status.and(self); + } + + assert!(Self::PRECISION < 128); + + let status; + let x = unpack!(status=, v.to_i128_r(128, Round::NearestTiesToEven, &mut false)); + if status == Status::INVALID_OP { + return status.and(self); + } + + let status; + let mut v = unpack!(status=, Self::from_i128(x)); + assert_eq!(status, Status::OK); // should always work + + let status; + v = unpack!(status=, v * rhs); + assert_eq!(status - Status::INEXACT, Status::OK); // should not overflow or underflow + + let status; + v = unpack!(status=, self - v); + assert_eq!(status - Status::INEXACT, Status::OK); // likewise + + if v.is_zero() { + status.and(v.copy_sign(self)) // IEEE754 requires this + } else { + status.and(v) + } + } + /// C fmod, or llvm frem. + fn c_fmod(self, rhs: Self) -> StatusAnd<Self>; + fn round_to_integral(self, round: Round) -> StatusAnd<Self>; + + /// IEEE-754R 2008 5.3.1: nextUp. + fn next_up(self) -> StatusAnd<Self>; + + /// IEEE-754R 2008 5.3.1: nextDown. + /// + /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with + /// appropriate sign switching before/after the computation. + fn next_down(self) -> StatusAnd<Self> { + (-self).next_up().map(|r| -r) + } + + fn abs(self) -> Self { + if self.is_negative() { -self } else { self } + } + fn copy_sign(self, rhs: Self) -> Self { + if self.is_negative() != rhs.is_negative() { -self } else { self } + } + + // Conversions + fn from_bits(input: u128) -> Self; + fn from_i128_r(input: i128, round: Round) -> StatusAnd<Self> { + if input < 0 { + Self::from_u128_r(input.wrapping_neg() as u128, -round).map(|r| -r) + } else { + Self::from_u128_r(input as u128, round) + } + } + fn from_i128(input: i128) -> StatusAnd<Self> { + Self::from_i128_r(input, Round::NearestTiesToEven) + } + fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self>; + fn from_u128(input: u128) -> StatusAnd<Self> { + Self::from_u128_r(input, Round::NearestTiesToEven) + } + fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError>; + fn to_bits(self) -> u128; + + /// Converts a floating point number to an integer according to the + /// rounding mode. In case of an invalid operation exception, + /// deterministic values are returned, namely zero for NaNs and the + /// minimal or maximal value respectively for underflow or overflow. + /// If the rounded value is in range but the floating point number is + /// not the exact integer, the C standard doesn't require an inexact + /// exception to be raised. IEEE-854 does require it so we do that. + /// + /// Note that for conversions to integer type the C standard requires + /// round-to-zero to always be used. + /// + /// The *is_exact output tells whether the result is exact, in the sense + /// that converting it back to the original floating point type produces + /// the original value. This is almost equivalent to `result == Status::OK`, + /// except for negative zeroes. + fn to_i128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<i128> { + let status; + if self.is_negative() { + if self.is_zero() { + // Negative zero can't be represented as an int. + *is_exact = false; + } + let r = unpack!(status=, (-self).to_u128_r(width, -round, is_exact)); + + // Check for values that don't fit in the signed integer. + if r > (1 << (width - 1)) { + // Return the most negative integer for the given width. + *is_exact = false; + Status::INVALID_OP.and(-1 << (width - 1)) + } else { + status.and(r.wrapping_neg() as i128) + } + } else { + // Positive case is simpler, can pretend it's a smaller unsigned + // integer, and `to_u128` will take care of all the edge cases. + self.to_u128_r(width - 1, round, is_exact).map(|r| r as i128) + } + } + fn to_i128(self, width: usize) -> StatusAnd<i128> { + self.to_i128_r(width, Round::TowardZero, &mut true) + } + fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128>; + fn to_u128(self, width: usize) -> StatusAnd<u128> { + self.to_u128_r(width, Round::TowardZero, &mut true) + } + + fn cmp_abs_normal(self, rhs: Self) -> Ordering; + + /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0). + fn bitwise_eq(self, rhs: Self) -> bool; + + // IEEE-754R 5.7.2 General operations. + + /// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if + /// both are not NaN. If either argument is a NaN, returns the other argument. + fn min(self, other: Self) -> Self { + if self.is_nan() { + other + } else if other.is_nan() { + self + } else if other.partial_cmp(&self) == Some(Ordering::Less) { + other + } else { + self + } + } + + /// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if + /// both are not NaN. If either argument is a NaN, returns the other argument. + fn max(self, other: Self) -> Self { + if self.is_nan() { + other + } else if other.is_nan() { + self + } else if self.partial_cmp(&other) == Some(Ordering::Less) { + other + } else { + self + } + } + + /// IEEE-754R isSignMinus: Returns whether the current value is + /// negative. + /// + /// This applies to zeros and NaNs as well. + fn is_negative(self) -> bool; + + /// IEEE-754R isNormal: Returns whether the current value is normal. + /// + /// This implies that the current value of the float is not zero, subnormal, + /// infinite, or NaN following the definition of normality from IEEE-754R. + fn is_normal(self) -> bool { + !self.is_denormal() && self.is_finite_non_zero() + } + + /// Returns `true` if the current value is zero, subnormal, or + /// normal. + /// + /// This means that the value is not infinite or NaN. + fn is_finite(self) -> bool { + !self.is_nan() && !self.is_infinite() + } + + /// Returns `true` if the float is plus or minus zero. + fn is_zero(self) -> bool { + self.category() == Category::Zero + } + + /// IEEE-754R isSubnormal(): Returns whether the float is a + /// denormal. + fn is_denormal(self) -> bool; + + /// IEEE-754R isInfinite(): Returns whether the float is infinity. + fn is_infinite(self) -> bool { + self.category() == Category::Infinity + } + + /// Returns `true` if the float is a quiet or signaling NaN. + fn is_nan(self) -> bool { + self.category() == Category::NaN + } + + /// Returns `true` if the float is a signaling NaN. + fn is_signaling(self) -> bool; + + // Simple Queries + + fn category(self) -> Category; + fn is_non_zero(self) -> bool { + !self.is_zero() + } + fn is_finite_non_zero(self) -> bool { + self.is_finite() && !self.is_zero() + } + fn is_pos_zero(self) -> bool { + self.is_zero() && !self.is_negative() + } + fn is_neg_zero(self) -> bool { + self.is_zero() && self.is_negative() + } + + /// Returns `true` if the number has the smallest possible non-zero + /// magnitude in the current semantics. + fn is_smallest(self) -> bool { + Self::SMALLEST.copy_sign(self).bitwise_eq(self) + } + + /// Returns `true` if the number has the largest possible finite + /// magnitude in the current semantics. + fn is_largest(self) -> bool { + Self::largest().copy_sign(self).bitwise_eq(self) + } + + /// Returns `true` if the number is an exact integer. + fn is_integer(self) -> bool { + // This could be made more efficient; I'm going for obviously correct. + if !self.is_finite() { + return false; + } + self.round_to_integral(Round::TowardZero).value.bitwise_eq(self) + } + + /// If this value has an exact multiplicative inverse, return it. + fn get_exact_inverse(self) -> Option<Self>; + + /// Returns the exponent of the internal representation of the Float. + /// + /// Because the radix of Float is 2, this is equivalent to floor(log2(x)). + /// For special Float values, this returns special error codes: + /// + /// NaN -> \c IEK_NAN + /// 0 -> \c IEK_ZERO + /// Inf -> \c IEK_INF + /// + fn ilogb(self) -> ExpInt; + + /// Returns: self * 2<sup>exp</sup> for integral exponents. + /// Equivalent to C standard library function `ldexp`. + fn scalbn_r(self, exp: ExpInt, round: Round) -> Self; + fn scalbn(self, exp: ExpInt) -> Self { + self.scalbn_r(exp, Round::NearestTiesToEven) + } + + /// Equivalent to C standard library function with the same name. + /// + /// While the C standard says exp is an unspecified value for infinity and nan, + /// this returns INT_MAX for infinities, and INT_MIN for NaNs (see `ilogb`). + fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self; + fn frexp(self, exp: &mut ExpInt) -> Self { + self.frexp_r(exp, Round::NearestTiesToEven) + } +} + +pub trait FloatConvert<T: Float>: Float { + /// Converts a value of one floating point type to another. + /// The return value corresponds to the IEEE754 exceptions. *loses_info + /// records whether the transformation lost information, i.e., whether + /// converting the result back to the original type will produce the + /// original value (this is almost the same as return `value == Status::OK`, + /// but there are edge cases where this is not so). + fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<T>; + fn convert(self, loses_info: &mut bool) -> StatusAnd<T> { + self.convert_r(Round::NearestTiesToEven, loses_info) + } +} + +macro_rules! float_common_impls { + ($ty:ident<$t:tt>) => { + impl<$t> Default for $ty<$t> + where + Self: Float, + { + fn default() -> Self { + Self::ZERO + } + } + + impl<$t> ::core::str::FromStr for $ty<$t> + where + Self: Float, + { + type Err = ParseError; + fn from_str(s: &str) -> Result<Self, ParseError> { + Self::from_str_r(s, Round::NearestTiesToEven).map(|x| x.value) + } + } + + // Rounding ties to the nearest even, by default. + + impl<$t> ::core::ops::Add for $ty<$t> + where + Self: Float, + { + type Output = StatusAnd<Self>; + fn add(self, rhs: Self) -> StatusAnd<Self> { + self.add_r(rhs, Round::NearestTiesToEven) + } + } + + impl<$t> ::core::ops::Sub for $ty<$t> + where + Self: Float, + { + type Output = StatusAnd<Self>; + fn sub(self, rhs: Self) -> StatusAnd<Self> { + self.sub_r(rhs, Round::NearestTiesToEven) + } + } + + impl<$t> ::core::ops::Mul for $ty<$t> + where + Self: Float, + { + type Output = StatusAnd<Self>; + fn mul(self, rhs: Self) -> StatusAnd<Self> { + self.mul_r(rhs, Round::NearestTiesToEven) + } + } + + impl<$t> ::core::ops::Div for $ty<$t> + where + Self: Float, + { + type Output = StatusAnd<Self>; + fn div(self, rhs: Self) -> StatusAnd<Self> { + self.div_r(rhs, Round::NearestTiesToEven) + } + } + + impl<$t> ::core::ops::Rem for $ty<$t> + where + Self: Float, + { + type Output = StatusAnd<Self>; + fn rem(self, rhs: Self) -> StatusAnd<Self> { + self.c_fmod(rhs) + } + } + + impl<$t> ::core::ops::AddAssign for $ty<$t> + where + Self: Float, + { + fn add_assign(&mut self, rhs: Self) { + *self = (*self + rhs).value; + } + } + + impl<$t> ::core::ops::SubAssign for $ty<$t> + where + Self: Float, + { + fn sub_assign(&mut self, rhs: Self) { + *self = (*self - rhs).value; + } + } + + impl<$t> ::core::ops::MulAssign for $ty<$t> + where + Self: Float, + { + fn mul_assign(&mut self, rhs: Self) { + *self = (*self * rhs).value; + } + } + + impl<$t> ::core::ops::DivAssign for $ty<$t> + where + Self: Float, + { + fn div_assign(&mut self, rhs: Self) { + *self = (*self / rhs).value; + } + } + + impl<$t> ::core::ops::RemAssign for $ty<$t> + where + Self: Float, + { + fn rem_assign(&mut self, rhs: Self) { + *self = (*self % rhs).value; + } + } + }; +} + +pub mod ieee; +pub mod ppc; diff --git a/compiler/rustc_apfloat/src/ppc.rs b/compiler/rustc_apfloat/src/ppc.rs new file mode 100644 index 000000000..65a0f6664 --- /dev/null +++ b/compiler/rustc_apfloat/src/ppc.rs @@ -0,0 +1,434 @@ +use crate::ieee; +use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd}; + +use core::cmp::Ordering; +use core::fmt; +use core::ops::Neg; + +#[must_use] +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] +pub struct DoubleFloat<F>(F, F); +pub type DoubleDouble = DoubleFloat<ieee::Double>; + +// These are legacy semantics for the Fallback, inaccurate implementation of +// IBM double-double, if the accurate DoubleDouble doesn't handle the +// operation. It's equivalent to having an IEEE number with consecutive 106 +// bits of mantissa and 11 bits of exponent. +// +// It's not equivalent to IBM double-double. For example, a legit IBM +// double-double, 1 + epsilon: +// +// 1 + epsilon = 1 + (1 >> 1076) +// +// is not representable by a consecutive 106 bits of mantissa. +// +// Currently, these semantics are used in the following way: +// +// DoubleDouble -> (Double, Double) -> +// DoubleDouble's Fallback -> IEEE operations +// +// FIXME: Implement all operations in DoubleDouble, and delete these +// semantics. +// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. +pub struct FallbackS<F>(#[allow(unused)] F); +type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>; +impl<F: Float> ieee::Semantics for FallbackS<F> { + // Forbid any conversion to/from bits. + const BITS: usize = 0; + const PRECISION: usize = F::PRECISION * 2; + const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; + const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt; +} + +// Convert number to F. To avoid spurious underflows, we re- +// normalize against the F exponent range first, and only *then* +// truncate the mantissa. The result of that second conversion +// may be inexact, but should never underflow. +// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. +pub struct FallbackExtendedS<F>(#[allow(unused)] F); +type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>; +impl<F: Float> ieee::Semantics for FallbackExtendedS<F> { + // Forbid any conversion to/from bits. + const BITS: usize = 0; + const PRECISION: usize = Fallback::<F>::PRECISION; + const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; +} + +impl<F: Float> From<Fallback<F>> for DoubleFloat<F> +where + F: FloatConvert<FallbackExtended<F>>, + FallbackExtended<F>: FloatConvert<F>, +{ + fn from(x: Fallback<F>) -> Self { + let mut status; + let mut loses_info = false; + + let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info)); + assert_eq!((status, loses_info), (Status::OK, false)); + + let a = unpack!(status=, extended.convert(&mut loses_info)); + assert_eq!(status - Status::INEXACT, Status::OK); + + // If conversion was exact or resulted in a special case, we're done; + // just set the second double to zero. Otherwise, re-convert back to + // the extended format and compute the difference. This now should + // convert exactly to double. + let b = if a.is_finite_non_zero() && loses_info { + let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info)); + assert_eq!((status, loses_info), (Status::OK, false)); + let v = unpack!(status=, extended - u); + assert_eq!(status, Status::OK); + let v = unpack!(status=, v.convert(&mut loses_info)); + assert_eq!((status, loses_info), (Status::OK, false)); + v + } else { + F::ZERO + }; + + DoubleFloat(a, b) + } +} + +impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> { + fn from(DoubleFloat(a, b): DoubleFloat<F>) -> Self { + let mut status; + let mut loses_info = false; + + // Get the first F and convert to our format. + let a = unpack!(status=, a.convert(&mut loses_info)); + assert_eq!((status, loses_info), (Status::OK, false)); + + // Unless we have a special case, add in second F. + if a.is_finite_non_zero() { + let b = unpack!(status=, b.convert(&mut loses_info)); + assert_eq!((status, loses_info), (Status::OK, false)); + + (a + b).value + } else { + a + } + } +} + +float_common_impls!(DoubleFloat<F>); + +impl<F: Float> Neg for DoubleFloat<F> { + type Output = Self; + fn neg(self) -> Self { + if self.1.is_finite_non_zero() { + DoubleFloat(-self.0, -self.1) + } else { + DoubleFloat(-self.0, self.1) + } + } +} + +impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&Fallback::from(*self), f) + } +} + +impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F> +where + Self: From<Fallback<F>>, +{ + const BITS: usize = F::BITS * 2; + const PRECISION: usize = Fallback::<F>::PRECISION; + const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP; + const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP; + + const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO); + + const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO); + + // FIXME(eddyb) remove when qnan becomes const fn. + const NAN: Self = DoubleFloat(F::NAN, F::ZERO); + + fn qnan(payload: Option<u128>) -> Self { + DoubleFloat(F::qnan(payload), F::ZERO) + } + + fn snan(payload: Option<u128>) -> Self { + DoubleFloat(F::snan(payload), F::ZERO) + } + + fn largest() -> Self { + let status; + let mut r = DoubleFloat(F::largest(), F::largest()); + r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1)); + r.1 = unpack!(status=, r.1.next_down()); + assert_eq!(status, Status::OK); + r + } + + const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO); + + fn smallest_normalized() -> Self { + DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO) + } + + // Implement addition, subtraction, multiplication and division based on: + // "Software for Doubled-Precision Floating-Point Computations", + // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. + + fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { + match (self.category(), rhs.category()) { + (Category::Infinity, Category::Infinity) => { + if self.is_negative() != rhs.is_negative() { + Status::INVALID_OP.and(Self::NAN.copy_sign(self)) + } else { + Status::OK.and(self) + } + } + + (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => { + Status::OK.and(self) + } + + (Category::Zero, _) | (_, Category::NaN | Category::Infinity) => Status::OK.and(rhs), + + (Category::Normal, Category::Normal) => { + let mut status = Status::OK; + let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1); + let mut z = a; + z = unpack!(status|=, z.add_r(c, round)); + if !z.is_finite() { + if !z.is_infinite() { + return status.and(DoubleFloat(z, F::ZERO)); + } + status = Status::OK; + let a_cmp_c = a.cmp_abs_normal(c); + z = cc; + z = unpack!(status|=, z.add_r(aa, round)); + if a_cmp_c == Ordering::Greater { + // z = cc + aa + c + a; + z = unpack!(status|=, z.add_r(c, round)); + z = unpack!(status|=, z.add_r(a, round)); + } else { + // z = cc + aa + a + c; + z = unpack!(status|=, z.add_r(a, round)); + z = unpack!(status|=, z.add_r(c, round)); + } + if !z.is_finite() { + return status.and(DoubleFloat(z, F::ZERO)); + } + self.0 = z; + let mut zz = aa; + zz = unpack!(status|=, zz.add_r(cc, round)); + if a_cmp_c == Ordering::Greater { + // self.1 = a - z + c + zz; + self.1 = a; + self.1 = unpack!(status|=, self.1.sub_r(z, round)); + self.1 = unpack!(status|=, self.1.add_r(c, round)); + self.1 = unpack!(status|=, self.1.add_r(zz, round)); + } else { + // self.1 = c - z + a + zz; + self.1 = c; + self.1 = unpack!(status|=, self.1.sub_r(z, round)); + self.1 = unpack!(status|=, self.1.add_r(a, round)); + self.1 = unpack!(status|=, self.1.add_r(zz, round)); + } + } else { + // q = a - z; + let mut q = a; + q = unpack!(status|=, q.sub_r(z, round)); + + // zz = q + c + (a - (q + z)) + aa + cc; + // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. + let mut zz = q; + zz = unpack!(status|=, zz.add_r(c, round)); + q = unpack!(status|=, q.add_r(z, round)); + q = unpack!(status|=, q.sub_r(a, round)); + q = -q; + zz = unpack!(status|=, zz.add_r(q, round)); + zz = unpack!(status|=, zz.add_r(aa, round)); + zz = unpack!(status|=, zz.add_r(cc, round)); + if zz.is_zero() && !zz.is_negative() { + return Status::OK.and(DoubleFloat(z, F::ZERO)); + } + self.0 = z; + self.0 = unpack!(status|=, self.0.add_r(zz, round)); + if !self.0.is_finite() { + self.1 = F::ZERO; + return status.and(self); + } + self.1 = z; + self.1 = unpack!(status|=, self.1.sub_r(self.0, round)); + self.1 = unpack!(status|=, self.1.add_r(zz, round)); + } + status.and(self) + } + } + } + + fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { + // Interesting observation: For special categories, finding the lowest + // common ancestor of the following layered graph gives the correct + // return category: + // + // NaN + // / \ + // Zero Inf + // \ / + // Normal + // + // e.g., NaN * NaN = NaN + // Zero * Inf = NaN + // Normal * Zero = Zero + // Normal * Inf = Inf + match (self.category(), rhs.category()) { + (Category::NaN, _) => Status::OK.and(self), + + (_, Category::NaN) => Status::OK.and(rhs), + + (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => { + Status::OK.and(Self::NAN) + } + + (Category::Zero | Category::Infinity, _) => Status::OK.and(self), + + (_, Category::Zero | Category::Infinity) => Status::OK.and(rhs), + + (Category::Normal, Category::Normal) => { + let mut status = Status::OK; + let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1); + // t = a * c + let mut t = a; + t = unpack!(status|=, t.mul_r(c, round)); + if !t.is_finite_non_zero() { + return status.and(DoubleFloat(t, F::ZERO)); + } + + // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). + let mut tau = a; + tau = unpack!(status|=, tau.mul_add_r(c, -t, round)); + // v = a * d + let mut v = a; + v = unpack!(status|=, v.mul_r(d, round)); + // w = b * c + let mut w = b; + w = unpack!(status|=, w.mul_r(c, round)); + v = unpack!(status|=, v.add_r(w, round)); + // tau += v + w + tau = unpack!(status|=, tau.add_r(v, round)); + // u = t + tau + let mut u = t; + u = unpack!(status|=, u.add_r(tau, round)); + + self.0 = u; + if !u.is_finite() { + self.1 = F::ZERO; + } else { + // self.1 = (t - u) + tau + t = unpack!(status|=, t.sub_r(u, round)); + t = unpack!(status|=, t.add_r(tau, round)); + self.1 = t; + } + status.and(self) + } + } + } + + fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { + Fallback::from(self) + .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round) + .map(Self::from) + } + + fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { + Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from) + } + + fn c_fmod(self, rhs: Self) -> StatusAnd<Self> { + Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from) + } + + fn round_to_integral(self, round: Round) -> StatusAnd<Self> { + Fallback::from(self).round_to_integral(round).map(Self::from) + } + + fn next_up(self) -> StatusAnd<Self> { + Fallback::from(self).next_up().map(Self::from) + } + + fn from_bits(input: u128) -> Self { + let (a, b) = (input, input >> F::BITS); + DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1))) + } + + fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { + Fallback::from_u128_r(input, round).map(Self::from) + } + + fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { + Fallback::from_str_r(s, round).map(|r| r.map(Self::from)) + } + + fn to_bits(self) -> u128 { + self.0.to_bits() | (self.1.to_bits() << F::BITS) + } + + fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { + Fallback::from(self).to_u128_r(width, round, is_exact) + } + + fn cmp_abs_normal(self, rhs: Self) -> Ordering { + self.0.cmp_abs_normal(rhs.0).then_with(|| { + let result = self.1.cmp_abs_normal(rhs.1); + if result != Ordering::Equal { + let against = self.0.is_negative() ^ self.1.is_negative(); + let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative(); + (!against) + .cmp(&!rhs_against) + .then_with(|| if against { result.reverse() } else { result }) + } else { + result + } + }) + } + + fn bitwise_eq(self, rhs: Self) -> bool { + self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1) + } + + fn is_negative(self) -> bool { + self.0.is_negative() + } + + fn is_denormal(self) -> bool { + self.category() == Category::Normal + && (self.0.is_denormal() || self.0.is_denormal() || + // (double)(Hi + Lo) == Hi defines a normal number. + !(self.0 + self.1).value.bitwise_eq(self.0)) + } + + fn is_signaling(self) -> bool { + self.0.is_signaling() + } + + fn category(self) -> Category { + self.0.category() + } + + fn get_exact_inverse(self) -> Option<Self> { + Fallback::from(self).get_exact_inverse().map(Self::from) + } + + fn ilogb(self) -> ExpInt { + self.0.ilogb() + } + + fn scalbn_r(self, exp: ExpInt, round: Round) -> Self { + DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round)) + } + + fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self { + let a = self.0.frexp_r(exp, round); + let mut b = self.1; + if self.category() == Category::Normal { + b = b.scalbn_r(-*exp, round); + } + DoubleFloat(a, b) + } +} |