From 9835e2ae736235810b4ea1c162ca5e65c547e770 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 18 May 2024 04:49:50 +0200
Subject: Merging upstream version 1.71.1+dfsg1.

Signed-off-by: Daniel Baumann
---
 vendor/libm-0.1.4/src/math/fma.rs | 207 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100644 vendor/libm-0.1.4/src/math/fma.rs

diff --git a/vendor/libm-0.1.4/src/math/fma.rs b/vendor/libm-0.1.4/src/math/fma.rs
new file mode 100644
index 000000000..07d90f8b7
--- /dev/null
+++ b/vendor/libm-0.1.4/src/math/fma.rs
@@ -0,0 +1,207 @@
+use core::{f32, f64};
+
+use super::scalbn;
+
+const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
+
+struct Num {
+    m: u64,
+    e: i32,
+    sign: i32,
+}
+
+#[inline]
+fn normalize(x: f64) -> Num {
+    let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
+
+    let mut ix: u64 = x.to_bits();
+    let mut e: i32 = (ix >> 52) as i32;
+    let sign: i32 = e & 0x800;
+    e &= 0x7ff;
+    if e == 0 {
+        ix = (x * x1p63).to_bits();
+        e = (ix >> 52) as i32 & 0x7ff;
+        e = if e != 0 { e - 63 } else { 0x800 };
+    }
+    ix &= (1 << 52) - 1;
+    ix |= 1 << 52;
+    ix <<= 1;
+    e -= 0x3ff + 52 + 1;
+    Num { m: ix, e, sign }
+}
+
+#[inline]
+fn mul(x: u64, y: u64) -> (u64, u64) {
+    let t1: u64;
+    let t2: u64;
+    let t3: u64;
+    let xlo: u64 = x as u32 as u64;
+    let xhi: u64 = x >> 32;
+    let ylo: u64 = y as u32 as u64;
+    let yhi: u64 = y >> 32;
+
+    t1 = xlo * ylo;
+    t2 = xlo * yhi + xhi * ylo;
+    t3 = xhi * yhi;
+    let lo = t1.wrapping_add(t2 << 32);
+    let hi = t3 + (t2 >> 32) + (t1 > lo) as u64;
+    (hi, lo)
+}
+
+/// Floating multiply add (f64)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation:
+/// Computes the value (as if) to infinite precision and rounds once to the result format,
+/// according to the rounding mode characterized by the value of FLT_ROUNDS.
+#[inline]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
+    let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
+
+    /* normalize so top 10bits and last bit are 0 */
+    let nx = normalize(x);
+    let ny = normalize(y);
+    let nz = normalize(z);
+
+    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
+        return x * y + z;
+    }
+    if nz.e >= ZEROINFNAN {
+        if nz.e > ZEROINFNAN {
+            /* z==0 */
+            return x * y + z;
+        }
+        return z;
+    }
+
+    /* mul: r = x*y */
+    let zhi: u64;
+    let zlo: u64;
+    let (mut rhi, mut rlo) = mul(nx.m, ny.m);
+    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
+
+    /* align exponents */
+    let mut e: i32 = nx.e + ny.e;
+    let mut d: i32 = nz.e - e;
+    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
+    if d > 0 {
+        if d < 64 {
+            zlo = nz.m << d;
+            zhi = nz.m >> (64 - d);
+        } else {
+            zlo = 0;
+            zhi = nz.m;
+            e = nz.e - 64;
+            d -= 64;
+            if d == 0 {
+            } else if d < 64 {
+                rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64;
+                rhi = rhi >> d;
+            } else {
+                rlo = 1;
+                rhi = 0;
+            }
+        }
+    } else {
+        zhi = 0;
+        d = -d;
+        if d == 0 {
+            zlo = nz.m;
+        } else if d < 64 {
+            zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64;
+        } else {
+            zlo = 1;
+        }
+    }
+
+    /* add */
+    let mut sign: i32 = nx.sign ^ ny.sign;
+    let samesign: bool = (sign ^ nz.sign) == 0;
+    let mut nonzero: i32 = 1;
+    if samesign {
+        /* r += z */
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + (rlo < zlo) as u64;
+    } else {
+        /* r -= z */
+        let t = rlo;
+        rlo -= zlo;
+        rhi = rhi - zhi - (t < rlo) as u64;
+        if (rhi >> 63) != 0 {
+            rlo = (-(rlo as i64)) as u64;
+            rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64;
+            sign = (sign == 0) as i32;
+        }
+        nonzero = (rhi != 0) as i32;
+    }
+
+    /* set rhi to top 63bit of the result (last bit is sticky) */
+    if nonzero != 0 {
+        e += 64;
+        d = rhi.leading_zeros() as i32 - 1;
+        /* note: d > 0 */
+        rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64;
+    } else if rlo != 0 {
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            rhi = rlo >> 1 | (rlo & 1);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        /* exact +-0 */
+        return x * y + z;
+    }
+    e -= d;
+
+    /* convert to double */
+    let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
+    if sign != 0 {
+        i = -i;
+    }
+    let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
+
+    if e < -1022 - 62 {
+        /* result is subnormal before rounding */
+        if e == -1022 - 63 {
+            let mut c: f64 = x1p63;
+            if sign != 0 {
+                c = -c;
+            }
+            if r == c {
+                /* min normal after rounding, underflow depends
+                on arch behaviour which can be imitated by
+                a double to float conversion */
+                let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
+                return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
+            }
+            /* one bit is lost when scaled, add another top bit to
+            only round once at conversion if it is inexact */
+            if (rhi << 53) != 0 {
+                i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
+                if sign != 0 {
+                    i = -i;
+                }
+                r = i as f64;
+                r = 2. * r - c; /* remove top bit */
+
+                /* raise underflow portably, such that it
+                cannot be optimized away */
+                {
+                    let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
+                    r += (tiny * tiny) * (r - r);
+                }
+            }
+        } else {
+            /* only round once when scaled */
+            d = 10;
+            i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
+            if sign != 0 {
+                i = -i;
+            }
+            r = i as f64;
+        }
+    }
+    scalbn(r, e)
+}
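
Review note on the mul() helper: it splits each operand into 32-bit halves and reassembles the full 64x64-bit product as a (hi, lo) pair. It is only ever called on mantissas produced by normalize(), whose top 10 bits are zero, so the partial-product sum t2 cannot overflow a u64. Within that domain the result can be cross-checked against Rust's native 128-bit multiplication. The test below is a hypothetical sketch, not part of the vendored file; it assumes it is placed inside the same module so the private mul() is visible.

// Hypothetical cross-check, not part of the patch: reference implementation
// of the two-limb product using u128.
fn mul_via_u128(x: u64, y: u64) -> (u64, u64) {
    let p = (x as u128) * (y as u128);
    ((p >> 64) as u64, p as u64) // (high limb, low limb)
}

#[test]
fn mul_matches_u128_on_normalized_mantissas() {
    // Operands shaped like normalize() output: bit 53 set, bit 0 clear,
    // top 10 bits zero, so mul()'s intermediate sums cannot overflow.
    let samples = [
        1u64 << 53,
        (1u64 << 53) | 0x000f_ffff_ffff_fffe,
        (1u64 << 53) | 0x0012_3456_789a_bcde,
    ];
    for &x in &samples {
        for &y in &samples {
            assert_eq!(mul(x, y), mul_via_u128(x, y));
        }
    }
}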
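
The doc comment promises a single rounding of (x*y)+z. A small illustration of how that differs from the unfused expression, assuming the crate is consumed as the `libm` crate, which exposes this function as `libm::fma`: with a = 2^27 + 1 and b = 2^27 - 1 the exact product 2^54 - 1 is not representable in f64, so the unfused form rounds it to 2^54 before the addition, while the fused form keeps the low bit.

// Illustrative sketch only: contrasts fma with the unfused expression on
// inputs whose intermediate product is inexact in f64.
fn main() {
    let a = 134_217_729.0_f64; // 2^27 + 1
    let b = 134_217_727.0_f64; // 2^27 - 1
    let c = -18_014_398_509_481_984.0_f64; // -(2^54)

    // a * b = 2^54 - 1 exactly, but f64 rounds the product to 2^54,
    // so the unfused expression loses the -1 entirely.
    let unfused = a * b + c;
    // fma rounds only once, after the exact product and sum.
    let fused = libm::fma(a, b, c);

    assert_eq!(unfused, 0.0);
    assert_eq!(fused, -1.0);
}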