diff options
Diffstat (limited to 'arch/mips/math-emu/sp_maddf.c')
-rw-r--r-- | arch/mips/math-emu/sp_maddf.c | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c new file mode 100644 index 000000000..473ee222d --- /dev/null +++ b/arch/mips/math-emu/sp_maddf.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * single precision: MADDF.f (Fused Multiply Add) + * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754sp.h" + + +static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y, enum maddf_flags flags) +{ + int re; + int rs; + unsigned int rm; + u64 rm64; + u64 zm64; + int s; + + COMPXSP; + COMPYSP; + COMPZSP; + + EXPLODEXSP; + EXPLODEYSP; + EXPLODEZSP; + + FLUSHXSP; + FLUSHYSP; + FLUSHZSP; + + ieee754_clearcx(); + + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(z); + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + if (yc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) + return y; + + if (zc == IEEE754_CLASS_DNORM) + SPDNORMZ; + /* ZERO z cases are handled separately below */ + + switch (CLPAIR(xc, yc)) { + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). + */ + return ieee754sp_inf(rs); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if (zs == rs) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ + return z; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + /* continue to real computations */ + } + + /* Finally get to do some computation */ + + /* + * Do the multiplication bit first + * + * rm = xm * ym, re = xe + ye basically + * + * At this point xm and ym should have been normalized. + */ + + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + re = xe + ye; + + /* Multiple 24 bit xm and ym to give 48 bit results */ + rm64 = (uint64_t)xm * ym; + + /* Shunt to top of word */ + rm64 = rm64 << 16; + + /* Put explicit bit at bit 62 if necessary */ + if ((int64_t) rm64 < 0) { + rm64 = rm64 >> 1; + re++; + } + + assert(rm64 & (1 << 62)); + + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + rm = XSPSRS64(rm64, (62 - 26)); + return ieee754sp_format(rs, re, rm); + } + + /* Move explicit bit from bit 23 to bit 62 */ + zm64 = (uint64_t)zm << (62 - 23); + assert(zm64 & (1 << 62)); + + /* Make the exponents the same */ + if (ze > re) { + /* + * Have to shift r fraction right to align. + */ + s = ze - re; + rm64 = XSPSRS64(rm64, s); + re += s; + } else if (re > ze) { + /* + * Have to shift z fraction right to align. + */ + s = re - ze; + zm64 = XSPSRS64(zm64, s); + ze += s; + } + assert(ze == re); + assert(ze <= SP_EMAX); + + /* Do the addition */ + if (zs == rs) { + /* + * Generate 64 bit result by adding two 63 bit numbers + * leaving result in zm64, zs and ze. + */ + zm64 = zm64 + rm64; + if ((int64_t)zm64 < 0) { /* carry out */ + zm64 = XSPSRS1(zm64); + ze++; + } + } else { + if (zm64 >= rm64) { + zm64 = zm64 - rm64; + } else { + zm64 = rm64 - zm64; + zs = rs; + } + if (zm64 == 0) + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Put explicit bit at bit 62 if necessary. + */ + while ((zm64 >> 62) == 0) { + zm64 <<= 1; + ze--; + } + } + + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + zm = XSPSRS64(zm64, (62 - 26)); + + return ieee754sp_format(zs, ze, zm); +} + +union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} + +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} |