diff options
Diffstat (limited to 'arch/mips/math-emu')
46 files changed, 9159 insertions, 0 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile new file mode 100644 index 000000000..81d25ff32 --- /dev/null +++ b/arch/mips/math-emu/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux/MIPS kernel FPU emulation. +# + +obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ + dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ + dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \ + dp_fmax.o \ + sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ + sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \ + sp_fmax.o \ + dsemul.o + +lib-y += ieee754d.o \ + dp_tlong.o dp_flong.o dp_sqrt.o \ + sp_tlong.o sp_flong.o sp_sqrt.o + +obj-$(CONFIG_DEBUG_FS) += me-debugfs.o diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c new file mode 100644 index 000000000..265bc5781 --- /dev/null +++ b/arch/mips/math-emu/cp1emu.c @@ -0,0 +1,2948 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cp1emu.c: a MIPS coprocessor 1 (FPU) instruction emulator + * + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. + * + * A complete emulator for MIPS coprocessor 1 instructions. This is + * required for #float(switch) or #float(trap), where it catches all + * COP1 instructions via the "CoProcessor Unusable" exception. + * + * More surprisingly it is also required for #float(ieee), to help out + * the hardware FPU at the boundaries of the IEEE-754 representation + * (denormalised values, infinities, underflow, etc). It is made + * quite nasty because emulation of some non-COP1 instructions is + * required, e.g. in branch delay slots. + * + * Note if you know that you won't have an FPU, then you'll get much + * better performance by compiling with -msoft-float! + */ +#include <linux/sched.h> +#include <linux/debugfs.h> +#include <linux/percpu-defs.h> +#include <linux/perf_event.h> + +#include <asm/branch.h> +#include <asm/inst.h> +#include <asm/ptrace.h> +#include <asm/signal.h> +#include <linux/uaccess.h> + +#include <asm/cpu-info.h> +#include <asm/processor.h> +#include <asm/fpu_emulator.h> +#include <asm/fpu.h> +#include <asm/mips-r2-to-r6-emul.h> + +#include "ieee754.h" + +/* Function which emulates a floating point instruction. */ + +static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *, + mips_instruction); + +static int fpux_emu(struct pt_regs *, + struct mips_fpu_struct *, mips_instruction, void __user **); + +/* Control registers */ + +#define FPCREG_RID 0 /* $0 = revision id */ +#define FPCREG_FCCR 25 /* $25 = fccr */ +#define FPCREG_FEXR 26 /* $26 = fexr */ +#define FPCREG_FENR 28 /* $28 = fenr */ +#define FPCREG_CSR 31 /* $31 = csr */ + +/* convert condition code register number to csr bit */ +const unsigned int fpucondbit[8] = { + FPU_CSR_COND, + FPU_CSR_COND1, + FPU_CSR_COND2, + FPU_CSR_COND3, + FPU_CSR_COND4, + FPU_CSR_COND5, + FPU_CSR_COND6, + FPU_CSR_COND7 +}; + +/* (microMIPS) Convert certain microMIPS instructions to MIPS32 format. */ +static const int sd_format[] = {16, 17, 0, 0, 0, 0, 0, 0}; +static const int sdps_format[] = {16, 17, 22, 0, 0, 0, 0, 0}; +static const int dwl_format[] = {17, 20, 21, 0, 0, 0, 0, 0}; +static const int swl_format[] = {16, 20, 21, 0, 0, 0, 0, 0}; + +/* + * This functions translates a 32-bit microMIPS instruction + * into a 32-bit MIPS32 instruction. Returns 0 on success + * and SIGILL otherwise. + */ +static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr) +{ + union mips_instruction insn = *insn_ptr; + union mips_instruction mips32_insn = insn; + int func, fmt, op; + + switch (insn.mm_i_format.opcode) { + case mm_ldc132_op: + mips32_insn.mm_i_format.opcode = ldc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_lwc132_op: + mips32_insn.mm_i_format.opcode = lwc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_sdc132_op: + mips32_insn.mm_i_format.opcode = sdc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_swc132_op: + mips32_insn.mm_i_format.opcode = swc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_pool32i_op: + /* NOTE: offset is << by 1 if in microMIPS mode. */ + if ((insn.mm_i_format.rt == mm_bc1f_op) || + (insn.mm_i_format.rt == mm_bc1t_op)) { + mips32_insn.fb_format.opcode = cop1_op; + mips32_insn.fb_format.bc = bc_op; + mips32_insn.fb_format.flag = + (insn.mm_i_format.rt == mm_bc1t_op) ? 1 : 0; + } else + return SIGILL; + break; + case mm_pool32f_op: + switch (insn.mm_fp0_format.func) { + case mm_32f_01_op: + case mm_32f_11_op: + case mm_32f_02_op: + case mm_32f_12_op: + case mm_32f_41_op: + case mm_32f_51_op: + case mm_32f_42_op: + case mm_32f_52_op: + op = insn.mm_fp0_format.func; + if (op == mm_32f_01_op) + func = madd_s_op; + else if (op == mm_32f_11_op) + func = madd_d_op; + else if (op == mm_32f_02_op) + func = nmadd_s_op; + else if (op == mm_32f_12_op) + func = nmadd_d_op; + else if (op == mm_32f_41_op) + func = msub_s_op; + else if (op == mm_32f_51_op) + func = msub_d_op; + else if (op == mm_32f_42_op) + func = nmsub_s_op; + else + func = nmsub_d_op; + mips32_insn.fp6_format.opcode = cop1x_op; + mips32_insn.fp6_format.fr = insn.mm_fp6_format.fr; + mips32_insn.fp6_format.ft = insn.mm_fp6_format.ft; + mips32_insn.fp6_format.fs = insn.mm_fp6_format.fs; + mips32_insn.fp6_format.fd = insn.mm_fp6_format.fd; + mips32_insn.fp6_format.func = func; + break; + case mm_32f_10_op: + func = -1; /* Invalid */ + op = insn.mm_fp5_format.op & 0x7; + if (op == mm_ldxc1_op) + func = ldxc1_op; + else if (op == mm_sdxc1_op) + func = sdxc1_op; + else if (op == mm_lwxc1_op) + func = lwxc1_op; + else if (op == mm_swxc1_op) + func = swxc1_op; + + if (func != -1) { + mips32_insn.r_format.opcode = cop1x_op; + mips32_insn.r_format.rs = + insn.mm_fp5_format.base; + mips32_insn.r_format.rt = + insn.mm_fp5_format.index; + mips32_insn.r_format.rd = 0; + mips32_insn.r_format.re = insn.mm_fp5_format.fd; + mips32_insn.r_format.func = func; + } else + return SIGILL; + break; + case mm_32f_40_op: + op = -1; /* Invalid */ + if (insn.mm_fp2_format.op == mm_fmovt_op) + op = 1; + else if (insn.mm_fp2_format.op == mm_fmovf_op) + op = 0; + if (op != -1) { + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp2_format.fmt]; + mips32_insn.fp0_format.ft = + (insn.mm_fp2_format.cc<<2) + op; + mips32_insn.fp0_format.fs = + insn.mm_fp2_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp2_format.fd; + mips32_insn.fp0_format.func = fmovc_op; + } else + return SIGILL; + break; + case mm_32f_60_op: + func = -1; /* Invalid */ + if (insn.mm_fp0_format.op == mm_fadd_op) + func = fadd_op; + else if (insn.mm_fp0_format.op == mm_fsub_op) + func = fsub_op; + else if (insn.mm_fp0_format.op == mm_fmul_op) + func = fmul_op; + else if (insn.mm_fp0_format.op == mm_fdiv_op) + func = fdiv_op; + if (func != -1) { + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp0_format.fmt]; + mips32_insn.fp0_format.ft = + insn.mm_fp0_format.ft; + mips32_insn.fp0_format.fs = + insn.mm_fp0_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp0_format.fd; + mips32_insn.fp0_format.func = func; + } else + return SIGILL; + break; + case mm_32f_70_op: + func = -1; /* Invalid */ + if (insn.mm_fp0_format.op == mm_fmovn_op) + func = fmovn_op; + else if (insn.mm_fp0_format.op == mm_fmovz_op) + func = fmovz_op; + if (func != -1) { + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp0_format.fmt]; + mips32_insn.fp0_format.ft = + insn.mm_fp0_format.ft; + mips32_insn.fp0_format.fs = + insn.mm_fp0_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp0_format.fd; + mips32_insn.fp0_format.func = func; + } else + return SIGILL; + break; + case mm_32f_73_op: /* POOL32FXF */ + switch (insn.mm_fp1_format.op) { + case mm_movf0_op: + case mm_movf1_op: + case mm_movt0_op: + case mm_movt1_op: + if ((insn.mm_fp1_format.op & 0x7f) == + mm_movf0_op) + op = 0; + else + op = 1; + mips32_insn.r_format.opcode = spec_op; + mips32_insn.r_format.rs = insn.mm_fp4_format.fs; + mips32_insn.r_format.rt = + (insn.mm_fp4_format.cc << 2) + op; + mips32_insn.r_format.rd = insn.mm_fp4_format.rt; + mips32_insn.r_format.re = 0; + mips32_insn.r_format.func = movc_op; + break; + case mm_fcvtd0_op: + case mm_fcvtd1_op: + case mm_fcvts0_op: + case mm_fcvts1_op: + if ((insn.mm_fp1_format.op & 0x7f) == + mm_fcvtd0_op) { + func = fcvtd_op; + fmt = swl_format[insn.mm_fp3_format.fmt]; + } else { + func = fcvts_op; + fmt = dwl_format[insn.mm_fp3_format.fmt]; + } + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = fmt; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp3_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp3_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_fmov0_op: + case mm_fmov1_op: + case mm_fabs0_op: + case mm_fabs1_op: + case mm_fneg0_op: + case mm_fneg1_op: + if ((insn.mm_fp1_format.op & 0x7f) == + mm_fmov0_op) + func = fmov_op; + else if ((insn.mm_fp1_format.op & 0x7f) == + mm_fabs0_op) + func = fabs_op; + else + func = fneg_op; + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp3_format.fmt]; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp3_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp3_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_ffloorl_op: + case mm_ffloorw_op: + case mm_fceill_op: + case mm_fceilw_op: + case mm_ftruncl_op: + case mm_ftruncw_op: + case mm_froundl_op: + case mm_froundw_op: + case mm_fcvtl_op: + case mm_fcvtw_op: + if (insn.mm_fp1_format.op == mm_ffloorl_op) + func = ffloorl_op; + else if (insn.mm_fp1_format.op == mm_ffloorw_op) + func = ffloor_op; + else if (insn.mm_fp1_format.op == mm_fceill_op) + func = fceill_op; + else if (insn.mm_fp1_format.op == mm_fceilw_op) + func = fceil_op; + else if (insn.mm_fp1_format.op == mm_ftruncl_op) + func = ftruncl_op; + else if (insn.mm_fp1_format.op == mm_ftruncw_op) + func = ftrunc_op; + else if (insn.mm_fp1_format.op == mm_froundl_op) + func = froundl_op; + else if (insn.mm_fp1_format.op == mm_froundw_op) + func = fround_op; + else if (insn.mm_fp1_format.op == mm_fcvtl_op) + func = fcvtl_op; + else + func = fcvtw_op; + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sd_format[insn.mm_fp1_format.fmt]; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp1_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp1_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_frsqrt_op: + case mm_fsqrt_op: + case mm_frecip_op: + if (insn.mm_fp1_format.op == mm_frsqrt_op) + func = frsqrt_op; + else if (insn.mm_fp1_format.op == mm_fsqrt_op) + func = fsqrt_op; + else + func = frecip_op; + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp1_format.fmt]; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp1_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp1_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_mfc1_op: + case mm_mtc1_op: + case mm_cfc1_op: + case mm_ctc1_op: + case mm_mfhc1_op: + case mm_mthc1_op: + if (insn.mm_fp1_format.op == mm_mfc1_op) + op = mfc_op; + else if (insn.mm_fp1_format.op == mm_mtc1_op) + op = mtc_op; + else if (insn.mm_fp1_format.op == mm_cfc1_op) + op = cfc_op; + else if (insn.mm_fp1_format.op == mm_ctc1_op) + op = ctc_op; + else if (insn.mm_fp1_format.op == mm_mfhc1_op) + op = mfhc_op; + else + op = mthc_op; + mips32_insn.fp1_format.opcode = cop1_op; + mips32_insn.fp1_format.op = op; + mips32_insn.fp1_format.rt = + insn.mm_fp1_format.rt; + mips32_insn.fp1_format.fs = + insn.mm_fp1_format.fs; + mips32_insn.fp1_format.fd = 0; + mips32_insn.fp1_format.func = 0; + break; + default: + return SIGILL; + } + break; + case mm_32f_74_op: /* c.cond.fmt */ + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp4_format.fmt]; + mips32_insn.fp0_format.ft = insn.mm_fp4_format.rt; + mips32_insn.fp0_format.fs = insn.mm_fp4_format.fs; + mips32_insn.fp0_format.fd = insn.mm_fp4_format.cc << 2; + mips32_insn.fp0_format.func = + insn.mm_fp4_format.cond | MM_MIPS32_COND_FC; + break; + default: + return SIGILL; + } + break; + default: + return SIGILL; + } + + *insn_ptr = mips32_insn; + return 0; +} + +/* + * Redundant with logic already in kernel/branch.c, + * embedded in compute_return_epc. At some point, + * a single subroutine should be used across both + * modules. + */ +int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, + unsigned long *contpc) +{ + union mips_instruction insn = (union mips_instruction)dec_insn.insn; + unsigned int fcr31; + unsigned int bit = 0; + unsigned int bit0; + union fpureg *fpr; + + switch (insn.i_format.opcode) { + case spec_op: + switch (insn.r_format.func) { + case jalr_op: + if (insn.r_format.rd != 0) { + regs->regs[insn.r_format.rd] = + regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + } + fallthrough; + case jr_op: + /* For R6, JR already emulated in jalr_op */ + if (NO_R6EMU && insn.r_format.func == jr_op) + break; + *contpc = regs->regs[insn.r_format.rs]; + return 1; + } + break; + case bcond_op: + switch (insn.i_format.rt) { + case bltzal_op: + case bltzall_op: + if (NO_R6EMU && (insn.i_format.rs || + insn.i_format.rt == bltzall_op)) + break; + + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + fallthrough; + case bltzl_op: + if (NO_R6EMU) + break; + fallthrough; + case bltz_op: + if ((long)regs->regs[insn.i_format.rs] < 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case bgezal_op: + case bgezall_op: + if (NO_R6EMU && (insn.i_format.rs || + insn.i_format.rt == bgezall_op)) + break; + + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + fallthrough; + case bgezl_op: + if (NO_R6EMU) + break; + fallthrough; + case bgez_op: + if ((long)regs->regs[insn.i_format.rs] >= 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + } + break; + case jalx_op: + set_isa16_mode(bit); + fallthrough; + case jal_op: + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + fallthrough; + case j_op: + *contpc = regs->cp0_epc + dec_insn.pc_inc; + *contpc >>= 28; + *contpc <<= 28; + *contpc |= (insn.j_format.target << 2); + /* Set microMIPS mode bit: XOR for jalx. */ + *contpc ^= bit; + return 1; + case beql_op: + if (NO_R6EMU) + break; + fallthrough; + case beq_op: + if (regs->regs[insn.i_format.rs] == + regs->regs[insn.i_format.rt]) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case bnel_op: + if (NO_R6EMU) + break; + fallthrough; + case bne_op: + if (regs->regs[insn.i_format.rs] != + regs->regs[insn.i_format.rt]) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case blezl_op: + if (!insn.i_format.rt && NO_R6EMU) + break; + fallthrough; + case blez_op: + + /* + * Compact branches for R6 for the + * blez and blezl opcodes. + * BLEZ | rs = 0 | rt != 0 == BLEZALC + * BLEZ | rs = rt != 0 == BGEZALC + * BLEZ | rs != 0 | rt != 0 == BGEUC + * BLEZL | rs = 0 | rt != 0 == BLEZC + * BLEZL | rs = rt != 0 == BGEZC + * BLEZL | rs != 0 | rt != 0 == BGEC + * + * For real BLEZ{,L}, rt is always 0. + */ + if (cpu_has_mips_r6 && insn.i_format.rt) { + if ((insn.i_format.opcode == blez_op) && + ((!insn.i_format.rs && insn.i_format.rt) || + (insn.i_format.rs == insn.i_format.rt))) + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + } + if ((long)regs->regs[insn.i_format.rs] <= 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case bgtzl_op: + if (!insn.i_format.rt && NO_R6EMU) + break; + fallthrough; + case bgtz_op: + /* + * Compact branches for R6 for the + * bgtz and bgtzl opcodes. + * BGTZ | rs = 0 | rt != 0 == BGTZALC + * BGTZ | rs = rt != 0 == BLTZALC + * BGTZ | rs != 0 | rt != 0 == BLTUC + * BGTZL | rs = 0 | rt != 0 == BGTZC + * BGTZL | rs = rt != 0 == BLTZC + * BGTZL | rs != 0 | rt != 0 == BLTC + * + * *ZALC varint for BGTZ &&& rt != 0 + * For real GTZ{,L}, rt is always 0. + */ + if (cpu_has_mips_r6 && insn.i_format.rt) { + if ((insn.i_format.opcode == blez_op) && + ((!insn.i_format.rs && insn.i_format.rt) || + (insn.i_format.rs == insn.i_format.rt))) + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + } + + if ((long)regs->regs[insn.i_format.rs] > 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case pop10_op: + case pop30_op: + if (!cpu_has_mips_r6) + break; + if (insn.i_format.rt && !insn.i_format.rs) + regs->regs[31] = regs->cp0_epc + 4; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; +#ifdef CONFIG_CPU_CAVIUM_OCTEON + case lwc2_op: /* This is bbit0 on Octeon */ + if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; + case ldc2_op: /* This is bbit032 on Octeon */ + if ((regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32))) == 0) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; + case swc2_op: /* This is bbit1 on Octeon */ + if (regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; + case sdc2_op: /* This is bbit132 on Octeon */ + if (regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32))) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; +#else + case bc6_op: + /* + * Only valid for MIPS R6 but we can still end up + * here from a broken userland so just tell emulator + * this is not a branch and let it break later on. + */ + if (!cpu_has_mips_r6) + break; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + case balc6_op: + if (!cpu_has_mips_r6) + break; + regs->regs[31] = regs->cp0_epc + 4; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + case pop66_op: + if (!cpu_has_mips_r6) + break; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + case pop76_op: + if (!cpu_has_mips_r6) + break; + if (!insn.i_format.rs) + regs->regs[31] = regs->cp0_epc + 4; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; +#endif + case cop0_op: + case cop1_op: + /* Need to check for R6 bc1nez and bc1eqz branches */ + if (cpu_has_mips_r6 && + ((insn.i_format.rs == bc1eqz_op) || + (insn.i_format.rs == bc1nez_op))) { + bit = 0; + fpr = ¤t->thread.fpu.fpr[insn.i_format.rt]; + bit0 = get_fpr32(fpr, 0) & 0x1; + switch (insn.i_format.rs) { + case bc1eqz_op: + bit = bit0 == 0; + break; + case bc1nez_op: + bit = bit0 != 0; + break; + } + if (bit) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + } + /* R2/R6 compatible cop1 instruction */ + fallthrough; + case cop2_op: + case cop1x_op: + if (insn.i_format.rs == bc_op) { + preempt_disable(); + if (is_fpu_owner()) + fcr31 = read_32bit_cp1_register(CP1_STATUS); + else + fcr31 = current->thread.fpu.fcr31; + preempt_enable(); + + bit = (insn.i_format.rt >> 2); + bit += (bit != 0); + bit += 23; + switch (insn.i_format.rt & 3) { + case 0: /* bc1f */ + case 2: /* bc1fl */ + if (~fcr31 & (1 << bit)) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case 1: /* bc1t */ + case 3: /* bc1tl */ + if (fcr31 & (1 << bit)) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + } + } + break; + } + return 0; +} + +/* + * In the Linux kernel, we support selection of FPR format on the + * basis of the Status.FR bit. If an FPU is not present, the FR bit + * is hardwired to zero, which would imply a 32-bit FPU even for + * 64-bit CPUs so we rather look at TIF_32BIT_FPREGS. + * FPU emu is slow and bulky and optimizing this function offers fairly + * sizeable benefits so we try to be clever and make this function return + * a constant whenever possible, that is on 64-bit kernels without O32 + * compatibility enabled and on 32-bit without 64-bit FPU support. + */ +static inline int cop1_64bit(struct pt_regs *xcp) +{ + if (IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_MIPS32_O32)) + return 1; + else if (IS_ENABLED(CONFIG_32BIT) && + !IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) + return 0; + + return !test_thread_flag(TIF_32BIT_FPREGS); +} + +static inline bool hybrid_fprs(void) +{ + return test_thread_flag(TIF_HYBRID_FPREGS); +} + +#define SIFROMREG(si, x) \ +do { \ + if (cop1_64bit(xcp) && !hybrid_fprs()) \ + (si) = (int)get_fpr32(&ctx->fpr[x], 0); \ + else \ + (si) = (int)get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \ +} while (0) + +#define SITOREG(si, x) \ +do { \ + if (cop1_64bit(xcp) && !hybrid_fprs()) { \ + unsigned int i; \ + set_fpr32(&ctx->fpr[x], 0, si); \ + for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \ + set_fpr32(&ctx->fpr[x], i, 0); \ + } else { \ + set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \ + } \ +} while (0) + +#define SIFROMHREG(si, x) ((si) = (int)get_fpr32(&ctx->fpr[x], 1)) + +#define SITOHREG(si, x) \ +do { \ + unsigned int i; \ + set_fpr32(&ctx->fpr[x], 1, si); \ + for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \ + set_fpr32(&ctx->fpr[x], i, 0); \ +} while (0) + +#define DIFROMREG(di, x) \ + ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) ^ 1)], 0)) + +#define DITOREG(di, x) \ +do { \ + unsigned int fpr, i; \ + fpr = (x) & ~(cop1_64bit(xcp) ^ 1); \ + set_fpr64(&ctx->fpr[fpr], 0, di); \ + for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \ + set_fpr64(&ctx->fpr[fpr], i, 0); \ +} while (0) + +#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x) +#define SPTOREG(sp, x) SITOREG((sp).bits, x) +#define DPFROMREG(dp, x) DIFROMREG((dp).bits, x) +#define DPTOREG(dp, x) DITOREG((dp).bits, x) + +/* + * Emulate a CFC1 instruction. + */ +static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir) +{ + u32 fcr31 = ctx->fcr31; + u32 value = 0; + + switch (MIPSInst_RD(ir)) { + case FPCREG_CSR: + value = fcr31; + pr_debug("%p gpr[%d]<-csr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_FENR: + if (!cpu_has_mips_r) + break; + value = (fcr31 >> (FPU_CSR_FS_S - MIPS_FENR_FS_S)) & + MIPS_FENR_FS; + value |= fcr31 & (FPU_CSR_ALL_E | FPU_CSR_RM); + pr_debug("%p gpr[%d]<-enr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_FEXR: + if (!cpu_has_mips_r) + break; + value = fcr31 & (FPU_CSR_ALL_X | FPU_CSR_ALL_S); + pr_debug("%p gpr[%d]<-exr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_FCCR: + if (!cpu_has_mips_r) + break; + value = (fcr31 >> (FPU_CSR_COND_S - MIPS_FCCR_COND0_S)) & + MIPS_FCCR_COND0; + value |= (fcr31 >> (FPU_CSR_COND1_S - MIPS_FCCR_COND1_S)) & + (MIPS_FCCR_CONDX & ~MIPS_FCCR_COND0); + pr_debug("%p gpr[%d]<-ccr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_RID: + value = boot_cpu_data.fpu_id; + break; + + default: + break; + } + + if (MIPSInst_RT(ir)) + xcp->regs[MIPSInst_RT(ir)] = value; +} + +/* + * Emulate a CTC1 instruction. + */ +static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir) +{ + u32 fcr31 = ctx->fcr31; + u32 value; + u32 mask; + + if (MIPSInst_RT(ir) == 0) + value = 0; + else + value = xcp->regs[MIPSInst_RT(ir)]; + + switch (MIPSInst_RD(ir)) { + case FPCREG_CSR: + pr_debug("%p gpr[%d]->csr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + + /* Preserve read-only bits. */ + mask = boot_cpu_data.fpu_msk31; + fcr31 = (value & ~mask) | (fcr31 & mask); + break; + + case FPCREG_FENR: + if (!cpu_has_mips_r) + break; + pr_debug("%p gpr[%d]->enr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + fcr31 &= ~(FPU_CSR_FS | FPU_CSR_ALL_E | FPU_CSR_RM); + fcr31 |= (value << (FPU_CSR_FS_S - MIPS_FENR_FS_S)) & + FPU_CSR_FS; + fcr31 |= value & (FPU_CSR_ALL_E | FPU_CSR_RM); + break; + + case FPCREG_FEXR: + if (!cpu_has_mips_r) + break; + pr_debug("%p gpr[%d]->exr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + fcr31 &= ~(FPU_CSR_ALL_X | FPU_CSR_ALL_S); + fcr31 |= value & (FPU_CSR_ALL_X | FPU_CSR_ALL_S); + break; + + case FPCREG_FCCR: + if (!cpu_has_mips_r) + break; + pr_debug("%p gpr[%d]->ccr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + fcr31 &= ~(FPU_CSR_CONDX | FPU_CSR_COND); + fcr31 |= (value << (FPU_CSR_COND_S - MIPS_FCCR_COND0_S)) & + FPU_CSR_COND; + fcr31 |= (value << (FPU_CSR_COND1_S - MIPS_FCCR_COND1_S)) & + FPU_CSR_CONDX; + break; + + default: + break; + } + + ctx->fcr31 = fcr31; +} + +/* + * Emulate the single floating point instruction pointed at by EPC. + * Two instructions if the instruction is in a branch delay slot. + */ + +static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + struct mm_decoded_insn dec_insn, void __user **fault_addr) +{ + unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc; + unsigned int cond, cbit, bit0; + mips_instruction ir; + int likely, pc_inc; + union fpureg *fpr; + u32 __user *wva; + u64 __user *dva; + u32 wval; + u64 dval; + int sig; + + /* + * These are giving gcc a gentle hint about what to expect in + * dec_inst in order to do better optimization. + */ + if (!cpu_has_mmips && dec_insn.micro_mips_mode) + unreachable(); + + /* XXX NEC Vr54xx bug workaround */ + if (delay_slot(xcp)) { + if (dec_insn.micro_mips_mode) { + if (!mm_isBranchInstr(xcp, dec_insn, &contpc)) + clear_delay_slot(xcp); + } else { + if (!isBranchInstr(xcp, dec_insn, &contpc)) + clear_delay_slot(xcp); + } + } + + if (delay_slot(xcp)) { + /* + * The instruction to be emulated is in a branch delay slot + * which means that we have to emulate the branch instruction + * BEFORE we do the cop1 instruction. + * + * This branch could be a COP1 branch, but in that case we + * would have had a trap for that instruction, and would not + * come through this route. + * + * Linux MIPS branch emulator operates on context, updating the + * cp0_epc. + */ + ir = dec_insn.next_insn; /* process delay slot instr */ + pc_inc = dec_insn.next_pc_inc; + } else { + ir = dec_insn.insn; /* process current instr */ + pc_inc = dec_insn.pc_inc; + } + + /* + * Since microMIPS FPU instructios are a subset of MIPS32 FPU + * instructions, we want to convert microMIPS FPU instructions + * into MIPS32 instructions so that we could reuse all of the + * FPU emulation code. + * + * NOTE: We cannot do this for branch instructions since they + * are not a subset. Example: Cannot emulate a 16-bit + * aligned target address with a MIPS32 instruction. + */ + if (dec_insn.micro_mips_mode) { + /* + * If next instruction is a 16-bit instruction, then + * it cannot be a FPU instruction. This could happen + * since we can be called for non-FPU instructions. + */ + if ((pc_inc == 2) || + (microMIPS32_to_MIPS32((union mips_instruction *)&ir) + == SIGILL)) + return SIGILL; + } + +emul: + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0); + MIPS_FPU_EMU_INC_STATS(emulated); + switch (MIPSInst_OPCODE(ir)) { + case ldc1_op: + dva = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(loads); + + if (!access_ok(dva, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGBUS; + } + if (__get_user(dval, dva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGSEGV; + } + DITOREG(dval, MIPSInst_RT(ir)); + break; + + case sdc1_op: + dva = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(stores); + DIFROMREG(dval, MIPSInst_RT(ir)); + if (!access_ok(dva, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGBUS; + } + if (__put_user(dval, dva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGSEGV; + } + break; + + case lwc1_op: + wva = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(loads); + if (!access_ok(wva, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGBUS; + } + if (__get_user(wval, wva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGSEGV; + } + SITOREG(wval, MIPSInst_RT(ir)); + break; + + case swc1_op: + wva = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(stores); + SIFROMREG(wval, MIPSInst_RT(ir)); + if (!access_ok(wva, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGBUS; + } + if (__put_user(wval, wva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGSEGV; + } + break; + + case cop1_op: + switch (MIPSInst_RS(ir)) { + case dmfc_op: + if (!cpu_has_mips_3_4_5 && !cpu_has_mips64) + return SIGILL; + + /* copregister fs -> gpr[rt] */ + if (MIPSInst_RT(ir) != 0) { + DIFROMREG(xcp->regs[MIPSInst_RT(ir)], + MIPSInst_RD(ir)); + } + break; + + case dmtc_op: + if (!cpu_has_mips_3_4_5 && !cpu_has_mips64) + return SIGILL; + + /* copregister fs <- rt */ + DITOREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir)); + break; + + case mfhc_op: + if (!cpu_has_mips_r2_r6) + return SIGILL; + + /* copregister rd -> gpr[rt] */ + if (MIPSInst_RT(ir) != 0) { + SIFROMHREG(xcp->regs[MIPSInst_RT(ir)], + MIPSInst_RD(ir)); + } + break; + + case mthc_op: + if (!cpu_has_mips_r2_r6) + return SIGILL; + + /* copregister rd <- gpr[rt] */ + SITOHREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir)); + break; + + case mfc_op: + /* copregister rd -> gpr[rt] */ + if (MIPSInst_RT(ir) != 0) { + SIFROMREG(xcp->regs[MIPSInst_RT(ir)], + MIPSInst_RD(ir)); + } + break; + + case mtc_op: + /* copregister rd <- rt */ + SITOREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir)); + break; + + case cfc_op: + /* cop control register rd -> gpr[rt] */ + cop1_cfc(xcp, ctx, ir); + break; + + case ctc_op: + /* copregister rd <- rt */ + cop1_ctc(xcp, ctx, ir); + if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { + return SIGFPE; + } + break; + + case bc1eqz_op: + case bc1nez_op: + if (!cpu_has_mips_r6 || delay_slot(xcp)) + return SIGILL; + + likely = 0; + cond = 0; + fpr = ¤t->thread.fpu.fpr[MIPSInst_RT(ir)]; + bit0 = get_fpr32(fpr, 0) & 0x1; + switch (MIPSInst_RS(ir)) { + case bc1eqz_op: + MIPS_FPU_EMU_INC_STATS(bc1eqz); + cond = bit0 == 0; + break; + case bc1nez_op: + MIPS_FPU_EMU_INC_STATS(bc1nez); + cond = bit0 != 0; + break; + } + goto branch_common; + + case bc_op: + if (delay_slot(xcp)) + return SIGILL; + + if (cpu_has_mips_4_5_r) + cbit = fpucondbit[MIPSInst_RT(ir) >> 2]; + else + cbit = FPU_CSR_COND; + cond = ctx->fcr31 & cbit; + + likely = 0; + switch (MIPSInst_RT(ir) & 3) { + case bcfl_op: + if (cpu_has_mips_2_3_4_5_r) + likely = 1; + fallthrough; + case bcf_op: + cond = !cond; + break; + case bctl_op: + if (cpu_has_mips_2_3_4_5_r) + likely = 1; + fallthrough; + case bct_op: + break; + } +branch_common: + MIPS_FPU_EMU_INC_STATS(branches); + set_delay_slot(xcp); + if (cond) { + /* + * Branch taken: emulate dslot instruction + */ + unsigned long bcpc; + + /* + * Remember EPC at the branch to point back + * at so that any delay-slot instruction + * signal is not silently ignored. + */ + bcpc = xcp->cp0_epc; + xcp->cp0_epc += dec_insn.pc_inc; + + contpc = MIPSInst_SIMM(ir); + ir = dec_insn.next_insn; + if (dec_insn.micro_mips_mode) { + contpc = (xcp->cp0_epc + (contpc << 1)); + + /* If 16-bit instruction, not FPU. */ + if ((dec_insn.next_pc_inc == 2) || + (microMIPS32_to_MIPS32((union mips_instruction *)&ir) == SIGILL)) { + + /* + * Since this instruction will + * be put on the stack with + * 32-bit words, get around + * this problem by putting a + * NOP16 as the second one. + */ + if (dec_insn.next_pc_inc == 2) + ir = (ir & (~0xffff)) | MM_NOP16; + + /* + * Single step the non-CP1 + * instruction in the dslot. + */ + sig = mips_dsemul(xcp, ir, + bcpc, contpc); + if (sig < 0) + break; + if (sig) + xcp->cp0_epc = bcpc; + /* + * SIGILL forces out of + * the emulation loop. + */ + return sig ? sig : SIGILL; + } + } else + contpc = (xcp->cp0_epc + (contpc << 2)); + + switch (MIPSInst_OPCODE(ir)) { + case lwc1_op: + case swc1_op: + goto emul; + + case ldc1_op: + case sdc1_op: + if (cpu_has_mips_2_3_4_5_r) + goto emul; + + goto bc_sigill; + + case cop1_op: + goto emul; + + case cop1x_op: + if (cpu_has_mips_4_5_64_r2_r6) + /* its one of ours */ + goto emul; + + goto bc_sigill; + + case spec_op: + switch (MIPSInst_FUNC(ir)) { + case movc_op: + if (cpu_has_mips_4_5_r) + goto emul; + + goto bc_sigill; + } + break; + + bc_sigill: + xcp->cp0_epc = bcpc; + return SIGILL; + } + + /* + * Single step the non-cp1 + * instruction in the dslot + */ + sig = mips_dsemul(xcp, ir, bcpc, contpc); + if (sig < 0) + break; + if (sig) + xcp->cp0_epc = bcpc; + /* SIGILL forces out of the emulation loop. */ + return sig ? sig : SIGILL; + } else if (likely) { /* branch not taken */ + /* + * branch likely nullifies + * dslot if not taken + */ + xcp->cp0_epc += dec_insn.pc_inc; + contpc += dec_insn.pc_inc; + /* + * else continue & execute + * dslot as normal insn + */ + } + break; + + default: + if (!(MIPSInst_RS(ir) & 0x10)) + return SIGILL; + + /* a real fpu computation instruction */ + sig = fpu_emu(xcp, ctx, ir); + if (sig) + return sig; + } + break; + + case cop1x_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + sig = fpux_emu(xcp, ctx, ir, fault_addr); + if (sig) + return sig; + break; + + case spec_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (MIPSInst_FUNC(ir) != movc_op) + return SIGILL; + cond = fpucondbit[MIPSInst_RT(ir) >> 2]; + if (((ctx->fcr31 & cond) != 0) == ((MIPSInst_RT(ir) & 1) != 0)) + xcp->regs[MIPSInst_RD(ir)] = + xcp->regs[MIPSInst_RS(ir)]; + break; + default: + return SIGILL; + } + + /* we did it !! */ + xcp->cp0_epc = contpc; + clear_delay_slot(xcp); + + return 0; +} + +/* + * Conversion table from MIPS compare ops 48-63 + * cond = ieee754dp_cmp(x,y,IEEE754_UN,sig); + */ +static const unsigned char cmptab[8] = { + 0, /* cmp_0 (sig) cmp_sf */ + IEEE754_CUN, /* cmp_un (sig) cmp_ngle */ + IEEE754_CEQ, /* cmp_eq (sig) cmp_seq */ + IEEE754_CEQ | IEEE754_CUN, /* cmp_ueq (sig) cmp_ngl */ + IEEE754_CLT, /* cmp_olt (sig) cmp_lt */ + IEEE754_CLT | IEEE754_CUN, /* cmp_ult (sig) cmp_nge */ + IEEE754_CLT | IEEE754_CEQ, /* cmp_ole (sig) cmp_le */ + IEEE754_CLT | IEEE754_CEQ | IEEE754_CUN, /* cmp_ule (sig) cmp_ngt */ +}; + +static const unsigned char negative_cmptab[8] = { + 0, /* Reserved */ + IEEE754_CLT | IEEE754_CGT | IEEE754_CEQ, + IEEE754_CLT | IEEE754_CGT | IEEE754_CUN, + IEEE754_CLT | IEEE754_CGT, + /* Reserved */ +}; + + +/* + * Additional MIPS4 instructions + */ + +#define DEF3OP(name, p, f1, f2, f3) \ +static union ieee754##p fpemu_##p##_##name(union ieee754##p r, \ + union ieee754##p s, union ieee754##p t) \ +{ \ + struct _ieee754_csr ieee754_csr_save; \ + s = f1(s, t); \ + ieee754_csr_save = ieee754_csr; \ + s = f2(s, r); \ + ieee754_csr_save.cx |= ieee754_csr.cx; \ + ieee754_csr_save.sx |= ieee754_csr.sx; \ + s = f3(s); \ + ieee754_csr.cx |= ieee754_csr_save.cx; \ + ieee754_csr.sx |= ieee754_csr_save.sx; \ + return s; \ +} + +static union ieee754dp fpemu_dp_recip(union ieee754dp d) +{ + return ieee754dp_div(ieee754dp_one(0), d); +} + +static union ieee754dp fpemu_dp_rsqrt(union ieee754dp d) +{ + return ieee754dp_div(ieee754dp_one(0), ieee754dp_sqrt(d)); +} + +static union ieee754sp fpemu_sp_recip(union ieee754sp s) +{ + return ieee754sp_div(ieee754sp_one(0), s); +} + +static union ieee754sp fpemu_sp_rsqrt(union ieee754sp s) +{ + return ieee754sp_div(ieee754sp_one(0), ieee754sp_sqrt(s)); +} + +DEF3OP(madd, sp, ieee754sp_mul, ieee754sp_add, ); +DEF3OP(msub, sp, ieee754sp_mul, ieee754sp_sub, ); +DEF3OP(nmadd, sp, ieee754sp_mul, ieee754sp_add, ieee754sp_neg); +DEF3OP(nmsub, sp, ieee754sp_mul, ieee754sp_sub, ieee754sp_neg); +DEF3OP(madd, dp, ieee754dp_mul, ieee754dp_add, ); +DEF3OP(msub, dp, ieee754dp_mul, ieee754dp_sub, ); +DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg); +DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg); + +static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir, void __user **fault_addr) +{ + unsigned int rcsr = 0; /* resulting csr */ + + MIPS_FPU_EMU_INC_STATS(cp1xops); + + switch (MIPSInst_FMA_FFMT(ir)) { + case s_fmt:{ /* 0 */ + + union ieee754sp(*handler) (union ieee754sp, union ieee754sp, union ieee754sp); + union ieee754sp fd, fr, fs, ft; + u32 __user *va; + u32 val; + + switch (MIPSInst_FUNC(ir)) { + case lwxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(loads); + if (!access_ok(va, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + SITOREG(val, MIPSInst_FD(ir)); + break; + + case swxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(stores); + + SIFROMREG(val, MIPSInst_FS(ir)); + if (!access_ok(va, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + break; + + case madd_s_op: + if (cpu_has_mac2008_only) + handler = ieee754sp_madd; + else + handler = fpemu_sp_madd; + goto scoptop; + case msub_s_op: + if (cpu_has_mac2008_only) + handler = ieee754sp_msub; + else + handler = fpemu_sp_msub; + goto scoptop; + case nmadd_s_op: + if (cpu_has_mac2008_only) + handler = ieee754sp_nmadd; + else + handler = fpemu_sp_nmadd; + goto scoptop; + case nmsub_s_op: + if (cpu_has_mac2008_only) + handler = ieee754sp_nmsub; + else + handler = fpemu_sp_nmsub; + goto scoptop; + + scoptop: + SPFROMREG(fr, MIPSInst_FR(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + fd = (*handler) (fr, fs, ft); + SPTOREG(fd, MIPSInst_FD(ir)); + + copcsr: + if (ieee754_cxtest(IEEE754_INEXACT)) { + MIPS_FPU_EMU_INC_STATS(ieee754_inexact); + rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S; + } + if (ieee754_cxtest(IEEE754_UNDERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_underflow); + rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S; + } + if (ieee754_cxtest(IEEE754_OVERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_overflow); + rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S; + } + if (ieee754_cxtest(IEEE754_INVALID_OPERATION)) { + MIPS_FPU_EMU_INC_STATS(ieee754_invalidop); + rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S; + } + + ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr; + if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { + /*printk ("SIGFPE: FPU csr = %08x\n", + ctx->fcr31); */ + return SIGFPE; + } + + break; + + default: + return SIGILL; + } + break; + } + + case d_fmt:{ /* 1 */ + union ieee754dp(*handler) (union ieee754dp, union ieee754dp, union ieee754dp); + union ieee754dp fd, fr, fs, ft; + u64 __user *va; + u64 val; + + switch (MIPSInst_FUNC(ir)) { + case ldxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(loads); + if (!access_ok(va, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + DITOREG(val, MIPSInst_FD(ir)); + break; + + case sdxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(stores); + DIFROMREG(val, MIPSInst_FS(ir)); + if (!access_ok(va, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (__put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + break; + + case madd_d_op: + if (cpu_has_mac2008_only) + handler = ieee754dp_madd; + else + handler = fpemu_dp_madd; + goto dcoptop; + case msub_d_op: + if (cpu_has_mac2008_only) + handler = ieee754dp_msub; + else + handler = fpemu_dp_msub; + goto dcoptop; + case nmadd_d_op: + if (cpu_has_mac2008_only) + handler = ieee754dp_nmadd; + else + handler = fpemu_dp_nmadd; + goto dcoptop; + case nmsub_d_op: + if (cpu_has_mac2008_only) + handler = ieee754dp_nmsub; + else + handler = fpemu_dp_nmsub; + goto dcoptop; + + dcoptop: + DPFROMREG(fr, MIPSInst_FR(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + fd = (*handler) (fr, fs, ft); + DPTOREG(fd, MIPSInst_FD(ir)); + goto copcsr; + + default: + return SIGILL; + } + break; + } + + case 0x3: + if (MIPSInst_FUNC(ir) != pfetch_op) + return SIGILL; + + /* ignore prefx operation */ + break; + + default: + return SIGILL; + } + + return 0; +} + + + +/* + * Emulate a single COP1 arithmetic instruction. + */ +static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir) +{ + int rfmt; /* resulting format */ + unsigned int rcsr = 0; /* resulting csr */ + unsigned int oldrm; + unsigned int cbit; + unsigned int cond; + union { + union ieee754dp d; + union ieee754sp s; + int w; + s64 l; + } rv; /* resulting value */ + u64 bits; + + MIPS_FPU_EMU_INC_STATS(cp1ops); + switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) { + case s_fmt: { /* 0 */ + union { + union ieee754sp(*b) (union ieee754sp, union ieee754sp); + union ieee754sp(*u) (union ieee754sp); + } handler; + union ieee754sp fd, fs, ft; + + switch (MIPSInst_FUNC(ir)) { + /* binary ops */ + case fadd_op: + MIPS_FPU_EMU_INC_STATS(add_s); + handler.b = ieee754sp_add; + goto scopbop; + case fsub_op: + MIPS_FPU_EMU_INC_STATS(sub_s); + handler.b = ieee754sp_sub; + goto scopbop; + case fmul_op: + MIPS_FPU_EMU_INC_STATS(mul_s); + handler.b = ieee754sp_mul; + goto scopbop; + case fdiv_op: + MIPS_FPU_EMU_INC_STATS(div_s); + handler.b = ieee754sp_div; + goto scopbop; + + /* unary ops */ + case fsqrt_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(sqrt_s); + handler.u = ieee754sp_sqrt; + goto scopuop; + + /* + * Note that on some MIPS IV implementations such as the + * R5000 and R8000 the FSQRT and FRECIP instructions do not + * achieve full IEEE-754 accuracy - however this emulator does. + */ + case frsqrt_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(rsqrt_s); + handler.u = fpemu_sp_rsqrt; + goto scopuop; + + case frecip_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(recip_s); + handler.u = fpemu_sp_recip; + goto scopuop; + + case fmovc_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + cond = fpucondbit[MIPSInst_FT(ir) >> 2]; + if (((ctx->fcr31 & cond) != 0) != + ((MIPSInst_FT(ir) & 1) != 0)) + return 0; + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fmovz_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] != 0) + return 0; + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fmovn_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] == 0) + return 0; + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fseleqz_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(seleqz_s); + SPFROMREG(rv.s, MIPSInst_FT(ir)); + if (rv.w & 0x1) + rv.w = 0; + else + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fselnez_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(selnez_s); + SPFROMREG(rv.s, MIPSInst_FT(ir)); + if (rv.w & 0x1) + SPFROMREG(rv.s, MIPSInst_FS(ir)); + else + rv.w = 0; + break; + + case fmaddf_op: { + union ieee754sp ft, fs, fd; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(maddf_s); + SPFROMREG(ft, MIPSInst_FT(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(fd, MIPSInst_FD(ir)); + rv.s = ieee754sp_maddf(fd, fs, ft); + goto copcsr; + } + + case fmsubf_op: { + union ieee754sp ft, fs, fd; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(msubf_s); + SPFROMREG(ft, MIPSInst_FT(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(fd, MIPSInst_FD(ir)); + rv.s = ieee754sp_msubf(fd, fs, ft); + goto copcsr; + } + + case frint_op: { + union ieee754sp fs; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(rint_s); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_rint(fs); + goto copcsr; + } + + case fclass_op: { + union ieee754sp fs; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(class_s); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.w = ieee754sp_2008class(fs); + rfmt = w_fmt; + goto copcsr; + } + + case fmin_op: { + union ieee754sp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(min_s); + SPFROMREG(ft, MIPSInst_FT(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fmin(fs, ft); + goto copcsr; + } + + case fmina_op: { + union ieee754sp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(mina_s); + SPFROMREG(ft, MIPSInst_FT(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fmina(fs, ft); + goto copcsr; + } + + case fmax_op: { + union ieee754sp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(max_s); + SPFROMREG(ft, MIPSInst_FT(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fmax(fs, ft); + goto copcsr; + } + + case fmaxa_op: { + union ieee754sp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(maxa_s); + SPFROMREG(ft, MIPSInst_FT(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fmaxa(fs, ft); + goto copcsr; + } + + case fabs_op: + MIPS_FPU_EMU_INC_STATS(abs_s); + handler.u = ieee754sp_abs; + goto scopuop; + + case fneg_op: + MIPS_FPU_EMU_INC_STATS(neg_s); + handler.u = ieee754sp_neg; + goto scopuop; + + case fmov_op: + /* an easy one */ + MIPS_FPU_EMU_INC_STATS(mov_s); + SPFROMREG(rv.s, MIPSInst_FS(ir)); + goto copcsr; + + /* binary op on handler */ +scopbop: + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + + rv.s = (*handler.b) (fs, ft); + goto copcsr; +scopuop: + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = (*handler.u) (fs); + goto copcsr; +copcsr: + if (ieee754_cxtest(IEEE754_INEXACT)) { + MIPS_FPU_EMU_INC_STATS(ieee754_inexact); + rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S; + } + if (ieee754_cxtest(IEEE754_UNDERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_underflow); + rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S; + } + if (ieee754_cxtest(IEEE754_OVERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_overflow); + rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S; + } + if (ieee754_cxtest(IEEE754_ZERO_DIVIDE)) { + MIPS_FPU_EMU_INC_STATS(ieee754_zerodiv); + rcsr |= FPU_CSR_DIV_X | FPU_CSR_DIV_S; + } + if (ieee754_cxtest(IEEE754_INVALID_OPERATION)) { + MIPS_FPU_EMU_INC_STATS(ieee754_invalidop); + rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S; + } + break; + + /* unary conv ops */ + case fcvts_op: + return SIGILL; /* not defined */ + + case fcvtd_op: + MIPS_FPU_EMU_INC_STATS(cvt_d_s); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fsp(fs); + rfmt = d_fmt; + goto copcsr; + + case fcvtw_op: + MIPS_FPU_EMU_INC_STATS(cvt_w_s); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.w = ieee754sp_tint(fs); + rfmt = w_fmt; + goto copcsr; + + case fround_op: + case ftrunc_op: + case fceil_op: + case ffloor_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + if (MIPSInst_FUNC(ir) == fceil_op) + MIPS_FPU_EMU_INC_STATS(ceil_w_s); + if (MIPSInst_FUNC(ir) == ffloor_op) + MIPS_FPU_EMU_INC_STATS(floor_w_s); + if (MIPSInst_FUNC(ir) == fround_op) + MIPS_FPU_EMU_INC_STATS(round_w_s); + if (MIPSInst_FUNC(ir) == ftrunc_op) + MIPS_FPU_EMU_INC_STATS(trunc_w_s); + + oldrm = ieee754_csr.rm; + SPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.w = ieee754sp_tint(fs); + ieee754_csr.rm = oldrm; + rfmt = w_fmt; + goto copcsr; + + case fsel_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(sel_s); + SPFROMREG(fd, MIPSInst_FD(ir)); + if (fd.bits & 0x1) + SPFROMREG(rv.s, MIPSInst_FT(ir)); + else + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fcvtl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(cvt_l_s); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.l = ieee754sp_tlong(fs); + rfmt = l_fmt; + goto copcsr; + + case froundl_op: + case ftruncl_op: + case fceill_op: + case ffloorl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + if (MIPSInst_FUNC(ir) == fceill_op) + MIPS_FPU_EMU_INC_STATS(ceil_l_s); + if (MIPSInst_FUNC(ir) == ffloorl_op) + MIPS_FPU_EMU_INC_STATS(floor_l_s); + if (MIPSInst_FUNC(ir) == froundl_op) + MIPS_FPU_EMU_INC_STATS(round_l_s); + if (MIPSInst_FUNC(ir) == ftruncl_op) + MIPS_FPU_EMU_INC_STATS(trunc_l_s); + + oldrm = ieee754_csr.rm; + SPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.l = ieee754sp_tlong(fs); + ieee754_csr.rm = oldrm; + rfmt = l_fmt; + goto copcsr; + + default: + if (!NO_R6EMU && MIPSInst_FUNC(ir) >= fcmp_op) { + unsigned int cmpop; + union ieee754sp fs, ft; + + cmpop = MIPSInst_FUNC(ir) - fcmp_op; + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + rv.w = ieee754sp_cmp(fs, ft, + cmptab[cmpop & 0x7], cmpop & 0x8); + rfmt = -1; + if ((cmpop & 0x8) && ieee754_cxtest + (IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + + } else + return SIGILL; + break; + } + break; + } + + case d_fmt: { + union ieee754dp fd, fs, ft; + union { + union ieee754dp(*b) (union ieee754dp, union ieee754dp); + union ieee754dp(*u) (union ieee754dp); + } handler; + + switch (MIPSInst_FUNC(ir)) { + /* binary ops */ + case fadd_op: + MIPS_FPU_EMU_INC_STATS(add_d); + handler.b = ieee754dp_add; + goto dcopbop; + case fsub_op: + MIPS_FPU_EMU_INC_STATS(sub_d); + handler.b = ieee754dp_sub; + goto dcopbop; + case fmul_op: + MIPS_FPU_EMU_INC_STATS(mul_d); + handler.b = ieee754dp_mul; + goto dcopbop; + case fdiv_op: + MIPS_FPU_EMU_INC_STATS(div_d); + handler.b = ieee754dp_div; + goto dcopbop; + + /* unary ops */ + case fsqrt_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(sqrt_d); + handler.u = ieee754dp_sqrt; + goto dcopuop; + /* + * Note that on some MIPS IV implementations such as the + * R5000 and R8000 the FSQRT and FRECIP instructions do not + * achieve full IEEE-754 accuracy - however this emulator does. + */ + case frsqrt_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(rsqrt_d); + handler.u = fpemu_dp_rsqrt; + goto dcopuop; + case frecip_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(recip_d); + handler.u = fpemu_dp_recip; + goto dcopuop; + case fmovc_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + cond = fpucondbit[MIPSInst_FT(ir) >> 2]; + if (((ctx->fcr31 & cond) != 0) != + ((MIPSInst_FT(ir) & 1) != 0)) + return 0; + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fmovz_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] != 0) + return 0; + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fmovn_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] == 0) + return 0; + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + + case fseleqz_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(seleqz_d); + DPFROMREG(rv.d, MIPSInst_FT(ir)); + if (rv.l & 0x1) + rv.l = 0; + else + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + + case fselnez_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(selnez_d); + DPFROMREG(rv.d, MIPSInst_FT(ir)); + if (rv.l & 0x1) + DPFROMREG(rv.d, MIPSInst_FS(ir)); + else + rv.l = 0; + break; + + case fmaddf_op: { + union ieee754dp ft, fs, fd; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(maddf_d); + DPFROMREG(ft, MIPSInst_FT(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(fd, MIPSInst_FD(ir)); + rv.d = ieee754dp_maddf(fd, fs, ft); + goto copcsr; + } + + case fmsubf_op: { + union ieee754dp ft, fs, fd; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(msubf_d); + DPFROMREG(ft, MIPSInst_FT(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(fd, MIPSInst_FD(ir)); + rv.d = ieee754dp_msubf(fd, fs, ft); + goto copcsr; + } + + case frint_op: { + union ieee754dp fs; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(rint_d); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_rint(fs); + goto copcsr; + } + + case fclass_op: { + union ieee754dp fs; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(class_d); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.l = ieee754dp_2008class(fs); + rfmt = l_fmt; + goto copcsr; + } + + case fmin_op: { + union ieee754dp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(min_d); + DPFROMREG(ft, MIPSInst_FT(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fmin(fs, ft); + goto copcsr; + } + + case fmina_op: { + union ieee754dp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(mina_d); + DPFROMREG(ft, MIPSInst_FT(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fmina(fs, ft); + goto copcsr; + } + + case fmax_op: { + union ieee754dp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(max_d); + DPFROMREG(ft, MIPSInst_FT(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fmax(fs, ft); + goto copcsr; + } + + case fmaxa_op: { + union ieee754dp fs, ft; + + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(maxa_d); + DPFROMREG(ft, MIPSInst_FT(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fmaxa(fs, ft); + goto copcsr; + } + + case fabs_op: + MIPS_FPU_EMU_INC_STATS(abs_d); + handler.u = ieee754dp_abs; + goto dcopuop; + + case fneg_op: + MIPS_FPU_EMU_INC_STATS(neg_d); + handler.u = ieee754dp_neg; + goto dcopuop; + + case fmov_op: + /* an easy one */ + MIPS_FPU_EMU_INC_STATS(mov_d); + DPFROMREG(rv.d, MIPSInst_FS(ir)); + goto copcsr; + + /* binary op on handler */ +dcopbop: + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + + rv.d = (*handler.b) (fs, ft); + goto copcsr; +dcopuop: + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = (*handler.u) (fs); + goto copcsr; + + /* + * unary conv ops + */ + case fcvts_op: + MIPS_FPU_EMU_INC_STATS(cvt_s_d); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fdp(fs); + rfmt = s_fmt; + goto copcsr; + + case fcvtd_op: + return SIGILL; /* not defined */ + + case fcvtw_op: + MIPS_FPU_EMU_INC_STATS(cvt_w_d); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.w = ieee754dp_tint(fs); /* wrong */ + rfmt = w_fmt; + goto copcsr; + + case fround_op: + case ftrunc_op: + case fceil_op: + case ffloor_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + if (MIPSInst_FUNC(ir) == fceil_op) + MIPS_FPU_EMU_INC_STATS(ceil_w_d); + if (MIPSInst_FUNC(ir) == ffloor_op) + MIPS_FPU_EMU_INC_STATS(floor_w_d); + if (MIPSInst_FUNC(ir) == fround_op) + MIPS_FPU_EMU_INC_STATS(round_w_d); + if (MIPSInst_FUNC(ir) == ftrunc_op) + MIPS_FPU_EMU_INC_STATS(trunc_w_d); + + oldrm = ieee754_csr.rm; + DPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.w = ieee754dp_tint(fs); + ieee754_csr.rm = oldrm; + rfmt = w_fmt; + goto copcsr; + + case fsel_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(sel_d); + DPFROMREG(fd, MIPSInst_FD(ir)); + if (fd.bits & 0x1) + DPFROMREG(rv.d, MIPSInst_FT(ir)); + else + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + + case fcvtl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(cvt_l_d); + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.l = ieee754dp_tlong(fs); + rfmt = l_fmt; + goto copcsr; + + case froundl_op: + case ftruncl_op: + case fceill_op: + case ffloorl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + if (MIPSInst_FUNC(ir) == fceill_op) + MIPS_FPU_EMU_INC_STATS(ceil_l_d); + if (MIPSInst_FUNC(ir) == ffloorl_op) + MIPS_FPU_EMU_INC_STATS(floor_l_d); + if (MIPSInst_FUNC(ir) == froundl_op) + MIPS_FPU_EMU_INC_STATS(round_l_d); + if (MIPSInst_FUNC(ir) == ftruncl_op) + MIPS_FPU_EMU_INC_STATS(trunc_l_d); + + oldrm = ieee754_csr.rm; + DPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.l = ieee754dp_tlong(fs); + ieee754_csr.rm = oldrm; + rfmt = l_fmt; + goto copcsr; + + default: + if (!NO_R6EMU && MIPSInst_FUNC(ir) >= fcmp_op) { + unsigned int cmpop; + union ieee754dp fs, ft; + + cmpop = MIPSInst_FUNC(ir) - fcmp_op; + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + rv.w = ieee754dp_cmp(fs, ft, + cmptab[cmpop & 0x7], cmpop & 0x8); + rfmt = -1; + if ((cmpop & 0x8) + && + ieee754_cxtest + (IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + + } + else { + return SIGILL; + } + break; + } + break; + } + + case w_fmt: { + union ieee754dp fs; + + switch (MIPSInst_FUNC(ir)) { + case fcvts_op: + /* convert word to single precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_s_w); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fint(fs.bits); + rfmt = s_fmt; + goto copcsr; + case fcvtd_op: + /* convert word to double precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_d_w); + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fint(fs.bits); + rfmt = d_fmt; + goto copcsr; + default: { + /* Emulating the new CMP.condn.fmt R6 instruction */ +#define CMPOP_MASK 0x7 +#define SIGN_BIT (0x1 << 3) +#define PREDICATE_BIT (0x1 << 4) + + int cmpop = MIPSInst_FUNC(ir) & CMPOP_MASK; + int sig = MIPSInst_FUNC(ir) & SIGN_BIT; + union ieee754sp fs, ft; + + /* This is an R6 only instruction */ + if (!cpu_has_mips_r6 || + (MIPSInst_FUNC(ir) & 0x20)) + return SIGILL; + + if (!sig) { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_af_s); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_un_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_eq_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ueq_s); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_lt_s); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_ult_s); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_le_s); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_ule_s); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_or_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_une_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ne_s); + break; + } + } + } else { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_saf_s); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sun_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_seq_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sueq_s); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_slt_s); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_sult_s); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_sle_s); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_sule_s); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sor_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_sune_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sne_s); + break; + } + } + } + + /* fmt is w_fmt for single precision so fix it */ + rfmt = s_fmt; + /* default to false */ + rv.w = 0; + + /* CMP.condn.S */ + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + + /* positive predicates */ + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + if (ieee754sp_cmp(fs, ft, cmptab[cmpop], + sig)) + rv.w = -1; /* true, all 1s */ + if ((sig) && + ieee754_cxtest(IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + } else { + /* negative predicates */ + switch (cmpop) { + case 1: + case 2: + case 3: + if (ieee754sp_cmp(fs, ft, + negative_cmptab[cmpop], + sig)) + rv.w = -1; /* true, all 1s */ + if (sig && + ieee754_cxtest(IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + break; + default: + /* Reserved R6 ops */ + return SIGILL; + } + } + break; + } + } + break; + } + + case l_fmt: + + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + DIFROMREG(bits, MIPSInst_FS(ir)); + + switch (MIPSInst_FUNC(ir)) { + case fcvts_op: + /* convert long to single precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_s_l); + rv.s = ieee754sp_flong(bits); + rfmt = s_fmt; + goto copcsr; + case fcvtd_op: + /* convert long to double precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_d_l); + rv.d = ieee754dp_flong(bits); + rfmt = d_fmt; + goto copcsr; + default: { + /* Emulating the new CMP.condn.fmt R6 instruction */ + int cmpop = MIPSInst_FUNC(ir) & CMPOP_MASK; + int sig = MIPSInst_FUNC(ir) & SIGN_BIT; + union ieee754dp fs, ft; + + if (!cpu_has_mips_r6 || + (MIPSInst_FUNC(ir) & 0x20)) + return SIGILL; + + if (!sig) { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_af_d); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_un_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_eq_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ueq_d); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_lt_d); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_ult_d); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_le_d); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_ule_d); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_or_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_une_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ne_d); + break; + } + } + } else { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_saf_d); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sun_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_seq_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sueq_d); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_slt_d); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_sult_d); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_sle_d); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_sule_d); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sor_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_sune_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sne_d); + break; + } + } + } + + /* fmt is l_fmt for double precision so fix it */ + rfmt = d_fmt; + /* default to false */ + rv.l = 0; + + /* CMP.condn.D */ + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + + /* positive predicates */ + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + if (ieee754dp_cmp(fs, ft, + cmptab[cmpop], sig)) + rv.l = -1LL; /* true, all 1s */ + if (sig && + ieee754_cxtest(IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + } else { + /* negative predicates */ + switch (cmpop) { + case 1: + case 2: + case 3: + if (ieee754dp_cmp(fs, ft, + negative_cmptab[cmpop], + sig)) + rv.l = -1LL; /* true, all 1s */ + if (sig && + ieee754_cxtest(IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + break; + default: + /* Reserved R6 ops */ + return SIGILL; + } + } + break; + } + } + break; + + default: + return SIGILL; + } + + /* + * Update the fpu CSR register for this operation. + * If an exception is required, generate a tidy SIGFPE exception, + * without updating the result register. + * Note: cause exception bits do not accumulate, they are rewritten + * for each op; only the flag/sticky bits accumulate. + */ + ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr; + if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { + /*printk ("SIGFPE: FPU csr = %08x\n",ctx->fcr31); */ + return SIGFPE; + } + + /* + * Now we can safely write the result back to the register file. + */ + switch (rfmt) { + case -1: + + if (cpu_has_mips_4_5_r) + cbit = fpucondbit[MIPSInst_FD(ir) >> 2]; + else + cbit = FPU_CSR_COND; + if (rv.w) + ctx->fcr31 |= cbit; + else + ctx->fcr31 &= ~cbit; + break; + + case d_fmt: + DPTOREG(rv.d, MIPSInst_FD(ir)); + break; + case s_fmt: + SPTOREG(rv.s, MIPSInst_FD(ir)); + break; + case w_fmt: + SITOREG(rv.w, MIPSInst_FD(ir)); + break; + case l_fmt: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + DITOREG(rv.l, MIPSInst_FD(ir)); + break; + default: + return SIGILL; + } + + return 0; +} + +/* + * Emulate FPU instructions. + * + * If we use FPU hardware, then we have been typically called to handle + * an unimplemented operation, such as where an operand is a NaN or + * denormalized. In that case exit the emulation loop after a single + * iteration so as to let hardware execute any subsequent instructions. + * + * If we have no FPU hardware or it has been disabled, then continue + * emulating floating-point instructions until one of these conditions + * has occurred: + * + * - a non-FPU instruction has been encountered, + * + * - an attempt to emulate has ended with a signal, + * + * - the ISA mode has been switched. + * + * We need to terminate the emulation loop if we got switched to the + * MIPS16 mode, whether supported or not, so that we do not attempt + * to emulate a MIPS16 instruction as a regular MIPS FPU instruction. + * Similarly if we got switched to the microMIPS mode and only the + * regular MIPS mode is supported, so that we do not attempt to emulate + * a microMIPS instruction as a regular MIPS FPU instruction. Or if + * we got switched to the regular MIPS mode and only the microMIPS mode + * is supported, so that we do not attempt to emulate a regular MIPS + * instruction that should cause an Address Error exception instead. + * For simplicity we always terminate upon an ISA mode switch. + */ +int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + int has_fpu, void __user **fault_addr) +{ + unsigned long oldepc, prevepc; + struct mm_decoded_insn dec_insn; + u16 instr[4]; + u16 *instr_ptr; + int sig = 0; + + /* + * Initialize context if it hasn't been used already, otherwise ensure + * it has been saved to struct thread_struct. + */ + if (!init_fp_ctx(current)) + lose_fpu(1); + + oldepc = xcp->cp0_epc; + do { + prevepc = xcp->cp0_epc; + + if (get_isa16_mode(prevepc) && cpu_has_mmips) { + /* + * Get next 2 microMIPS instructions and convert them + * into 32-bit instructions. + */ + if ((get_user(instr[0], (u16 __user *)msk_isa16_mode(xcp->cp0_epc))) || + (get_user(instr[1], (u16 __user *)msk_isa16_mode(xcp->cp0_epc + 2))) || + (get_user(instr[2], (u16 __user *)msk_isa16_mode(xcp->cp0_epc + 4))) || + (get_user(instr[3], (u16 __user *)msk_isa16_mode(xcp->cp0_epc + 6)))) { + MIPS_FPU_EMU_INC_STATS(errors); + return SIGBUS; + } + instr_ptr = instr; + + /* Get first instruction. */ + if (mm_insn_16bit(*instr_ptr)) { + /* Duplicate the half-word. */ + dec_insn.insn = (*instr_ptr << 16) | + (*instr_ptr); + /* 16-bit instruction. */ + dec_insn.pc_inc = 2; + instr_ptr += 1; + } else { + dec_insn.insn = (*instr_ptr << 16) | + *(instr_ptr+1); + /* 32-bit instruction. */ + dec_insn.pc_inc = 4; + instr_ptr += 2; + } + /* Get second instruction. */ + if (mm_insn_16bit(*instr_ptr)) { + /* Duplicate the half-word. */ + dec_insn.next_insn = (*instr_ptr << 16) | + (*instr_ptr); + /* 16-bit instruction. */ + dec_insn.next_pc_inc = 2; + } else { + dec_insn.next_insn = (*instr_ptr << 16) | + *(instr_ptr+1); + /* 32-bit instruction. */ + dec_insn.next_pc_inc = 4; + } + dec_insn.micro_mips_mode = 1; + } else { + if ((get_user(dec_insn.insn, + (mips_instruction __user *) xcp->cp0_epc)) || + (get_user(dec_insn.next_insn, + (mips_instruction __user *)(xcp->cp0_epc+4)))) { + MIPS_FPU_EMU_INC_STATS(errors); + return SIGBUS; + } + dec_insn.pc_inc = 4; + dec_insn.next_pc_inc = 4; + dec_insn.micro_mips_mode = 0; + } + + if ((dec_insn.insn == 0) || + ((dec_insn.pc_inc == 2) && + ((dec_insn.insn & 0xffff) == MM_NOP16))) + xcp->cp0_epc += dec_insn.pc_inc; /* Skip NOPs */ + else { + /* + * The 'ieee754_csr' is an alias of ctx->fcr31. + * No need to copy ctx->fcr31 to ieee754_csr. + */ + sig = cop1Emulate(xcp, ctx, dec_insn, fault_addr); + } + + if (has_fpu) + break; + if (sig) + break; + /* + * We have to check for the ISA bit explicitly here, + * because `get_isa16_mode' may return 0 if support + * for code compression has been globally disabled, + * or otherwise we may produce the wrong signal or + * even proceed successfully where we must not. + */ + if ((xcp->cp0_epc ^ prevepc) & 0x1) + break; + + cond_resched(); + } while (xcp->cp0_epc > prevepc); + + /* SIGILL indicates a non-fpu instruction */ + if (sig == SIGILL && xcp->cp0_epc != oldepc) + /* but if EPC has advanced, then ignore it */ + sig = 0; + + return sig; +} diff --git a/arch/mips/math-emu/dp_2008class.c b/arch/mips/math-emu/dp_2008class.c new file mode 100644 index 000000000..81a0a63b1 --- /dev/null +++ b/arch/mips/math-emu/dp_2008class.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * double precision: CLASS.f + * FPR[fd] = class(FPR[fs]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754dp.h" + +int ieee754dp_2008class(union ieee754dp x) +{ + COMPXDP; + + EXPLODEXDP; + + /* + * 10 bit mask as follows: + * + * bit0 = SNAN + * bit1 = QNAN + * bit2 = -INF + * bit3 = -NORM + * bit4 = -DNORM + * bit5 = -ZERO + * bit6 = INF + * bit7 = NORM + * bit8 = DNORM + * bit9 = ZERO + */ + + switch(xc) { + case IEEE754_CLASS_SNAN: + return 0x01; + case IEEE754_CLASS_QNAN: + return 0x02; + case IEEE754_CLASS_INF: + return 0x04 << (xs ? 0 : 4); + case IEEE754_CLASS_NORM: + return 0x08 << (xs ? 0 : 4); + case IEEE754_CLASS_DNORM: + return 0x10 << (xs ? 0 : 4); + case IEEE754_CLASS_ZERO: + return 0x20 << (xs ? 0 : 4); + default: + pr_err("Unknown class: %d\n", xc); + return 0; + } +} diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c new file mode 100644 index 000000000..78504736b --- /dev/null +++ b/arch/mips/math-emu/dp_add.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y) +{ + int s; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs == ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return y; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs == ys) + return x; + else + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* + * Provide guard,round and stick bit space. + */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * Have to shift y fraction right to align. + */ + s = xe - ye; + ym = XDPSRS(ym, s); + ye += s; + } else if (ye > xe) { + /* + * Have to shift x fraction right to align. + */ + s = ye - xe; + xm = XDPSRS(xm, s); + xe += s; + } + assert(xe == ye); + assert(xe <= DP_EMAX); + + if (xs == ys) { + /* + * Generate 28 bit result of adding two 27 bit numbers + * leaving result in xm, xs and xe. + */ + xm = xm + ym; + + if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */ + xm = XDPSRS1(xm); + xe++; + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Normalize to rounding precision. + */ + while ((xm >> (DP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754dp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/dp_cmp.c b/arch/mips/math-emu/dp_cmp.c new file mode 100644 index 000000000..a59680b03 --- /dev/null +++ b/arch/mips/math-emu/dp_cmp.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cmp, int sig) +{ + s64 vx; + s64 vy; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + FLUSHXDP; + FLUSHYDP; + ieee754_clearcx(); /* Even clear inexact flag here */ + + if (ieee754_class_nan(xc) || ieee754_class_nan(yc)) { + if (sig || + xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN) + ieee754_setcx(IEEE754_INVALID_OPERATION); + return (cmp & IEEE754_CUN) != 0; + } else { + vx = x.bits; + vy = y.bits; + + if (vx < 0) + vx = -vx ^ DP_SIGN_BIT; + if (vy < 0) + vy = -vy ^ DP_SIGN_BIT; + + if (vx < vy) + return (cmp & IEEE754_CLT) != 0; + else if (vx == vy) + return (cmp & IEEE754_CEQ) != 0; + else + return (cmp & IEEE754_CGT) != 0; + } +} diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c new file mode 100644 index 000000000..ac1ecc462 --- /dev/null +++ b/arch/mips/math-emu/dp_div.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y) +{ + u64 rm; + int re; + u64 bm; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return ieee754dp_zero(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return ieee754dp_inf(xs ^ ys); + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_ZERO_DIVIDE); + return ieee754dp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ieee754dp_zero(xs == ys ? 0 : 1); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* provide rounding space */ + xm <<= 3; + ym <<= 3; + + /* now the dirty work */ + + rm = 0; + re = xe - ye; + + for (bm = DP_MBIT(DP_FBITS + 2); bm; bm >>= 1) { + if (xm >= ym) { + xm -= ym; + rm |= bm; + if (xm == 0) + break; + } + xm <<= 1; + } + + rm <<= 1; + if (xm) + rm |= 1; /* have remainder, set sticky */ + + assert(rm); + + /* + * Normalise rm to rounding precision ? + */ + while ((rm >> (DP_FBITS + 3)) == 0) { + rm <<= 1; + re--; + } + + return ieee754dp_format(xs == ys ? 0 : 1, re, rm); +} diff --git a/arch/mips/math-emu/dp_fint.c b/arch/mips/math-emu/dp_fint.c new file mode 100644 index 000000000..996b15ba0 --- /dev/null +++ b/arch/mips/math-emu/dp_fint.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_fint(int x) +{ + u64 xm; + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754dp_zero(0); + if (x == 1 || x == -1) + return ieee754dp_one(x < 0); + if (x == 10 || x == -10) + return ieee754dp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1 << 31)) + xm = ((unsigned) 1 << 31); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + + /* normalize - result can never be inexact or overflow */ + xe = DP_FBITS; + while ((xm >> DP_FBITS) == 0) { + xm <<= 1; + xe--; + } + return builddp(xs, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); +} diff --git a/arch/mips/math-emu/dp_flong.c b/arch/mips/math-emu/dp_flong.c new file mode 100644 index 000000000..681ee00c9 --- /dev/null +++ b/arch/mips/math-emu/dp_flong.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_flong(s64 x) +{ + u64 xm; + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754dp_zero(0); + if (x == 1 || x == -1) + return ieee754dp_one(x < 0); + if (x == 10 || x == -10) + return ieee754dp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1ULL << 63)) + xm = (1ULL << 63); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + + /* normalize */ + xe = DP_FBITS + 3; + if (xm >> (DP_FBITS + 1 + 3)) { + /* shunt out overflow bits */ + while (xm >> (DP_FBITS + 1 + 3)) { + XDPSRSX1(); + } + } else { + /* normalize in grs extended double precision */ + while ((xm >> (DP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754dp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c new file mode 100644 index 000000000..126ec90bb --- /dev/null +++ b/arch/mips/math-emu/dp_fmax.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * double precision: MIN{,A}.f + * MIN : Scalar Floating-Point Minimum + * MINA: Scalar Floating-Point argument with Minimum Absolute Value + * + * MIN.D : FPR[fd] = minNum(FPR[fs],FPR[ft]) + * MINA.D: FPR[fd] = maxNumMag(FPR[fs],FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) +{ + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + FLUSHXDP; + FLUSHYDP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return xs ? y : x; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ys ? x : y; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754dp_zero(xs & ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* Compare signs */ + if (xs > ys) + return y; + else if (xs < ys) + return x; + + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } + + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; +} + +union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) +{ + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + FLUSHXDP; + FLUSHYDP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs & ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return y; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754dp_zero(xs & ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* Compare exponent */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + + /* Compare mantissa */ + if (xm < ym) + return y; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; +} diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c new file mode 100644 index 000000000..35ded4c45 --- /dev/null +++ b/arch/mips/math-emu/dp_fmin.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * double precision: MIN{,A}.f + * MIN : Scalar Floating-Point Minimum + * MINA: Scalar Floating-Point argument with Minimum Absolute Value + * + * MIN.D : FPR[fd] = minNum(FPR[fs],FPR[ft]) + * MINA.D: FPR[fd] = maxNumMag(FPR[fs],FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) +{ + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + FLUSHXDP; + FLUSHYDP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return xs ? x : y; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ys ? y : x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754dp_zero(xs | ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* Compare signs */ + if (xs > ys) + return x; + else if (xs < ys) + return y; + + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } + + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; +} + +union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) +{ + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + FLUSHXDP; + FLUSHYDP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs | ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return y; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754dp_zero(xs | ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* Compare exponent */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + + /* Compare mantissa */ + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) + return x; + return y; +} diff --git a/arch/mips/math-emu/dp_fsp.c b/arch/mips/math-emu/dp_fsp.c new file mode 100644 index 000000000..be8a929c4 --- /dev/null +++ b/arch/mips/math-emu/dp_fsp.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" +#include "ieee754dp.h" + +static inline union ieee754dp ieee754dp_nan_fsp(int xs, u64 xm) +{ + return builddp(xs, DP_EMAX + 1 + DP_EBIAS, + xm << (DP_FBITS - SP_FBITS)); +} + +union ieee754dp ieee754dp_fsp(union ieee754sp x) +{ + COMPXSP; + + EXPLODEXSP; + + ieee754_clearcx(); + + FLUSHXSP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754dp_nanxcpt(ieee754dp_nan_fsp(xs, xm)); + + case IEEE754_CLASS_QNAN: + return ieee754dp_nan_fsp(xs, xm); + + case IEEE754_CLASS_INF: + return ieee754dp_inf(xs); + + case IEEE754_CLASS_ZERO: + return ieee754dp_zero(xs); + + case IEEE754_CLASS_DNORM: + /* normalize */ + while ((xm >> SP_FBITS) == 0) { + xm <<= 1; + xe--; + } + break; + + case IEEE754_CLASS_NORM: + break; + } + + /* + * Can't possibly overflow,underflow, or need rounding + */ + + /* drop the hidden bit */ + xm &= ~SP_HIDDEN_BIT; + + return builddp(xs, xe + DP_EBIAS, + (u64) xm << (DP_FBITS - SP_FBITS)); +} diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c new file mode 100644 index 000000000..931e66f68 --- /dev/null +++ b/arch/mips/math-emu/dp_maddf.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * double precision: MADDF.f (Fused Multiply Add) + * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754dp.h" + + +/* 128 bits shift right logical with rounding. */ +static void srl128(u64 *hptr, u64 *lptr, int count) +{ + u64 low; + + if (count >= 128) { + *lptr = *hptr != 0 || *lptr != 0; + *hptr = 0; + } else if (count >= 64) { + if (count == 64) { + *lptr = *hptr | (*lptr != 0); + } else { + low = *lptr; + *lptr = *hptr >> (count - 64); + *lptr |= (*hptr << (128 - count)) != 0 || low != 0; + } + *hptr = 0; + } else { + low = *lptr; + *lptr = low >> count | *hptr << (64 - count); + *lptr |= (low << (64 - count)) != 0; + *hptr = *hptr >> count; + } +} + +static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y, enum maddf_flags flags) +{ + int re; + int rs; + unsigned int lxm; + unsigned int hxm; + unsigned int lym; + unsigned int hym; + u64 lrm; + u64 hrm; + u64 lzm; + u64 hzm; + u64 t; + u64 at; + int s; + + COMPXDP; + COMPYDP; + COMPZDP; + + EXPLODEXDP; + EXPLODEYDP; + EXPLODEZDP; + + FLUSHXDP; + FLUSHYDP; + FLUSHZDP; + + ieee754_clearcx(); + + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(z); + if (xc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(x); + if (yc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) + return y; + + if (zc == IEEE754_CLASS_DNORM) + DPDNORMZ; + /* ZERO z cases are handled separately below */ + + switch (CLPAIR(xc, yc)) { + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). + */ + return ieee754dp_inf(rs); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + if (zc == IEEE754_CLASS_INF) + return ieee754dp_inf(zs); + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if (zs == rs) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ + return z; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + if (zc == IEEE754_CLASS_INF) + return ieee754dp_inf(zs); + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_INF) + return ieee754dp_inf(zs); + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_INF) + return ieee754dp_inf(zs); + /* continue to real computations */ + } + + /* Finally get to do some computation */ + + /* + * Do the multiplication bit first + * + * rm = xm * ym, re = xe + ye basically + * + * At this point xm and ym should have been normalized. + */ + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + re = xe + ye; + + /* shunt to top of word */ + xm <<= 64 - (DP_FBITS + 1); + ym <<= 64 - (DP_FBITS + 1); + + /* + * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. + */ + + lxm = xm; + hxm = xm >> 32; + lym = ym; + hym = ym >> 32; + + lrm = DPXMULT(lxm, lym); + hrm = DPXMULT(hxm, hym); + + t = DPXMULT(lxm, hym); + + at = lrm + (t << 32); + hrm += at < lrm; + lrm = at; + + hrm = hrm + (t >> 32); + + t = DPXMULT(hxm, lym); + + at = lrm + (t << 32); + hrm += at < lrm; + lrm = at; + + hrm = hrm + (t >> 32); + + /* Put explicit bit at bit 126 if necessary */ + if ((int64_t)hrm < 0) { + lrm = (hrm << 63) | (lrm >> 1); + hrm = hrm >> 1; + re++; + } + + assert(hrm & (1 << 62)); + + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hrm, &lrm, (126 - 55)); + return ieee754dp_format(rs, re, lrm); + } + + /* Move explicit bit from bit 52 to bit 126 */ + lzm = 0; + hzm = zm << 10; + assert(hzm & (1 << 62)); + + /* Make the exponents the same */ + if (ze > re) { + /* + * Have to shift y fraction right to align. + */ + s = ze - re; + srl128(&hrm, &lrm, s); + re += s; + } else if (re > ze) { + /* + * Have to shift x fraction right to align. + */ + s = re - ze; + srl128(&hzm, &lzm, s); + ze += s; + } + assert(ze == re); + assert(ze <= DP_EMAX); + + /* Do the addition */ + if (zs == rs) { + /* + * Generate 128 bit result by adding two 127 bit numbers + * leaving result in hzm:lzm, zs and ze. + */ + hzm = hzm + hrm + (lzm > (lzm + lrm)); + lzm = lzm + lrm; + if ((int64_t)hzm < 0) { /* carry out */ + srl128(&hzm, &lzm, 1); + ze++; + } + } else { + if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { + hzm = hzm - hrm - (lzm < lrm); + lzm = lzm - lrm; + } else { + hzm = hrm - hzm - (lrm < lzm); + lzm = lrm - lzm; + zs = rs; + } + if (lzm == 0 && hzm == 0) + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Put explicit bit at bit 126 if necessary. + */ + if (hzm == 0) { + /* left shift by 63 or 64 bits */ + if ((int64_t)lzm < 0) { + /* MSB of lzm is the explicit bit */ + hzm = lzm >> 1; + lzm = lzm << 63; + ze -= 63; + } else { + hzm = lzm; + lzm = 0; + ze -= 64; + } + } + + t = 0; + while ((hzm >> (62 - t)) == 0) + t++; + + assert(t <= 62); + if (t) { + hzm = hzm << t | lzm >> (64 - t); + lzm = lzm << t; + ze -= t; + } + } + + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hzm, &lzm, (126 - 55)); + + return ieee754dp_format(zs, ze, lzm); +} + +union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, 0); +} + +union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} + +union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, 0); +} + +union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c new file mode 100644 index 000000000..8a671bb7a --- /dev/null +++ b/arch/mips/math-emu/dp_mul.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) +{ + int re; + int rs; + u64 rm; + unsigned int lxm; + unsigned int hxm; + unsigned int lym; + unsigned int hym; + u64 lrm; + u64 hrm; + u64 t; + u64 at; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return ieee754dp_zero(xs ^ ys); + + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + re = xe + ye; + rs = xs ^ ys; + + /* shunt to top of word */ + xm <<= 64 - (DP_FBITS + 1); + ym <<= 64 - (DP_FBITS + 1); + + /* + * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. + */ + + lxm = xm; + hxm = xm >> 32; + lym = ym; + hym = ym >> 32; + + lrm = DPXMULT(lxm, lym); + hrm = DPXMULT(hxm, hym); + + t = DPXMULT(lxm, hym); + + at = lrm + (t << 32); + hrm += at < lrm; + lrm = at; + + hrm = hrm + (t >> 32); + + t = DPXMULT(hxm, lym); + + at = lrm + (t << 32); + hrm += at < lrm; + lrm = at; + + hrm = hrm + (t >> 32); + + rm = hrm | (lrm != 0); + + /* + * Sticky shift down to normal rounding precision. + */ + if ((s64) rm < 0) { + rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | + ((rm << (DP_FBITS + 1 + 3)) != 0); + re++; + } else { + rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | + ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); + } + assert(rm & (DP_HIDDEN_BIT << 3)); + + return ieee754dp_format(rs, re, rm); +} diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c new file mode 100644 index 000000000..7f30b7a30 --- /dev/null +++ b/arch/mips/math-emu/dp_rint.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com> + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_rint(union ieee754dp x) +{ + union ieee754dp ret; + u64 residue; + int sticky; + int round; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= DP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (64 - DP_FBITS + xe); + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754dp_flong(xm); + DPSIGN(ret) = xs; + + return ret; +} diff --git a/arch/mips/math-emu/dp_simple.c b/arch/mips/math-emu/dp_simple.c new file mode 100644 index 000000000..b063aad7d --- /dev/null +++ b/arch/mips/math-emu/dp_simple.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_neg(union ieee754dp x) +{ + union ieee754dp y; + + if (ieee754_csr.abs2008) { + y = x; + DPSIGN(y) = !DPSIGN(x); + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + y = ieee754dp_sub(ieee754dp_zero(0), x); + ieee754_csr.rm = oldrm; + } + return y; +} + +union ieee754dp ieee754dp_abs(union ieee754dp x) +{ + union ieee754dp y; + + if (ieee754_csr.abs2008) { + y = x; + DPSIGN(y) = 0; + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + if (DPSIGN(x)) + y = ieee754dp_sub(ieee754dp_zero(0), x); + else + y = ieee754dp_add(ieee754dp_zero(0), x); + ieee754_csr.rm = oldrm; + } + return y; +} diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c new file mode 100644 index 000000000..1ee38f824 --- /dev/null +++ b/arch/mips/math-emu/dp_sqrt.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision square root + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +static const unsigned int table[] = { + 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, + 29598, 36145, 43202, 50740, 58733, 67158, 75992, + 85215, 83599, 71378, 60428, 50647, 41945, 34246, + 27478, 21581, 16499, 12183, 8588, 5674, 3403, + 1742, 661, 130 +}; + +union ieee754dp ieee754dp_sqrt(union ieee754dp x) +{ + struct _ieee754_csr oldcsr; + union ieee754dp y, z, t; + unsigned int scalx, yh; + COMPXDP; + + EXPLODEXDP; + ieee754_clearcx(); + FLUSHXDP; + + /* x == INF or NAN? */ + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754dp_nanxcpt(x); + + case IEEE754_CLASS_QNAN: + /* sqrt(Nan) = Nan */ + return x; + + case IEEE754_CLASS_ZERO: + /* sqrt(0) = 0 */ + return x; + + case IEEE754_CLASS_INF: + if (xs) { + /* sqrt(-Inf) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + /* sqrt(+Inf) = Inf */ + return x; + + case IEEE754_CLASS_DNORM: + DPDNORMX; + fallthrough; + case IEEE754_CLASS_NORM: + if (xs) { + /* sqrt(-x) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + break; + } + + /* save old csr; switch off INX enable & flag; set RN rounding */ + oldcsr = ieee754_csr; + ieee754_csr.mx &= ~IEEE754_INEXACT; + ieee754_csr.sx &= ~IEEE754_INEXACT; + ieee754_csr.rm = FPU_CSR_RN; + + /* adjust exponent to prevent overflow */ + scalx = 0; + if (xe > 512) { /* x > 2**-512? */ + xe -= 512; /* x = x / 2**512 */ + scalx += 256; + } else if (xe < -512) { /* x < 2**-512? */ + xe += 512; /* x = x * 2**512 */ + scalx -= 256; + } + + x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + y = x; + + /* magic initial approximation to almost 8 sig. bits */ + yh = y.bits >> 32; + yh = (yh >> 1) + 0x1ff80000; + yh = yh - table[(yh >> 15) & 31]; + y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff); + + /* Heron's rule once with correction to improve to ~18 sig. bits */ + /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */ + t = ieee754dp_div(x, y); + y = ieee754dp_add(y, t); + y.bits -= 0x0010000600000000LL; + y.bits &= 0xffffffff00000000LL; + + /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ + /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ + t = ieee754dp_mul(y, y); + z = t; + t.bexp += 0x001; + t = ieee754dp_add(t, z); + z = ieee754dp_mul(ieee754dp_sub(x, z), y); + + /* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */ + t = ieee754dp_div(z, ieee754dp_add(t, x)); + t.bexp += 0x001; + y = ieee754dp_add(y, t); + + /* twiddle last bit to force y correctly rounded */ + + /* set RZ, clear INEX flag */ + ieee754_csr.rm = FPU_CSR_RZ; + ieee754_csr.sx &= ~IEEE754_INEXACT; + + /* t=x/y; ...chopped quotient, possibly inexact */ + t = ieee754dp_div(x, y); + + if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) { + + if (!(ieee754_csr.sx & IEEE754_INEXACT)) + /* t = t-ulp */ + t.bits -= 1; + + /* add inexact to result status */ + oldcsr.cx |= IEEE754_INEXACT; + oldcsr.sx |= IEEE754_INEXACT; + + switch (oldcsr.rm) { + case FPU_CSR_RU: + y.bits += 1; + fallthrough; + case FPU_CSR_RN: + t.bits += 1; + break; + } + + /* y=y+t; ...chopped sum */ + y = ieee754dp_add(y, t); + + /* adjust scalx for correctly rounded sqrt(x) */ + scalx -= 1; + } + + /* py[n0]=py[n0]+scalx; ...scale back y */ + y.bexp += scalx; + + /* restore rounding mode, possibly set inexact */ + ieee754_csr = oldcsr; + + return y; +} diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c new file mode 100644 index 000000000..08474ad2a --- /dev/null +++ b/arch/mips/math-emu/dp_sub.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y) +{ + int s; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs != ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + return ieee754dp_inf(ys ^ 1); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs != ys) + return x; + else + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + /* quick fix up */ + DPSIGN(y) ^= 1; + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + /* normalize ym,ye */ + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + /* normalize xm,xe */ + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* flip sign of y and handle as add */ + ys ^= 1; + + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + + /* provide guard,round and stick bit dpace */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * Have to shift y fraction right to align + */ + s = xe - ye; + ym = XDPSRS(ym, s); + ye += s; + } else if (ye > xe) { + /* + * Have to shift x fraction right to align + */ + s = ye - xe; + xm = XDPSRS(xm, s); + xe += s; + } + assert(xe == ye); + assert(xe <= DP_EMAX); + + if (xs == ys) { + /* generate 28 bit result of adding two 27 bit numbers + */ + xm = xm + ym; + + if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */ + xm = XDPSRS1(xm); /* shift preserving sticky */ + xe++; + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) { + if (ieee754_csr.rm == FPU_CSR_RD) + return ieee754dp_zero(1); /* round negative inf. => sign = -1 */ + else + return ieee754dp_zero(0); /* other round modes => sign = 1 */ + } + + /* normalize to rounding precision + */ + while ((xm >> (DP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754dp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/dp_tint.c b/arch/mips/math-emu/dp_tint.c new file mode 100644 index 000000000..0e6ad35e7 --- /dev/null +++ b/arch/mips/math-emu/dp_tint.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +int ieee754dp_tint(union ieee754dp x) +{ + u64 residue; + int round; + int sticky; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe > 31) { + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + } + /* oh gawd */ + if (xe > DP_FBITS) { + xm <<= xe - DP_FBITS; + } else if (xe < DP_FBITS) { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (64 - DP_FBITS + xe); + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + /* Note: At this point upper 32 bits of xm are guaranteed + to be zero */ + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + /* look for valid corner case 0x80000000 */ + if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} diff --git a/arch/mips/math-emu/dp_tlong.c b/arch/mips/math-emu/dp_tlong.c new file mode 100644 index 000000000..c61ef02d4 --- /dev/null +++ b/arch/mips/math-emu/dp_tlong.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754dp.h" + +s64 ieee754dp_tlong(union ieee754dp x) +{ + u64 residue; + int round; + int sticky; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe >= 63) { + /* look for valid corner case */ + if (xe == 63 && xs && xm == DP_HIDDEN_BIT) + return -0x8000000000000000LL; + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + } + /* oh gawd */ + if (xe > DP_FBITS) { + xm <<= xe - DP_FBITS; + } else if (xe < DP_FBITS) { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + /* Shifting a u64 64 times does not work, + * so we do it in two steps. Be aware that xe + * may be -1 */ + residue = xm << (xe + 1); + residue <<= 63 - DP_FBITS; + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + if ((xm >> 63) != 0) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c new file mode 100644 index 000000000..e02bd20b6 --- /dev/null +++ b/arch/mips/math-emu/dsemul.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/mm_types.h> +#include <linux/sched/task.h> + +#include <asm/branch.h> +#include <asm/cacheflush.h> +#include <asm/fpu_emulator.h> +#include <asm/inst.h> +#include <asm/mipsregs.h> +#include <linux/uaccess.h> + +/** + * struct emuframe - The 'emulation' frame structure + * @emul: The instruction to 'emulate'. + * @badinst: A break instruction to cause a return to the kernel. + * + * This structure defines the frames placed within the delay slot emulation + * page in response to a call to mips_dsemul(). Each thread may be allocated + * only one frame at any given time. The kernel stores within it the + * instruction to be 'emulated' followed by a break instruction, then + * executes the frame in user mode. The break causes a trap to the kernel + * which leads to do_dsemulret() being called unless the instruction in + * @emul causes a trap itself, is a branch, or a signal is delivered to + * the thread. In these cases the allocated frame will either be reused by + * a subsequent delay slot 'emulation', or be freed during signal delivery or + * upon thread exit. + * + * This approach is used because: + * + * - Actually emulating all instructions isn't feasible. We would need to + * be able to handle instructions from all revisions of the MIPS ISA, + * all ASEs & all vendor instruction set extensions. This would be a + * whole lot of work & continual maintenance burden as new instructions + * are introduced, and in the case of some vendor extensions may not + * even be possible. Thus we need to take the approach of actually + * executing the instruction. + * + * - We must execute the instruction within user context. If we were to + * execute the instruction in kernel mode then it would have access to + * kernel resources without very careful checks, leaving us with a + * high potential for security or stability issues to arise. + * + * - We used to place the frame on the users stack, but this requires + * that the stack be executable. This is bad for security so the + * per-process page is now used instead. + * + * - The instruction in @emul may be something entirely invalid for a + * delay slot. The user may (intentionally or otherwise) place a branch + * in a delay slot, or a kernel mode instruction, or something else + * which generates an exception. Thus we can't rely upon the break in + * @badinst always being hit. For this reason we track the index of the + * frame allocated to each thread, allowing us to clean it up at later + * points such as signal delivery or thread exit. + * + * - The user may generate a fake struct emuframe if they wish, invoking + * the BRK_MEMU break instruction themselves. We must therefore not + * trust that BRK_MEMU means there's actually a valid frame allocated + * to the thread, and must not allow the user to do anything they + * couldn't already. + */ +struct emuframe { + mips_instruction emul; + mips_instruction badinst; +}; + +static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe); + +static inline __user struct emuframe *dsemul_page(void) +{ + return (__user struct emuframe *)STACK_TOP; +} + +static int alloc_emuframe(void) +{ + mm_context_t *mm_ctx = ¤t->mm->context; + int idx; + +retry: + spin_lock(&mm_ctx->bd_emupage_lock); + + /* Ensure we have an allocation bitmap */ + if (!mm_ctx->bd_emupage_allocmap) { + mm_ctx->bd_emupage_allocmap = bitmap_zalloc(emupage_frame_count, + GFP_ATOMIC); + if (!mm_ctx->bd_emupage_allocmap) { + idx = BD_EMUFRAME_NONE; + goto out_unlock; + } + } + + /* Attempt to allocate a single bit/frame */ + idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap, + emupage_frame_count, 0); + if (idx < 0) { + /* + * Failed to allocate a frame. We'll wait until one becomes + * available. We unlock the page so that other threads actually + * get the opportunity to free their frames, which means + * technically the result of bitmap_full may be incorrect. + * However the worst case is that we repeat all this and end up + * back here again. + */ + spin_unlock(&mm_ctx->bd_emupage_lock); + if (!wait_event_killable(mm_ctx->bd_emupage_queue, + !bitmap_full(mm_ctx->bd_emupage_allocmap, + emupage_frame_count))) + goto retry; + + /* Received a fatal signal - just give in */ + return BD_EMUFRAME_NONE; + } + + /* Success! */ + pr_debug("allocate emuframe %d to %d\n", idx, current->pid); +out_unlock: + spin_unlock(&mm_ctx->bd_emupage_lock); + return idx; +} + +static void free_emuframe(int idx, struct mm_struct *mm) +{ + mm_context_t *mm_ctx = &mm->context; + + spin_lock(&mm_ctx->bd_emupage_lock); + + pr_debug("free emuframe %d from %d\n", idx, current->pid); + bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1); + + /* If some thread is waiting for a frame, now's its chance */ + wake_up(&mm_ctx->bd_emupage_queue); + + spin_unlock(&mm_ctx->bd_emupage_lock); +} + +static bool within_emuframe(struct pt_regs *regs) +{ + unsigned long base = (unsigned long)dsemul_page(); + + if (regs->cp0_epc < base) + return false; + if (regs->cp0_epc >= (base + PAGE_SIZE)) + return false; + + return true; +} + +bool dsemul_thread_cleanup(struct task_struct *tsk) +{ + int fr_idx; + + /* Clear any allocated frame, retrieving its index */ + fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE); + + /* If no frame was allocated, we're done */ + if (fr_idx == BD_EMUFRAME_NONE) + return false; + + task_lock(tsk); + + /* Free the frame that this thread had allocated */ + if (tsk->mm) + free_emuframe(fr_idx, tsk->mm); + + task_unlock(tsk); + return true; +} + +bool dsemul_thread_rollback(struct pt_regs *regs) +{ + struct emuframe __user *fr; + int fr_idx; + + /* Do nothing if we're not executing from a frame */ + if (!within_emuframe(regs)) + return false; + + /* Find the frame being executed */ + fr_idx = atomic_read(¤t->thread.bd_emu_frame); + if (fr_idx == BD_EMUFRAME_NONE) + return false; + fr = &dsemul_page()[fr_idx]; + + /* + * If the PC is at the emul instruction, roll back to the branch. If + * PC is at the badinst (break) instruction, we've already emulated the + * instruction so progress to the continue PC. If it's anything else + * then something is amiss & the user has branched into some other area + * of the emupage - we'll free the allocated frame anyway. + */ + if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul) + regs->cp0_epc = current->thread.bd_emu_branch_pc; + else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst) + regs->cp0_epc = current->thread.bd_emu_cont_pc; + + atomic_set(¤t->thread.bd_emu_frame, BD_EMUFRAME_NONE); + free_emuframe(fr_idx, current->mm); + return true; +} + +void dsemul_mm_cleanup(struct mm_struct *mm) +{ + mm_context_t *mm_ctx = &mm->context; + + bitmap_free(mm_ctx->bd_emupage_allocmap); +} + +int mips_dsemul(struct pt_regs *regs, mips_instruction ir, + unsigned long branch_pc, unsigned long cont_pc) +{ + int isa16 = get_isa16_mode(regs->cp0_epc); + mips_instruction break_math; + unsigned long fr_uaddr; + struct emuframe fr; + int fr_idx, ret; + + /* NOP is easy */ + if (ir == 0) + return -1; + + /* microMIPS instructions */ + if (isa16) { + union mips_instruction insn = { .word = ir }; + + /* NOP16 aka MOVE16 $0, $0 */ + if ((ir >> 16) == MM_NOP16) + return -1; + + /* ADDIUPC */ + if (insn.mm_a_format.opcode == mm_addiupc_op) { + unsigned int rs; + s32 v; + + rs = (((insn.mm_a_format.rs + 0xe) & 0xf) + 2); + v = regs->cp0_epc & ~3; + v += insn.mm_a_format.simmediate << 2; + regs->regs[rs] = (long)v; + return -1; + } + } + + pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc); + + /* Allocate a frame if we don't already have one */ + fr_idx = atomic_read(¤t->thread.bd_emu_frame); + if (fr_idx == BD_EMUFRAME_NONE) + fr_idx = alloc_emuframe(); + if (fr_idx == BD_EMUFRAME_NONE) + return SIGBUS; + + /* Retrieve the appropriately encoded break instruction */ + break_math = BREAK_MATH(isa16); + + /* Write the instructions to the frame */ + if (isa16) { + union mips_instruction _emul = { + .halfword = { ir >> 16, ir } + }; + union mips_instruction _badinst = { + .halfword = { break_math >> 16, break_math } + }; + + fr.emul = _emul.word; + fr.badinst = _badinst.word; + } else { + fr.emul = ir; + fr.badinst = break_math; + } + + /* Write the frame to user memory */ + fr_uaddr = (unsigned long)&dsemul_page()[fr_idx]; + ret = access_process_vm(current, fr_uaddr, &fr, sizeof(fr), + FOLL_FORCE | FOLL_WRITE); + if (unlikely(ret != sizeof(fr))) { + MIPS_FPU_EMU_INC_STATS(errors); + free_emuframe(fr_idx, current->mm); + return SIGBUS; + } + + /* Record the PC of the branch, PC to continue from & frame index */ + current->thread.bd_emu_branch_pc = branch_pc; + current->thread.bd_emu_cont_pc = cont_pc; + atomic_set(¤t->thread.bd_emu_frame, fr_idx); + + /* Change user register context to execute the frame */ + regs->cp0_epc = fr_uaddr | isa16; + + return 0; +} + +bool do_dsemulret(struct pt_regs *xcp) +{ + /* Cleanup the allocated frame, returning if there wasn't one */ + if (!dsemul_thread_cleanup(current)) { + MIPS_FPU_EMU_INC_STATS(errors); + return false; + } + + /* Set EPC to return to post-branch instruction */ + xcp->cp0_epc = current->thread.bd_emu_cont_pc; + pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); + MIPS_FPU_EMU_INC_STATS(ds_emul); + return true; +} diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c new file mode 100644 index 000000000..0ba5dfbd4 --- /dev/null +++ b/arch/mips/math-emu/ieee754.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* ieee754 floating point arithmetic + * single and double precision + * + * BUGS + * not much dp done + * doesn't generate IEEE754_INEXACT + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include <linux/compiler.h> + +#include "ieee754.h" +#include "ieee754sp.h" +#include "ieee754dp.h" + +/* + * Special constants + */ + +/* + * Older GCC requires the inner braces for initialization of union ieee754dp's + * anonymous struct member. Without an error will result. + */ +#define xPCNST(s, b, m, ebias) \ +{ \ + { \ + .sign = (s), \ + .bexp = (b) + ebias, \ + .mant = (m) \ + } \ +} + +#define DPCNST(s, b, m) \ + xPCNST(s, b, m, DP_EBIAS) + +const union ieee754dp __ieee754dp_spcvals[] = { + DPCNST(0, DP_EMIN - 1, 0x0000000000000ULL), /* + zero */ + DPCNST(1, DP_EMIN - 1, 0x0000000000000ULL), /* - zero */ + DPCNST(0, 0, 0x0000000000000ULL), /* + 1.0 */ + DPCNST(1, 0, 0x0000000000000ULL), /* - 1.0 */ + DPCNST(0, 3, 0x4000000000000ULL), /* + 10.0 */ + DPCNST(1, 3, 0x4000000000000ULL), /* - 10.0 */ + DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL), /* + infinity */ + DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL), /* - infinity */ + DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + ind legacy qNaN */ + DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL), /* + indef 2008 qNaN */ + DPCNST(0, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* + max */ + DPCNST(1, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* - max */ + DPCNST(0, DP_EMIN, 0x0000000000000ULL), /* + min normal */ + DPCNST(1, DP_EMIN, 0x0000000000000ULL), /* - min normal */ + DPCNST(0, DP_EMIN - 1, 0x0000000000001ULL), /* + min denormal */ + DPCNST(1, DP_EMIN - 1, 0x0000000000001ULL), /* - min denormal */ + DPCNST(0, 31, 0x0000000000000ULL), /* + 1.0e31 */ + DPCNST(0, 63, 0x0000000000000ULL), /* + 1.0e63 */ +}; + +#define SPCNST(s, b, m) \ + xPCNST(s, b, m, SP_EBIAS) + +const union ieee754sp __ieee754sp_spcvals[] = { + SPCNST(0, SP_EMIN - 1, 0x000000), /* + zero */ + SPCNST(1, SP_EMIN - 1, 0x000000), /* - zero */ + SPCNST(0, 0, 0x000000), /* + 1.0 */ + SPCNST(1, 0, 0x000000), /* - 1.0 */ + SPCNST(0, 3, 0x200000), /* + 10.0 */ + SPCNST(1, 3, 0x200000), /* - 10.0 */ + SPCNST(0, SP_EMAX + 1, 0x000000), /* + infinity */ + SPCNST(1, SP_EMAX + 1, 0x000000), /* - infinity */ + SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef legacy quiet NaN */ + SPCNST(0, SP_EMAX + 1, 0x400000), /* + indef 2008 quiet NaN */ + SPCNST(0, SP_EMAX, 0x7FFFFF), /* + max normal */ + SPCNST(1, SP_EMAX, 0x7FFFFF), /* - max normal */ + SPCNST(0, SP_EMIN, 0x000000), /* + min normal */ + SPCNST(1, SP_EMIN, 0x000000), /* - min normal */ + SPCNST(0, SP_EMIN - 1, 0x000001), /* + min denormal */ + SPCNST(1, SP_EMIN - 1, 0x000001), /* - min denormal */ + SPCNST(0, 31, 0x000000), /* + 1.0e31 */ + SPCNST(0, 63, 0x000000), /* + 1.0e63 */ +}; diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h new file mode 100644 index 000000000..090caa740 --- /dev/null +++ b/arch/mips/math-emu/ieee754.h @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * Nov 7, 2000 + * Modification to allow integration with Linux kernel + * + * Kevin D. Kissell, kevink@mips.com and Carsten Langgard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. + */ +#ifndef __ARCH_MIPS_MATH_EMU_IEEE754_H +#define __ARCH_MIPS_MATH_EMU_IEEE754_H + +#include <linux/compiler.h> +#include <asm/byteorder.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <asm/bitfield.h> + +union ieee754dp { + struct { + __BITFIELD_FIELD(unsigned int sign:1, + __BITFIELD_FIELD(unsigned int bexp:11, + __BITFIELD_FIELD(u64 mant:52, + ;))) + }; + u64 bits; +}; + +union ieee754sp { + struct { + __BITFIELD_FIELD(unsigned sign:1, + __BITFIELD_FIELD(unsigned bexp:8, + __BITFIELD_FIELD(unsigned mant:23, + ;))) + }; + u32 bits; +}; + +/* + * single precision (often aka float) +*/ +int ieee754sp_class(union ieee754sp x); + +union ieee754sp ieee754sp_abs(union ieee754sp x); +union ieee754sp ieee754sp_neg(union ieee754sp x); + +union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y); + +union ieee754sp ieee754sp_fint(int x); +union ieee754sp ieee754sp_flong(s64 x); +union ieee754sp ieee754sp_fdp(union ieee754dp x); +union ieee754sp ieee754sp_rint(union ieee754sp x); + +int ieee754sp_tint(union ieee754sp x); +s64 ieee754sp_tlong(union ieee754sp x); + +int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cop, int sig); + +union ieee754sp ieee754sp_sqrt(union ieee754sp x); + +union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +int ieee754sp_2008class(union ieee754sp x); +union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y); + +/* + * double precision (often aka double) +*/ +int ieee754dp_class(union ieee754dp x); + +union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y); + +union ieee754dp ieee754dp_abs(union ieee754dp x); +union ieee754dp ieee754dp_neg(union ieee754dp x); + +union ieee754dp ieee754dp_fint(int x); +union ieee754dp ieee754dp_flong(s64 x); +union ieee754dp ieee754dp_fsp(union ieee754sp x); +union ieee754dp ieee754dp_rint(union ieee754dp x); + +int ieee754dp_tint(union ieee754dp x); +s64 ieee754dp_tlong(union ieee754dp x); + +int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cop, int sig); + +union ieee754dp ieee754dp_sqrt(union ieee754dp x); + +union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +int ieee754dp_2008class(union ieee754dp x); +union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y); + + +/* 5 types of floating point number +*/ +enum { + IEEE754_CLASS_NORM = 0x00, + IEEE754_CLASS_ZERO = 0x01, + IEEE754_CLASS_DNORM = 0x02, + IEEE754_CLASS_INF = 0x03, + IEEE754_CLASS_SNAN = 0x04, + IEEE754_CLASS_QNAN = 0x05, +}; + +/* exception numbers */ +#define IEEE754_INEXACT 0x01 +#define IEEE754_UNDERFLOW 0x02 +#define IEEE754_OVERFLOW 0x04 +#define IEEE754_ZERO_DIVIDE 0x08 +#define IEEE754_INVALID_OPERATION 0x10 + +/* cmp operators +*/ +#define IEEE754_CLT 0x01 +#define IEEE754_CEQ 0x02 +#define IEEE754_CGT 0x04 +#define IEEE754_CUN 0x08 + +/* + * The control status register + */ +struct _ieee754_csr { + __BITFIELD_FIELD(unsigned fcc:7, /* condition[7:1] */ + __BITFIELD_FIELD(unsigned nod:1, /* set 1 for no denormals */ + __BITFIELD_FIELD(unsigned c:1, /* condition[0] */ + __BITFIELD_FIELD(unsigned pad0:3, + __BITFIELD_FIELD(unsigned abs2008:1, /* IEEE 754-2008 ABS/NEG.fmt */ + __BITFIELD_FIELD(unsigned nan2008:1, /* IEEE 754-2008 NaN mode */ + __BITFIELD_FIELD(unsigned cx:6, /* exceptions this operation */ + __BITFIELD_FIELD(unsigned mx:5, /* exception enable mask */ + __BITFIELD_FIELD(unsigned sx:5, /* exceptions total */ + __BITFIELD_FIELD(unsigned rm:2, /* current rounding mode */ + ;)))))))))) +}; +#define ieee754_csr (*(struct _ieee754_csr *)(¤t->thread.fpu.fcr31)) + +static inline unsigned int ieee754_getrm(void) +{ + return (ieee754_csr.rm); +} + +static inline unsigned int ieee754_setrm(unsigned int rm) +{ + return (ieee754_csr.rm = rm); +} + +/* + * get current exceptions + */ +static inline unsigned int ieee754_getcx(void) +{ + return (ieee754_csr.cx); +} + +/* test for current exception condition + */ +static inline int ieee754_cxtest(unsigned int n) +{ + return (ieee754_csr.cx & n); +} + +/* + * get sticky exceptions + */ +static inline unsigned int ieee754_getsx(void) +{ + return (ieee754_csr.sx); +} + +/* clear sticky conditions +*/ +static inline unsigned int ieee754_clrsx(void) +{ + return (ieee754_csr.sx = 0); +} + +/* test for sticky exception condition + */ +static inline int ieee754_sxtest(unsigned int n) +{ + return (ieee754_csr.sx & n); +} + +/* debugging */ +union ieee754sp ieee754sp_dump(char *s, union ieee754sp x); +union ieee754dp ieee754dp_dump(char *s, union ieee754dp x); + +#define IEEE754_SPCVAL_PZERO 0 /* +0.0 */ +#define IEEE754_SPCVAL_NZERO 1 /* -0.0 */ +#define IEEE754_SPCVAL_PONE 2 /* +1.0 */ +#define IEEE754_SPCVAL_NONE 3 /* -1.0 */ +#define IEEE754_SPCVAL_PTEN 4 /* +10.0 */ +#define IEEE754_SPCVAL_NTEN 5 /* -10.0 */ +#define IEEE754_SPCVAL_PINFINITY 6 /* +inf */ +#define IEEE754_SPCVAL_NINFINITY 7 /* -inf */ +#define IEEE754_SPCVAL_INDEF_LEG 8 /* legacy quiet NaN */ +#define IEEE754_SPCVAL_INDEF_2008 9 /* IEEE 754-2008 quiet NaN */ +#define IEEE754_SPCVAL_PMAX 10 /* +max norm */ +#define IEEE754_SPCVAL_NMAX 11 /* -max norm */ +#define IEEE754_SPCVAL_PMIN 12 /* +min norm */ +#define IEEE754_SPCVAL_NMIN 13 /* -min norm */ +#define IEEE754_SPCVAL_PMIND 14 /* +min denorm */ +#define IEEE754_SPCVAL_NMIND 15 /* -min denorm */ +#define IEEE754_SPCVAL_P1E31 16 /* + 1.0e31 */ +#define IEEE754_SPCVAL_P1E63 17 /* + 1.0e63 */ + +extern const union ieee754dp __ieee754dp_spcvals[]; +extern const union ieee754sp __ieee754sp_spcvals[]; +#define ieee754dp_spcvals ((const union ieee754dp *)__ieee754dp_spcvals) +#define ieee754sp_spcvals ((const union ieee754sp *)__ieee754sp_spcvals) + +/* + * Return infinity with given sign + */ +#define ieee754dp_inf(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PINFINITY+(sn)]) +#define ieee754dp_zero(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)]) +#define ieee754dp_one(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)]) +#define ieee754dp_ten(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)]) +#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \ + ieee754_csr.nan2008]) +#define ieee754dp_max(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)]) +#define ieee754dp_min(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)]) +#define ieee754dp_mind(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)]) +#define ieee754dp_1e31() (ieee754dp_spcvals[IEEE754_SPCVAL_P1E31]) +#define ieee754dp_1e63() (ieee754dp_spcvals[IEEE754_SPCVAL_P1E63]) + +#define ieee754sp_inf(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PINFINITY+(sn)]) +#define ieee754sp_zero(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)]) +#define ieee754sp_one(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)]) +#define ieee754sp_ten(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)]) +#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \ + ieee754_csr.nan2008]) +#define ieee754sp_max(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)]) +#define ieee754sp_min(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)]) +#define ieee754sp_mind(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)]) +#define ieee754sp_1e31() (ieee754sp_spcvals[IEEE754_SPCVAL_P1E31]) +#define ieee754sp_1e63() (ieee754sp_spcvals[IEEE754_SPCVAL_P1E63]) + +/* + * Indefinite integer value + */ +static inline int ieee754si_indef(void) +{ + return ieee754_csr.nan2008 ? 0 : INT_MAX; +} + +static inline s64 ieee754di_indef(void) +{ + return ieee754_csr.nan2008 ? 0 : S64_MAX; +} + +/* + * Overflow integer value + */ +static inline int ieee754si_overflow(int xs) +{ + return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX; +} + +static inline s64 ieee754di_overflow(int xs) +{ + return ieee754_csr.nan2008 && xs ? S64_MIN : S64_MAX; +} + +/* result types for xctx.rt */ +#define IEEE754_RT_SP 0 +#define IEEE754_RT_DP 1 +#define IEEE754_RT_XP 2 +#define IEEE754_RT_SI 3 +#define IEEE754_RT_DI 4 + +/* compat */ +#define ieee754dp_fix(x) ieee754dp_tint(x) +#define ieee754sp_fix(x) ieee754sp_tint(x) + +#endif /* __ARCH_MIPS_MATH_EMU_IEEE754_H */ diff --git a/arch/mips/math-emu/ieee754d.c b/arch/mips/math-emu/ieee754d.c new file mode 100644 index 000000000..586c4db2d --- /dev/null +++ b/arch/mips/math-emu/ieee754d.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Some debug functions + * + * MIPS floating point support + * + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * Nov 7, 2000 + * Modified to build and operate in Linux kernel environment. + * + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/printk.h> +#include "ieee754.h" +#include "ieee754sp.h" +#include "ieee754dp.h" + +union ieee754dp ieee754dp_dump(char *m, union ieee754dp x) +{ + int i; + + printk("%s", m); + printk("<%08x,%08x>\n", (unsigned) (x.bits >> 32), + (unsigned) x.bits); + printk("\t="); + switch (ieee754dp_class(x)) { + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_SNAN: + printk("Nan %c", DPSIGN(x) ? '-' : '+'); + for (i = DP_FBITS - 1; i >= 0; i--) + printk("%c", DPMANT(x) & DP_MBIT(i) ? '1' : '0'); + break; + case IEEE754_CLASS_INF: + printk("%cInfinity", DPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_ZERO: + printk("%cZero", DPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_DNORM: + printk("%c0.", DPSIGN(x) ? '-' : '+'); + for (i = DP_FBITS - 1; i >= 0; i--) + printk("%c", DPMANT(x) & DP_MBIT(i) ? '1' : '0'); + printk("e%d", DPBEXP(x) - DP_EBIAS); + break; + case IEEE754_CLASS_NORM: + printk("%c1.", DPSIGN(x) ? '-' : '+'); + for (i = DP_FBITS - 1; i >= 0; i--) + printk("%c", DPMANT(x) & DP_MBIT(i) ? '1' : '0'); + printk("e%d", DPBEXP(x) - DP_EBIAS); + break; + default: + printk("Illegal/Unknown IEEE754 value class"); + } + printk("\n"); + return x; +} + +union ieee754sp ieee754sp_dump(char *m, union ieee754sp x) +{ + int i; + + printk("%s=", m); + printk("<%08x>\n", (unsigned) x.bits); + printk("\t="); + switch (ieee754sp_class(x)) { + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_SNAN: + printk("Nan %c", SPSIGN(x) ? '-' : '+'); + for (i = SP_FBITS - 1; i >= 0; i--) + printk("%c", SPMANT(x) & SP_MBIT(i) ? '1' : '0'); + break; + case IEEE754_CLASS_INF: + printk("%cInfinity", SPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_ZERO: + printk("%cZero", SPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_DNORM: + printk("%c0.", SPSIGN(x) ? '-' : '+'); + for (i = SP_FBITS - 1; i >= 0; i--) + printk("%c", SPMANT(x) & SP_MBIT(i) ? '1' : '0'); + printk("e%d", SPBEXP(x) - SP_EBIAS); + break; + case IEEE754_CLASS_NORM: + printk("%c1.", SPSIGN(x) ? '-' : '+'); + for (i = SP_FBITS - 1; i >= 0; i--) + printk("%c", SPMANT(x) & SP_MBIT(i) ? '1' : '0'); + printk("e%d", SPBEXP(x) - SP_EBIAS); + break; + default: + printk("Illegal/Unknown IEEE754 value class"); + } + printk("\n"); + return x; +} diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c new file mode 100644 index 000000000..07ef146e2 --- /dev/null +++ b/arch/mips/math-emu/ieee754dp.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include <linux/compiler.h> + +#include "ieee754dp.h" + +int ieee754dp_class(union ieee754dp x) +{ + COMPXDP; + EXPLODEXDP; + return xc; +} + +static inline int ieee754dp_isnan(union ieee754dp x) +{ + return ieee754_class_nan(ieee754dp_class(x)); +} + +static inline int ieee754dp_issnan(union ieee754dp x) +{ + int qbit; + + assert(ieee754dp_isnan(x)); + qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1); + return ieee754_csr.nan2008 ^ qbit; +} + + +/* + * Raise the Invalid Operation IEEE 754 exception + * and convert the signaling NaN supplied to a quiet NaN. + */ +union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r) +{ + assert(ieee754dp_issnan(r)); + + ieee754_setcx(IEEE754_INVALID_OPERATION); + if (ieee754_csr.nan2008) { + DPMANT(r) |= DP_MBIT(DP_FBITS - 1); + } else { + DPMANT(r) &= ~DP_MBIT(DP_FBITS - 1); + if (!ieee754dp_isnan(r)) + DPMANT(r) |= DP_MBIT(DP_FBITS - 2); + } + + return r; +} + +static u64 ieee754dp_get_rounding(int sn, u64 xm) +{ + /* inexact must round of 3 bits + */ + if (xm & (DP_MBIT(3) - 1)) { + switch (ieee754_csr.rm) { + case FPU_CSR_RZ: + break; + case FPU_CSR_RN: + xm += 0x3 + ((xm >> 3) & 1); + /* xm += (xm&0x8)?0x4:0x3 */ + break; + case FPU_CSR_RU: /* toward +Infinity */ + if (!sn) /* ?? */ + xm += 0x8; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if (sn) /* ?? */ + xm += 0x8; + break; + } + } + return xm; +} + + +/* generate a normal/denormal number with over,under handling + * sn is sign + * xe is an unbiased exponent + * xm is 3bit extended precision value. + */ +union ieee754dp ieee754dp_format(int sn, int xe, u64 xm) +{ + assert(xm); /* we don't gen exact zeros (probably should) */ + + assert((xm >> (DP_FBITS + 1 + 3)) == 0); /* no excess */ + assert(xm & (DP_HIDDEN_BIT << 3)); + + if (xe < DP_EMIN) { + /* strip lower bits */ + int es = DP_EMIN - xe; + + if (ieee754_csr.nod) { + ieee754_setcx(IEEE754_UNDERFLOW); + ieee754_setcx(IEEE754_INEXACT); + + switch(ieee754_csr.rm) { + case FPU_CSR_RN: + case FPU_CSR_RZ: + return ieee754dp_zero(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754dp_min(0); + else + return ieee754dp_zero(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754dp_zero(0); + else + return ieee754dp_min(1); + } + } + + if (xe == DP_EMIN - 1 && + ieee754dp_get_rounding(sn, xm) >> (DP_FBITS + 1 + 3)) + { + /* Not tiny after rounding */ + ieee754_setcx(IEEE754_INEXACT); + xm = ieee754dp_get_rounding(sn, xm); + xm >>= 1; + /* Clear grs bits */ + xm &= ~(DP_MBIT(3) - 1); + xe++; + } + else { + /* sticky right shift es bits + */ + xm = XDPSRS(xm, es); + xe += es; + assert((xm & (DP_HIDDEN_BIT << 3)) == 0); + assert(xe == DP_EMIN); + } + } + if (xm & (DP_MBIT(3) - 1)) { + ieee754_setcx(IEEE754_INEXACT); + if ((xm & (DP_HIDDEN_BIT << 3)) == 0) { + ieee754_setcx(IEEE754_UNDERFLOW); + } + + /* inexact must round of 3 bits + */ + xm = ieee754dp_get_rounding(sn, xm); + /* adjust exponent for rounding add overflowing + */ + if (xm >> (DP_FBITS + 3 + 1)) { + /* add causes mantissa overflow */ + xm >>= 1; + xe++; + } + } + /* strip grs bits */ + xm >>= 3; + + assert((xm >> (DP_FBITS + 1)) == 0); /* no excess */ + assert(xe >= DP_EMIN); + + if (xe > DP_EMAX) { + ieee754_setcx(IEEE754_OVERFLOW); + ieee754_setcx(IEEE754_INEXACT); + /* -O can be table indexed by (rm,sn) */ + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + return ieee754dp_inf(sn); + case FPU_CSR_RZ: + return ieee754dp_max(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754dp_inf(0); + else + return ieee754dp_max(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754dp_max(0); + else + return ieee754dp_inf(1); + } + } + /* gen norm/denorm/zero */ + + if ((xm & DP_HIDDEN_BIT) == 0) { + /* we underflow (tiny/zero) */ + assert(xe == DP_EMIN); + if (ieee754_csr.mx & IEEE754_UNDERFLOW) + ieee754_setcx(IEEE754_UNDERFLOW); + return builddp(sn, DP_EMIN - 1 + DP_EBIAS, xm); + } else { + assert((xm >> (DP_FBITS + 1)) == 0); /* no excess */ + assert(xm & DP_HIDDEN_BIT); + + return builddp(sn, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + } +} diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h new file mode 100644 index 000000000..b7c43a99a --- /dev/null +++ b/arch/mips/math-emu/ieee754dp.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * IEEE754 floating point + * double precision internal header file + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include <linux/compiler.h> + +#include "ieee754int.h" + +#define assert(expr) ((void)0) + +#define DP_EBIAS 1023 +#define DP_EMIN (-1022) +#define DP_EMAX 1023 +#define DP_FBITS 52 +#define DP_MBITS 52 + +#define DP_MBIT(x) ((u64)1 << (x)) +#define DP_HIDDEN_BIT DP_MBIT(DP_FBITS) +#define DP_SIGN_BIT DP_MBIT(63) + +#define DPSIGN(dp) (dp.sign) +#define DPBEXP(dp) (dp.bexp) +#define DPMANT(dp) (dp.mant) + +static inline int ieee754dp_finite(union ieee754dp x) +{ + return DPBEXP(x) != DP_EMAX + 1 + DP_EBIAS; +} + +/* 3bit extended double precision sticky right shift */ +#define XDPSRS(v,rs) \ + ((rs > (DP_FBITS+3))?1:((v) >> (rs)) | ((v) << (64-(rs)) != 0)) + +#define XDPSRSX1() \ + (xe++, (xm = (xm >> 1) | (xm & 1))) + +#define XDPSRS1(v) \ + (((v) >> 1) | ((v) & 1)) + +/* 32bit * 32bit => 64bit unsigned integer multiplication */ +#define DPXMULT(x, y) ((u64)(x) * (u64)y) + +/* convert denormal to normalized with extended exponent */ +#define DPDNORMx(m,e) \ + while ((m >> DP_FBITS) == 0) { m <<= 1; e--; } +#define DPDNORMX DPDNORMx(xm, xe) +#define DPDNORMY DPDNORMx(ym, ye) +#define DPDNORMZ DPDNORMx(zm, ze) + +static inline union ieee754dp builddp(int s, int bx, u64 m) +{ + union ieee754dp r; + + assert((s) == 0 || (s) == 1); + assert((bx) >= DP_EMIN - 1 + DP_EBIAS + && (bx) <= DP_EMAX + 1 + DP_EBIAS); + assert(((m) >> DP_FBITS) == 0); + + r.sign = s; + r.bexp = bx; + r.mant = m; + + return r; +} + +extern union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp); +extern union ieee754dp ieee754dp_format(int, int, u64); diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h new file mode 100644 index 000000000..2c3b13546 --- /dev/null +++ b/arch/mips/math-emu/ieee754int.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * IEEE754 floating point + * common internal header file + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ +#ifndef __IEEE754INT_H +#define __IEEE754INT_H + +#include "ieee754.h" + +#define CLPAIR(x, y) ((x)*6+(y)) + +enum maddf_flags { + MADDF_NEGATE_PRODUCT = 1 << 0, + MADDF_NEGATE_ADDITION = 1 << 1, +}; + +static inline void ieee754_clearcx(void) +{ + ieee754_csr.cx = 0; +} + +static inline void ieee754_setcx(const unsigned int flags) +{ + ieee754_csr.cx |= flags; + ieee754_csr.sx |= flags; +} + +static inline int ieee754_setandtestcx(const unsigned int x) +{ + ieee754_setcx(x); + + return ieee754_csr.mx & x; +} + +static inline int ieee754_class_nan(int xc) +{ + return xc >= IEEE754_CLASS_SNAN; +} + +#define COMPXSP \ + unsigned int xm; int xe; int xs __maybe_unused; int xc + +#define COMPYSP \ + unsigned int ym; int ye; int ys; int yc + +#define COMPZSP \ + unsigned int zm; int ze; int zs; int zc + +#define EXPLODESP(v, vc, vs, ve, vm) \ +{ \ + vs = SPSIGN(v); \ + ve = SPBEXP(v); \ + vm = SPMANT(v); \ + if (ve == SP_EMAX+1+SP_EBIAS) { \ + if (vm == 0) \ + vc = IEEE754_CLASS_INF; \ + else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \ + vc = IEEE754_CLASS_QNAN; \ + else \ + vc = IEEE754_CLASS_SNAN; \ + } else if (ve == SP_EMIN-1+SP_EBIAS) { \ + if (vm) { \ + ve = SP_EMIN; \ + vc = IEEE754_CLASS_DNORM; \ + } else \ + vc = IEEE754_CLASS_ZERO; \ + } else { \ + ve -= SP_EBIAS; \ + vm |= SP_HIDDEN_BIT; \ + vc = IEEE754_CLASS_NORM; \ + } \ +} +#define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm) +#define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym) +#define EXPLODEZSP EXPLODESP(z, zc, zs, ze, zm) + + +#define COMPXDP \ + u64 xm; int xe; int xs __maybe_unused; int xc + +#define COMPYDP \ + u64 ym; int ye; int ys; int yc + +#define COMPZDP \ + u64 zm; int ze; int zs; int zc + +#define EXPLODEDP(v, vc, vs, ve, vm) \ +{ \ + vm = DPMANT(v); \ + vs = DPSIGN(v); \ + ve = DPBEXP(v); \ + if (ve == DP_EMAX+1+DP_EBIAS) { \ + if (vm == 0) \ + vc = IEEE754_CLASS_INF; \ + else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \ + vc = IEEE754_CLASS_QNAN; \ + else \ + vc = IEEE754_CLASS_SNAN; \ + } else if (ve == DP_EMIN-1+DP_EBIAS) { \ + if (vm) { \ + ve = DP_EMIN; \ + vc = IEEE754_CLASS_DNORM; \ + } else \ + vc = IEEE754_CLASS_ZERO; \ + } else { \ + ve -= DP_EBIAS; \ + vm |= DP_HIDDEN_BIT; \ + vc = IEEE754_CLASS_NORM; \ + } \ +} +#define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm) +#define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym) +#define EXPLODEZDP EXPLODEDP(z, zc, zs, ze, zm) + +#define FLUSHDP(v, vc, vs, ve, vm) \ + if (vc==IEEE754_CLASS_DNORM) { \ + if (ieee754_csr.nod) { \ + ieee754_setcx(IEEE754_INEXACT); \ + vc = IEEE754_CLASS_ZERO; \ + ve = DP_EMIN-1+DP_EBIAS; \ + vm = 0; \ + v = ieee754dp_zero(vs); \ + } \ + } + +#define FLUSHSP(v, vc, vs, ve, vm) \ + if (vc==IEEE754_CLASS_DNORM) { \ + if (ieee754_csr.nod) { \ + ieee754_setcx(IEEE754_INEXACT); \ + vc = IEEE754_CLASS_ZERO; \ + ve = SP_EMIN-1+SP_EBIAS; \ + vm = 0; \ + v = ieee754sp_zero(vs); \ + } \ + } + +#define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm) +#define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym) +#define FLUSHZDP FLUSHDP(z, zc, zs, ze, zm) +#define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm) +#define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym) +#define FLUSHZSP FLUSHSP(z, zc, zs, ze, zm) + +#endif /* __IEEE754INT_H */ diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c new file mode 100644 index 000000000..0b6267bc8 --- /dev/null +++ b/arch/mips/math-emu/ieee754sp.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include <linux/compiler.h> + +#include "ieee754sp.h" + +int ieee754sp_class(union ieee754sp x) +{ + COMPXSP; + EXPLODEXSP; + return xc; +} + +static inline int ieee754sp_isnan(union ieee754sp x) +{ + return ieee754_class_nan(ieee754sp_class(x)); +} + +static inline int ieee754sp_issnan(union ieee754sp x) +{ + int qbit; + + assert(ieee754sp_isnan(x)); + qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1); + return ieee754_csr.nan2008 ^ qbit; +} + + +/* + * Raise the Invalid Operation IEEE 754 exception + * and convert the signaling NaN supplied to a quiet NaN. + */ +union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r) +{ + assert(ieee754sp_issnan(r)); + + ieee754_setcx(IEEE754_INVALID_OPERATION); + if (ieee754_csr.nan2008) { + SPMANT(r) |= SP_MBIT(SP_FBITS - 1); + } else { + SPMANT(r) &= ~SP_MBIT(SP_FBITS - 1); + if (!ieee754sp_isnan(r)) + SPMANT(r) |= SP_MBIT(SP_FBITS - 2); + } + + return r; +} + +static unsigned int ieee754sp_get_rounding(int sn, unsigned int xm) +{ + /* inexact must round of 3 bits + */ + if (xm & (SP_MBIT(3) - 1)) { + switch (ieee754_csr.rm) { + case FPU_CSR_RZ: + break; + case FPU_CSR_RN: + xm += 0x3 + ((xm >> 3) & 1); + /* xm += (xm&0x8)?0x4:0x3 */ + break; + case FPU_CSR_RU: /* toward +Infinity */ + if (!sn) /* ?? */ + xm += 0x8; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if (sn) /* ?? */ + xm += 0x8; + break; + } + } + return xm; +} + + +/* generate a normal/denormal number with over,under handling + * sn is sign + * xe is an unbiased exponent + * xm is 3bit extended precision value. + */ +union ieee754sp ieee754sp_format(int sn, int xe, unsigned int xm) +{ + assert(xm); /* we don't gen exact zeros (probably should) */ + + assert((xm >> (SP_FBITS + 1 + 3)) == 0); /* no excess */ + assert(xm & (SP_HIDDEN_BIT << 3)); + + if (xe < SP_EMIN) { + /* strip lower bits */ + int es = SP_EMIN - xe; + + if (ieee754_csr.nod) { + ieee754_setcx(IEEE754_UNDERFLOW); + ieee754_setcx(IEEE754_INEXACT); + + switch(ieee754_csr.rm) { + case FPU_CSR_RN: + case FPU_CSR_RZ: + return ieee754sp_zero(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754sp_min(0); + else + return ieee754sp_zero(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754sp_zero(0); + else + return ieee754sp_min(1); + } + } + + if (xe == SP_EMIN - 1 && + ieee754sp_get_rounding(sn, xm) >> (SP_FBITS + 1 + 3)) + { + /* Not tiny after rounding */ + ieee754_setcx(IEEE754_INEXACT); + xm = ieee754sp_get_rounding(sn, xm); + xm >>= 1; + /* Clear grs bits */ + xm &= ~(SP_MBIT(3) - 1); + xe++; + } else { + /* sticky right shift es bits + */ + xm = XSPSRS(xm, es); + xe += es; + assert((xm & (SP_HIDDEN_BIT << 3)) == 0); + assert(xe == SP_EMIN); + } + } + if (xm & (SP_MBIT(3) - 1)) { + ieee754_setcx(IEEE754_INEXACT); + if ((xm & (SP_HIDDEN_BIT << 3)) == 0) { + ieee754_setcx(IEEE754_UNDERFLOW); + } + + /* inexact must round of 3 bits + */ + xm = ieee754sp_get_rounding(sn, xm); + /* adjust exponent for rounding add overflowing + */ + if (xm >> (SP_FBITS + 1 + 3)) { + /* add causes mantissa overflow */ + xm >>= 1; + xe++; + } + } + /* strip grs bits */ + xm >>= 3; + + assert((xm >> (SP_FBITS + 1)) == 0); /* no excess */ + assert(xe >= SP_EMIN); + + if (xe > SP_EMAX) { + ieee754_setcx(IEEE754_OVERFLOW); + ieee754_setcx(IEEE754_INEXACT); + /* -O can be table indexed by (rm,sn) */ + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + return ieee754sp_inf(sn); + case FPU_CSR_RZ: + return ieee754sp_max(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754sp_inf(0); + else + return ieee754sp_max(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754sp_max(0); + else + return ieee754sp_inf(1); + } + } + /* gen norm/denorm/zero */ + + if ((xm & SP_HIDDEN_BIT) == 0) { + /* we underflow (tiny/zero) */ + assert(xe == SP_EMIN); + if (ieee754_csr.mx & IEEE754_UNDERFLOW) + ieee754_setcx(IEEE754_UNDERFLOW); + return buildsp(sn, SP_EMIN - 1 + SP_EBIAS, xm); + } else { + assert((xm >> (SP_FBITS + 1)) == 0); /* no excess */ + assert(xm & SP_HIDDEN_BIT); + + return buildsp(sn, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT); + } +} diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h new file mode 100644 index 000000000..79040f890 --- /dev/null +++ b/arch/mips/math-emu/ieee754sp.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * IEEE754 floating point + * double precision internal header file + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include <linux/compiler.h> + +#include "ieee754int.h" + +#define assert(expr) ((void)0) + +#define SP_EBIAS 127 +#define SP_EMIN (-126) +#define SP_EMAX 127 +#define SP_FBITS 23 +#define SP_MBITS 23 + +#define SP_MBIT(x) ((u32)1 << (x)) +#define SP_HIDDEN_BIT SP_MBIT(SP_FBITS) +#define SP_SIGN_BIT SP_MBIT(31) + +#define SPSIGN(sp) (sp.sign) +#define SPBEXP(sp) (sp.bexp) +#define SPMANT(sp) (sp.mant) + +static inline int ieee754sp_finite(union ieee754sp x) +{ + return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS; +} + +/* 64 bit right shift with rounding */ +#define XSPSRS64(v, rs) \ + (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0)) + +/* 3bit extended single precision sticky right shift */ +#define XSPSRS(v, rs) \ + ((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0)) + +#define XSPSRS1(m) \ + ((m >> 1) | (m & 1)) + +#define SPXSRSX1() \ + (xe++, (xm = XSPSRS1(xm))) + +#define SPXSRSY1() \ + (ye++, (ym = XSPSRS1(ym))) + +/* convert denormal to normalized with extended exponent */ +#define SPDNORMx(m,e) \ + while ((m >> SP_FBITS) == 0) { m <<= 1; e--; } +#define SPDNORMX SPDNORMx(xm, xe) +#define SPDNORMY SPDNORMx(ym, ye) +#define SPDNORMZ SPDNORMx(zm, ze) + +static inline union ieee754sp buildsp(int s, int bx, unsigned int m) +{ + union ieee754sp r; + + assert((s) == 0 || (s) == 1); + assert((bx) >= SP_EMIN - 1 + SP_EBIAS + && (bx) <= SP_EMAX + 1 + SP_EBIAS); + assert(((m) >> SP_FBITS) == 0); + + r.sign = s; + r.bexp = bx; + r.mant = m; + + return r; +} + +extern union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp); +extern union ieee754sp ieee754sp_format(int, int, unsigned); diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c new file mode 100644 index 000000000..d5ad76b2b --- /dev/null +++ b/arch/mips/math-emu/me-debugfs.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/cpumask.h> +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <asm/debug.h> +#include <asm/fpu_emulator.h> +#include <asm/local.h> + +DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); + +static int fpuemu_stat_get(void *data, u64 *val) +{ + int cpu; + unsigned long sum = 0; + + for_each_online_cpu(cpu) { + struct mips_fpu_emulator_stats *ps; + local_t *pv; + + ps = &per_cpu(fpuemustats, cpu); + pv = (void *)ps + (unsigned long)data; + sum += local_read(pv); + } + *val = sum; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); + +/* + * Used to obtain names for a debugfs instruction counter, given field name + * in fpuemustats structure. For example, for input "cmp_sueq_d", the output + * would be "cmp.sueq.d". This is needed since dots are not allowed to be + * used in structure field names, and are, on the other hand, desired to be + * used in debugfs item names to be clearly associated to corresponding + * MIPS FPU instructions. + */ +static void adjust_instruction_counter_name(char *out_name, char *in_name) +{ + int i = 0; + + strcpy(out_name, in_name); + while (in_name[i] != '\0') { + if (out_name[i] == '_') + out_name[i] = '.'; + i++; + } +} + +static int fpuemustats_clear_show(struct seq_file *s, void *unused) +{ + __this_cpu_write((fpuemustats).emulated, 0); + __this_cpu_write((fpuemustats).loads, 0); + __this_cpu_write((fpuemustats).stores, 0); + __this_cpu_write((fpuemustats).branches, 0); + __this_cpu_write((fpuemustats).cp1ops, 0); + __this_cpu_write((fpuemustats).cp1xops, 0); + __this_cpu_write((fpuemustats).errors, 0); + __this_cpu_write((fpuemustats).ieee754_inexact, 0); + __this_cpu_write((fpuemustats).ieee754_underflow, 0); + __this_cpu_write((fpuemustats).ieee754_overflow, 0); + __this_cpu_write((fpuemustats).ieee754_zerodiv, 0); + __this_cpu_write((fpuemustats).ieee754_invalidop, 0); + __this_cpu_write((fpuemustats).ds_emul, 0); + + __this_cpu_write((fpuemustats).abs_s, 0); + __this_cpu_write((fpuemustats).abs_d, 0); + __this_cpu_write((fpuemustats).add_s, 0); + __this_cpu_write((fpuemustats).add_d, 0); + __this_cpu_write((fpuemustats).bc1eqz, 0); + __this_cpu_write((fpuemustats).bc1nez, 0); + __this_cpu_write((fpuemustats).ceil_w_s, 0); + __this_cpu_write((fpuemustats).ceil_w_d, 0); + __this_cpu_write((fpuemustats).ceil_l_s, 0); + __this_cpu_write((fpuemustats).ceil_l_d, 0); + __this_cpu_write((fpuemustats).class_s, 0); + __this_cpu_write((fpuemustats).class_d, 0); + __this_cpu_write((fpuemustats).cmp_af_s, 0); + __this_cpu_write((fpuemustats).cmp_af_d, 0); + __this_cpu_write((fpuemustats).cmp_eq_s, 0); + __this_cpu_write((fpuemustats).cmp_eq_d, 0); + __this_cpu_write((fpuemustats).cmp_le_s, 0); + __this_cpu_write((fpuemustats).cmp_le_d, 0); + __this_cpu_write((fpuemustats).cmp_lt_s, 0); + __this_cpu_write((fpuemustats).cmp_lt_d, 0); + __this_cpu_write((fpuemustats).cmp_ne_s, 0); + __this_cpu_write((fpuemustats).cmp_ne_d, 0); + __this_cpu_write((fpuemustats).cmp_or_s, 0); + __this_cpu_write((fpuemustats).cmp_or_d, 0); + __this_cpu_write((fpuemustats).cmp_ueq_s, 0); + __this_cpu_write((fpuemustats).cmp_ueq_d, 0); + __this_cpu_write((fpuemustats).cmp_ule_s, 0); + __this_cpu_write((fpuemustats).cmp_ule_d, 0); + __this_cpu_write((fpuemustats).cmp_ult_s, 0); + __this_cpu_write((fpuemustats).cmp_ult_d, 0); + __this_cpu_write((fpuemustats).cmp_un_s, 0); + __this_cpu_write((fpuemustats).cmp_un_d, 0); + __this_cpu_write((fpuemustats).cmp_une_s, 0); + __this_cpu_write((fpuemustats).cmp_une_d, 0); + __this_cpu_write((fpuemustats).cmp_saf_s, 0); + __this_cpu_write((fpuemustats).cmp_saf_d, 0); + __this_cpu_write((fpuemustats).cmp_seq_s, 0); + __this_cpu_write((fpuemustats).cmp_seq_d, 0); + __this_cpu_write((fpuemustats).cmp_sle_s, 0); + __this_cpu_write((fpuemustats).cmp_sle_d, 0); + __this_cpu_write((fpuemustats).cmp_slt_s, 0); + __this_cpu_write((fpuemustats).cmp_slt_d, 0); + __this_cpu_write((fpuemustats).cmp_sne_s, 0); + __this_cpu_write((fpuemustats).cmp_sne_d, 0); + __this_cpu_write((fpuemustats).cmp_sor_s, 0); + __this_cpu_write((fpuemustats).cmp_sor_d, 0); + __this_cpu_write((fpuemustats).cmp_sueq_s, 0); + __this_cpu_write((fpuemustats).cmp_sueq_d, 0); + __this_cpu_write((fpuemustats).cmp_sule_s, 0); + __this_cpu_write((fpuemustats).cmp_sule_d, 0); + __this_cpu_write((fpuemustats).cmp_sult_s, 0); + __this_cpu_write((fpuemustats).cmp_sult_d, 0); + __this_cpu_write((fpuemustats).cmp_sun_s, 0); + __this_cpu_write((fpuemustats).cmp_sun_d, 0); + __this_cpu_write((fpuemustats).cmp_sune_s, 0); + __this_cpu_write((fpuemustats).cmp_sune_d, 0); + __this_cpu_write((fpuemustats).cvt_d_l, 0); + __this_cpu_write((fpuemustats).cvt_d_s, 0); + __this_cpu_write((fpuemustats).cvt_d_w, 0); + __this_cpu_write((fpuemustats).cvt_l_s, 0); + __this_cpu_write((fpuemustats).cvt_l_d, 0); + __this_cpu_write((fpuemustats).cvt_s_d, 0); + __this_cpu_write((fpuemustats).cvt_s_l, 0); + __this_cpu_write((fpuemustats).cvt_s_w, 0); + __this_cpu_write((fpuemustats).cvt_w_s, 0); + __this_cpu_write((fpuemustats).cvt_w_d, 0); + __this_cpu_write((fpuemustats).div_s, 0); + __this_cpu_write((fpuemustats).div_d, 0); + __this_cpu_write((fpuemustats).floor_w_s, 0); + __this_cpu_write((fpuemustats).floor_w_d, 0); + __this_cpu_write((fpuemustats).floor_l_s, 0); + __this_cpu_write((fpuemustats).floor_l_d, 0); + __this_cpu_write((fpuemustats).maddf_s, 0); + __this_cpu_write((fpuemustats).maddf_d, 0); + __this_cpu_write((fpuemustats).max_s, 0); + __this_cpu_write((fpuemustats).max_d, 0); + __this_cpu_write((fpuemustats).maxa_s, 0); + __this_cpu_write((fpuemustats).maxa_d, 0); + __this_cpu_write((fpuemustats).min_s, 0); + __this_cpu_write((fpuemustats).min_d, 0); + __this_cpu_write((fpuemustats).mina_s, 0); + __this_cpu_write((fpuemustats).mina_d, 0); + __this_cpu_write((fpuemustats).mov_s, 0); + __this_cpu_write((fpuemustats).mov_d, 0); + __this_cpu_write((fpuemustats).msubf_s, 0); + __this_cpu_write((fpuemustats).msubf_d, 0); + __this_cpu_write((fpuemustats).mul_s, 0); + __this_cpu_write((fpuemustats).mul_d, 0); + __this_cpu_write((fpuemustats).neg_s, 0); + __this_cpu_write((fpuemustats).neg_d, 0); + __this_cpu_write((fpuemustats).recip_s, 0); + __this_cpu_write((fpuemustats).recip_d, 0); + __this_cpu_write((fpuemustats).rint_s, 0); + __this_cpu_write((fpuemustats).rint_d, 0); + __this_cpu_write((fpuemustats).round_w_s, 0); + __this_cpu_write((fpuemustats).round_w_d, 0); + __this_cpu_write((fpuemustats).round_l_s, 0); + __this_cpu_write((fpuemustats).round_l_d, 0); + __this_cpu_write((fpuemustats).rsqrt_s, 0); + __this_cpu_write((fpuemustats).rsqrt_d, 0); + __this_cpu_write((fpuemustats).sel_s, 0); + __this_cpu_write((fpuemustats).sel_d, 0); + __this_cpu_write((fpuemustats).seleqz_s, 0); + __this_cpu_write((fpuemustats).seleqz_d, 0); + __this_cpu_write((fpuemustats).selnez_s, 0); + __this_cpu_write((fpuemustats).selnez_d, 0); + __this_cpu_write((fpuemustats).sqrt_s, 0); + __this_cpu_write((fpuemustats).sqrt_d, 0); + __this_cpu_write((fpuemustats).sub_s, 0); + __this_cpu_write((fpuemustats).sub_d, 0); + __this_cpu_write((fpuemustats).trunc_w_s, 0); + __this_cpu_write((fpuemustats).trunc_w_d, 0); + __this_cpu_write((fpuemustats).trunc_l_s, 0); + __this_cpu_write((fpuemustats).trunc_l_d, 0); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(fpuemustats_clear); + +static int __init debugfs_fpuemu(void) +{ + struct dentry *fpuemu_debugfs_base_dir; + struct dentry *fpuemu_debugfs_inst_dir; + char name[32]; + + fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats", + mips_debugfs_dir); + + debugfs_create_file("fpuemustats_clear", 0444, mips_debugfs_dir, NULL, + &fpuemustats_clear_fops); + +#define FPU_EMU_STAT_OFFSET(m) \ + offsetof(struct mips_fpu_emulator_stats, m) + +#define FPU_STAT_CREATE(m) \ +do { \ + debugfs_create_file(#m, 0444, fpuemu_debugfs_base_dir, \ + (void *)FPU_EMU_STAT_OFFSET(m), \ + &fops_fpuemu_stat); \ +} while (0) + + FPU_STAT_CREATE(emulated); + FPU_STAT_CREATE(loads); + FPU_STAT_CREATE(stores); + FPU_STAT_CREATE(branches); + FPU_STAT_CREATE(cp1ops); + FPU_STAT_CREATE(cp1xops); + FPU_STAT_CREATE(errors); + FPU_STAT_CREATE(ieee754_inexact); + FPU_STAT_CREATE(ieee754_underflow); + FPU_STAT_CREATE(ieee754_overflow); + FPU_STAT_CREATE(ieee754_zerodiv); + FPU_STAT_CREATE(ieee754_invalidop); + FPU_STAT_CREATE(ds_emul); + + fpuemu_debugfs_inst_dir = debugfs_create_dir("instructions", + fpuemu_debugfs_base_dir); + +#define FPU_STAT_CREATE_EX(m) \ +do { \ + adjust_instruction_counter_name(name, #m); \ + \ + debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \ + (void *)FPU_EMU_STAT_OFFSET(m), \ + &fops_fpuemu_stat); \ +} while (0) + + FPU_STAT_CREATE_EX(abs_s); + FPU_STAT_CREATE_EX(abs_d); + FPU_STAT_CREATE_EX(add_s); + FPU_STAT_CREATE_EX(add_d); + FPU_STAT_CREATE_EX(bc1eqz); + FPU_STAT_CREATE_EX(bc1nez); + FPU_STAT_CREATE_EX(ceil_w_s); + FPU_STAT_CREATE_EX(ceil_w_d); + FPU_STAT_CREATE_EX(ceil_l_s); + FPU_STAT_CREATE_EX(ceil_l_d); + FPU_STAT_CREATE_EX(class_s); + FPU_STAT_CREATE_EX(class_d); + FPU_STAT_CREATE_EX(cmp_af_s); + FPU_STAT_CREATE_EX(cmp_af_d); + FPU_STAT_CREATE_EX(cmp_eq_s); + FPU_STAT_CREATE_EX(cmp_eq_d); + FPU_STAT_CREATE_EX(cmp_le_s); + FPU_STAT_CREATE_EX(cmp_le_d); + FPU_STAT_CREATE_EX(cmp_lt_s); + FPU_STAT_CREATE_EX(cmp_lt_d); + FPU_STAT_CREATE_EX(cmp_ne_s); + FPU_STAT_CREATE_EX(cmp_ne_d); + FPU_STAT_CREATE_EX(cmp_or_s); + FPU_STAT_CREATE_EX(cmp_or_d); + FPU_STAT_CREATE_EX(cmp_ueq_s); + FPU_STAT_CREATE_EX(cmp_ueq_d); + FPU_STAT_CREATE_EX(cmp_ule_s); + FPU_STAT_CREATE_EX(cmp_ule_d); + FPU_STAT_CREATE_EX(cmp_ult_s); + FPU_STAT_CREATE_EX(cmp_ult_d); + FPU_STAT_CREATE_EX(cmp_un_s); + FPU_STAT_CREATE_EX(cmp_un_d); + FPU_STAT_CREATE_EX(cmp_une_s); + FPU_STAT_CREATE_EX(cmp_une_d); + FPU_STAT_CREATE_EX(cmp_saf_s); + FPU_STAT_CREATE_EX(cmp_saf_d); + FPU_STAT_CREATE_EX(cmp_seq_s); + FPU_STAT_CREATE_EX(cmp_seq_d); + FPU_STAT_CREATE_EX(cmp_sle_s); + FPU_STAT_CREATE_EX(cmp_sle_d); + FPU_STAT_CREATE_EX(cmp_slt_s); + FPU_STAT_CREATE_EX(cmp_slt_d); + FPU_STAT_CREATE_EX(cmp_sne_s); + FPU_STAT_CREATE_EX(cmp_sne_d); + FPU_STAT_CREATE_EX(cmp_sor_s); + FPU_STAT_CREATE_EX(cmp_sor_d); + FPU_STAT_CREATE_EX(cmp_sueq_s); + FPU_STAT_CREATE_EX(cmp_sueq_d); + FPU_STAT_CREATE_EX(cmp_sule_s); + FPU_STAT_CREATE_EX(cmp_sule_d); + FPU_STAT_CREATE_EX(cmp_sult_s); + FPU_STAT_CREATE_EX(cmp_sult_d); + FPU_STAT_CREATE_EX(cmp_sun_s); + FPU_STAT_CREATE_EX(cmp_sun_d); + FPU_STAT_CREATE_EX(cmp_sune_s); + FPU_STAT_CREATE_EX(cmp_sune_d); + FPU_STAT_CREATE_EX(cvt_d_l); + FPU_STAT_CREATE_EX(cvt_d_s); + FPU_STAT_CREATE_EX(cvt_d_w); + FPU_STAT_CREATE_EX(cvt_l_s); + FPU_STAT_CREATE_EX(cvt_l_d); + FPU_STAT_CREATE_EX(cvt_s_d); + FPU_STAT_CREATE_EX(cvt_s_l); + FPU_STAT_CREATE_EX(cvt_s_w); + FPU_STAT_CREATE_EX(cvt_w_s); + FPU_STAT_CREATE_EX(cvt_w_d); + FPU_STAT_CREATE_EX(div_s); + FPU_STAT_CREATE_EX(div_d); + FPU_STAT_CREATE_EX(floor_w_s); + FPU_STAT_CREATE_EX(floor_w_d); + FPU_STAT_CREATE_EX(floor_l_s); + FPU_STAT_CREATE_EX(floor_l_d); + FPU_STAT_CREATE_EX(maddf_s); + FPU_STAT_CREATE_EX(maddf_d); + FPU_STAT_CREATE_EX(max_s); + FPU_STAT_CREATE_EX(max_d); + FPU_STAT_CREATE_EX(maxa_s); + FPU_STAT_CREATE_EX(maxa_d); + FPU_STAT_CREATE_EX(min_s); + FPU_STAT_CREATE_EX(min_d); + FPU_STAT_CREATE_EX(mina_s); + FPU_STAT_CREATE_EX(mina_d); + FPU_STAT_CREATE_EX(mov_s); + FPU_STAT_CREATE_EX(mov_d); + FPU_STAT_CREATE_EX(msubf_s); + FPU_STAT_CREATE_EX(msubf_d); + FPU_STAT_CREATE_EX(mul_s); + FPU_STAT_CREATE_EX(mul_d); + FPU_STAT_CREATE_EX(neg_s); + FPU_STAT_CREATE_EX(neg_d); + FPU_STAT_CREATE_EX(recip_s); + FPU_STAT_CREATE_EX(recip_d); + FPU_STAT_CREATE_EX(rint_s); + FPU_STAT_CREATE_EX(rint_d); + FPU_STAT_CREATE_EX(round_w_s); + FPU_STAT_CREATE_EX(round_w_d); + FPU_STAT_CREATE_EX(round_l_s); + FPU_STAT_CREATE_EX(round_l_d); + FPU_STAT_CREATE_EX(rsqrt_s); + FPU_STAT_CREATE_EX(rsqrt_d); + FPU_STAT_CREATE_EX(sel_s); + FPU_STAT_CREATE_EX(sel_d); + FPU_STAT_CREATE_EX(seleqz_s); + FPU_STAT_CREATE_EX(seleqz_d); + FPU_STAT_CREATE_EX(selnez_s); + FPU_STAT_CREATE_EX(selnez_d); + FPU_STAT_CREATE_EX(sqrt_s); + FPU_STAT_CREATE_EX(sqrt_d); + FPU_STAT_CREATE_EX(sub_s); + FPU_STAT_CREATE_EX(sub_d); + FPU_STAT_CREATE_EX(trunc_w_s); + FPU_STAT_CREATE_EX(trunc_w_d); + FPU_STAT_CREATE_EX(trunc_l_s); + FPU_STAT_CREATE_EX(trunc_l_d); + + return 0; +} +arch_initcall(debugfs_fpuemu); diff --git a/arch/mips/math-emu/sp_2008class.c b/arch/mips/math-emu/sp_2008class.c new file mode 100644 index 000000000..b9adab6c2 --- /dev/null +++ b/arch/mips/math-emu/sp_2008class.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * single precision: CLASS.f + * FPR[fd] = class(FPR[fs]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754sp.h" + +int ieee754sp_2008class(union ieee754sp x) +{ + COMPXSP; + + EXPLODEXSP; + + /* + * 10 bit mask as follows: + * + * bit0 = SNAN + * bit1 = QNAN + * bit2 = -INF + * bit3 = -NORM + * bit4 = -DNORM + * bit5 = -ZERO + * bit6 = INF + * bit7 = NORM + * bit8 = DNORM + * bit9 = ZERO + */ + + switch(xc) { + case IEEE754_CLASS_SNAN: + return 0x01; + case IEEE754_CLASS_QNAN: + return 0x02; + case IEEE754_CLASS_INF: + return 0x04 << (xs ? 0 : 4); + case IEEE754_CLASS_NORM: + return 0x08 << (xs ? 0 : 4); + case IEEE754_CLASS_DNORM: + return 0x10 << (xs ? 0 : 4); + case IEEE754_CLASS_ZERO: + return 0x20 << (xs ? 0 : 4); + default: + pr_err("Unknown class: %d\n", xc); + return 0; + } +} diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c new file mode 100644 index 000000000..715cd0534 --- /dev/null +++ b/arch/mips/math-emu/sp_add.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) +{ + int s; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs == ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return y; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs == ys) + return x; + else + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* + * Provide guard, round and stick bit space. + */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * Have to shift y fraction right to align. + */ + s = xe - ye; + ym = XSPSRS(ym, s); + ye += s; + } else if (ye > xe) { + /* + * Have to shift x fraction right to align. + */ + s = ye - xe; + xm = XSPSRS(xm, s); + xe += s; + } + assert(xe == ye); + assert(xe <= SP_EMAX); + + if (xs == ys) { + /* + * Generate 28 bit result of adding two 27 bit numbers + * leaving result in xm, xs and xe. + */ + xm = xm + ym; + + if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */ + SPXSRSX1(); + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Normalize in extended single precision + */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754sp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/sp_cmp.c b/arch/mips/math-emu/sp_cmp.c new file mode 100644 index 000000000..64a37362a --- /dev/null +++ b/arch/mips/math-emu/sp_cmp.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cmp, int sig) +{ + int vx; + int vy; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + FLUSHXSP; + FLUSHYSP; + ieee754_clearcx(); /* Even clear inexact flag here */ + + if (ieee754_class_nan(xc) || ieee754_class_nan(yc)) { + if (sig || + xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN) + ieee754_setcx(IEEE754_INVALID_OPERATION); + return (cmp & IEEE754_CUN) != 0; + } else { + vx = x.bits; + vy = y.bits; + + if (vx < 0) + vx = -vx ^ SP_SIGN_BIT; + if (vy < 0) + vy = -vy ^ SP_SIGN_BIT; + + if (vx < vy) + return (cmp & IEEE754_CLT) != 0; + else if (vx == vy) + return (cmp & IEEE754_CEQ) != 0; + else + return (cmp & IEEE754_CGT) != 0; + } +} diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c new file mode 100644 index 000000000..2bfa266fd --- /dev/null +++ b/arch/mips/math-emu/sp_div.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) +{ + unsigned int rm; + int re; + unsigned int bm; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return ieee754sp_zero(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return ieee754sp_inf(xs ^ ys); + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_ZERO_DIVIDE); + return ieee754sp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ieee754sp_zero(xs == ys ? 0 : 1); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* provide rounding space */ + xm <<= 3; + ym <<= 3; + + /* now the dirty work */ + + rm = 0; + re = xe - ye; + + for (bm = SP_MBIT(SP_FBITS + 2); bm; bm >>= 1) { + if (xm >= ym) { + xm -= ym; + rm |= bm; + if (xm == 0) + break; + } + xm <<= 1; + } + + rm <<= 1; + if (xm) + rm |= 1; /* have remainder, set sticky */ + + assert(rm); + + /* normalise rm to rounding precision ? + */ + while ((rm >> (SP_FBITS + 3)) == 0) { + rm <<= 1; + re--; + } + + return ieee754sp_format(xs == ys ? 0 : 1, re, rm); +} diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c new file mode 100644 index 000000000..56417497c --- /dev/null +++ b/arch/mips/math-emu/sp_fdp.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" +#include "ieee754dp.h" + +static inline union ieee754sp ieee754sp_nan_fdp(int xs, u64 xm) +{ + return buildsp(xs, SP_EMAX + 1 + SP_EBIAS, + xm >> (DP_FBITS - SP_FBITS)); +} + +union ieee754sp ieee754sp_fdp(union ieee754dp x) +{ + union ieee754sp y; + u32 rm; + + COMPXDP; + COMPYSP; + + EXPLODEXDP; + + ieee754_clearcx(); + + FLUSHXDP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + x = ieee754dp_nanxcpt(x); + EXPLODEXDP; + fallthrough; + case IEEE754_CLASS_QNAN: + y = ieee754sp_nan_fdp(xs, xm); + if (!ieee754_csr.nan2008) { + EXPLODEYSP; + if (!ieee754_class_nan(yc)) + y = ieee754sp_indef(); + } + return y; + + case IEEE754_CLASS_INF: + return ieee754sp_inf(xs); + + case IEEE754_CLASS_ZERO: + return ieee754sp_zero(xs); + + case IEEE754_CLASS_DNORM: + /* can't possibly be sp representable */ + ieee754_setcx(IEEE754_UNDERFLOW); + ieee754_setcx(IEEE754_INEXACT); + if ((ieee754_csr.rm == FPU_CSR_RU && !xs) || + (ieee754_csr.rm == FPU_CSR_RD && xs)) + return ieee754sp_mind(xs); + return ieee754sp_zero(xs); + + case IEEE754_CLASS_NORM: + break; + } + + /* + * Convert from DP_FBITS to SP_FBITS+3 with sticky right shift. + */ + rm = (xm >> (DP_FBITS - (SP_FBITS + 3))) | + ((xm << (64 - (DP_FBITS - (SP_FBITS + 3)))) != 0); + + return ieee754sp_format(xs, xe, rm); +} diff --git a/arch/mips/math-emu/sp_fint.c b/arch/mips/math-emu/sp_fint.c new file mode 100644 index 000000000..6068e3caa --- /dev/null +++ b/arch/mips/math-emu/sp_fint.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_fint(int x) +{ + unsigned int xm; + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754sp_zero(0); + if (x == 1 || x == -1) + return ieee754sp_one(x < 0); + if (x == 10 || x == -10) + return ieee754sp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1 << 31)) + xm = ((unsigned) 1 << 31); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + xe = SP_FBITS + 3; + + if (xm >> (SP_FBITS + 1 + 3)) { + /* shunt out overflow bits + */ + while (xm >> (SP_FBITS + 1 + 3)) { + SPXSRSX1(); + } + } else { + /* normalize in grs extended single precision + */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + return ieee754sp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/sp_flong.c b/arch/mips/math-emu/sp_flong.c new file mode 100644 index 000000000..1b223fb5a --- /dev/null +++ b/arch/mips/math-emu/sp_flong.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_flong(s64 x) +{ + u64 xm; /* <--- need 64-bit mantissa temp */ + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754sp_zero(0); + if (x == 1 || x == -1) + return ieee754sp_one(x < 0); + if (x == 10 || x == -10) + return ieee754sp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1ULL << 63)) + xm = (1ULL << 63); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + xe = SP_FBITS + 3; + + if (xm >> (SP_FBITS + 1 + 3)) { + /* shunt out overflow bits + */ + while (xm >> (SP_FBITS + 1 + 3)) { + SPXSRSX1(); + } + } else { + /* normalize in grs extended single precision */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + return ieee754sp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c new file mode 100644 index 000000000..3fb16a1df --- /dev/null +++ b/arch/mips/math-emu/sp_fmax.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * single precision: MAX{,A}.f + * MAX : Scalar Floating-Point Maximum + * MAXA: Scalar Floating-Point argument with Maximum Absolute Value + * + * MAX.S : FPR[fd] = maxNum(FPR[fs],FPR[ft]) + * MAXA.S: FPR[fd] = maxNumMag(FPR[fs],FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) +{ + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + FLUSHXSP; + FLUSHYSP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return xs ? y : x; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ys ? x : y; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754sp_zero(xs & ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* Compare signs */ + if (xs > ys) + return y; + else if (xs < ys) + return x; + + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } + + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; +} + +union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) +{ + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + FLUSHXSP; + FLUSHYSP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs & ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return y; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754sp_zero(xs & ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* Compare exponent */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + + /* Compare mantissa */ + if (xm < ym) + return y; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; +} diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c new file mode 100644 index 000000000..ad2599d4a --- /dev/null +++ b/arch/mips/math-emu/sp_fmin.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * single precision: MIN{,A}.f + * MIN : Scalar Floating-Point Minimum + * MINA: Scalar Floating-Point argument with Minimum Absolute Value + * + * MIN.S : FPR[fd] = minNum(FPR[fs],FPR[ft]) + * MINA.S: FPR[fd] = maxNumMag(FPR[fs],FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) +{ + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + FLUSHXSP; + FLUSHYSP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return xs ? x : y; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ys ? y : x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754sp_zero(xs | ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* Compare signs */ + if (xs > ys) + return x; + else if (xs < ys) + return y; + + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } + + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; +} + +union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) +{ + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + FLUSHXSP; + FLUSHYSP; + + ieee754_clearcx(); + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return x; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return y; + + /* + * Infinity and zero handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs | ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return y; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + return ieee754sp_zero(xs | ys); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + } + + /* Finally get to do some computation */ + + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* Compare exponent */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + + /* Compare mantissa */ + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) + return x; + return y; +} diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c new file mode 100644 index 000000000..473ee222d --- /dev/null +++ b/arch/mips/math-emu/sp_maddf.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IEEE754 floating point arithmetic + * single precision: MADDF.f (Fused Multiply Add) + * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + */ + +#include "ieee754sp.h" + + +static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y, enum maddf_flags flags) +{ + int re; + int rs; + unsigned int rm; + u64 rm64; + u64 zm64; + int s; + + COMPXSP; + COMPYSP; + COMPZSP; + + EXPLODEXSP; + EXPLODEYSP; + EXPLODEZSP; + + FLUSHXSP; + FLUSHYSP; + FLUSHZSP; + + ieee754_clearcx(); + + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(z); + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + if (yc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) + return y; + + if (zc == IEEE754_CLASS_DNORM) + SPDNORMZ; + /* ZERO z cases are handled separately below */ + + switch (CLPAIR(xc, yc)) { + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). + */ + return ieee754sp_inf(rs); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if (zs == rs) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ + return z; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + /* continue to real computations */ + } + + /* Finally get to do some computation */ + + /* + * Do the multiplication bit first + * + * rm = xm * ym, re = xe + ye basically + * + * At this point xm and ym should have been normalized. + */ + + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + re = xe + ye; + + /* Multiple 24 bit xm and ym to give 48 bit results */ + rm64 = (uint64_t)xm * ym; + + /* Shunt to top of word */ + rm64 = rm64 << 16; + + /* Put explicit bit at bit 62 if necessary */ + if ((int64_t) rm64 < 0) { + rm64 = rm64 >> 1; + re++; + } + + assert(rm64 & (1 << 62)); + + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + rm = XSPSRS64(rm64, (62 - 26)); + return ieee754sp_format(rs, re, rm); + } + + /* Move explicit bit from bit 23 to bit 62 */ + zm64 = (uint64_t)zm << (62 - 23); + assert(zm64 & (1 << 62)); + + /* Make the exponents the same */ + if (ze > re) { + /* + * Have to shift r fraction right to align. + */ + s = ze - re; + rm64 = XSPSRS64(rm64, s); + re += s; + } else if (re > ze) { + /* + * Have to shift z fraction right to align. + */ + s = re - ze; + zm64 = XSPSRS64(zm64, s); + ze += s; + } + assert(ze == re); + assert(ze <= SP_EMAX); + + /* Do the addition */ + if (zs == rs) { + /* + * Generate 64 bit result by adding two 63 bit numbers + * leaving result in zm64, zs and ze. + */ + zm64 = zm64 + rm64; + if ((int64_t)zm64 < 0) { /* carry out */ + zm64 = XSPSRS1(zm64); + ze++; + } + } else { + if (zm64 >= rm64) { + zm64 = zm64 - rm64; + } else { + zm64 = rm64 - zm64; + zs = rs; + } + if (zm64 == 0) + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Put explicit bit at bit 62 if necessary. + */ + while ((zm64 >> 62) == 0) { + zm64 <<= 1; + ze--; + } + } + + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + zm = XSPSRS64(zm64, (62 - 26)); + + return ieee754sp_format(zs, ze, zm); +} + +union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} + +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c new file mode 100644 index 000000000..26cfd6302 --- /dev/null +++ b/arch/mips/math-emu/sp_mul.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) +{ + int re; + int rs; + unsigned int rm; + unsigned short lxm; + unsigned short hxm; + unsigned short lym; + unsigned short hym; + unsigned int lrm; + unsigned int hrm; + unsigned int t; + unsigned int at; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return ieee754sp_zero(xs ^ ys); + + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + re = xe + ye; + rs = xs ^ ys; + + /* shunt to top of word */ + xm <<= 32 - (SP_FBITS + 1); + ym <<= 32 - (SP_FBITS + 1); + + /* + * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + */ + lxm = xm & 0xffff; + hxm = xm >> 16; + lym = ym & 0xffff; + hym = ym >> 16; + + lrm = lxm * lym; /* 16 * 16 => 32 */ + hrm = hxm * hym; /* 16 * 16 => 32 */ + + t = lxm * hym; /* 16 * 16 => 32 */ + at = lrm + (t << 16); + hrm += at < lrm; + lrm = at; + hrm = hrm + (t >> 16); + + t = hxm * lym; /* 16 * 16 => 32 */ + at = lrm + (t << 16); + hrm += at < lrm; + lrm = at; + hrm = hrm + (t >> 16); + + rm = hrm | (lrm != 0); + + /* + * Sticky shift down to normal rounding precision. + */ + if ((int) rm < 0) { + rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | + ((rm << (SP_FBITS + 1 + 3)) != 0); + re++; + } else { + rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | + ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); + } + assert(rm & (SP_HIDDEN_BIT << 3)); + + return ieee754sp_format(rs, re, rm); +} diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c new file mode 100644 index 000000000..d5f75fe21 --- /dev/null +++ b/arch/mips/math-emu/sp_rint.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com> + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_rint(union ieee754sp x) +{ + union ieee754sp ret; + u32 residue; + int sticky; + int round; + int odd; + + COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */ + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= SP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (xe + 1); + residue <<= 31 - SP_FBITS; + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754sp_flong(xm); + SPSIGN(ret) = xs; + + return ret; +} diff --git a/arch/mips/math-emu/sp_simple.c b/arch/mips/math-emu/sp_simple.c new file mode 100644 index 000000000..b9e91da7d --- /dev/null +++ b/arch/mips/math-emu/sp_simple.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_neg(union ieee754sp x) +{ + union ieee754sp y; + + if (ieee754_csr.abs2008) { + y = x; + SPSIGN(y) = !SPSIGN(x); + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + y = ieee754sp_sub(ieee754sp_zero(0), x); + ieee754_csr.rm = oldrm; + } + return y; +} + +union ieee754sp ieee754sp_abs(union ieee754sp x) +{ + union ieee754sp y; + + if (ieee754_csr.abs2008) { + y = x; + SPSIGN(y) = 0; + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + if (SPSIGN(x)) + y = ieee754sp_sub(ieee754sp_zero(0), x); + else + y = ieee754sp_add(ieee754sp_zero(0), x); + ieee754_csr.rm = oldrm; + } + return y; +} diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c new file mode 100644 index 000000000..e9bb60121 --- /dev/null +++ b/arch/mips/math-emu/sp_sqrt.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision square root + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_sqrt(union ieee754sp x) +{ + int ix, s, q, m, t, i; + unsigned int r; + COMPXSP; + + /* take care of Inf and NaN */ + + EXPLODEXSP; + ieee754_clearcx(); + FLUSHXSP; + + /* x == INF or NAN? */ + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754sp_nanxcpt(x); + + case IEEE754_CLASS_QNAN: + /* sqrt(Nan) = Nan */ + return x; + + case IEEE754_CLASS_ZERO: + /* sqrt(0) = 0 */ + return x; + + case IEEE754_CLASS_INF: + if (xs) { + /* sqrt(-Inf) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* sqrt(+Inf) = Inf */ + return x; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + if (xs) { + /* sqrt(-x) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + break; + } + + ix = x.bits; + + /* normalize x */ + m = (ix >> 23); + if (m == 0) { /* subnormal x */ + for (i = 0; (ix & 0x00800000) == 0; i++) + ix <<= 1; + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if (m & 1) /* odd m, double x to make it even */ + ix += ix; + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + s = 0; + q = 0; /* q = sqrt(x) */ + r = 0x01000000; /* r = moving bit from right to left */ + + while (r != 0) { + t = s + r; + if (t <= ix) { + s = t + r; + ix -= t; + q += r; + } + ix += ix; + r >>= 1; + } + + if (ix != 0) { + ieee754_setcx(IEEE754_INEXACT); + switch (ieee754_csr.rm) { + case FPU_CSR_RU: + q += 2; + break; + case FPU_CSR_RN: + q += (q & 1); + break; + } + } + ix = (q >> 1) + 0x3f000000; + ix += (m << 23); + x.bits = ix; + return x; +} diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c new file mode 100644 index 000000000..16c8e9ae6 --- /dev/null +++ b/arch/mips/math-emu/sp_sub.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) +{ + int s; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs != ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + return ieee754sp_inf(ys ^ 1); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs != ys) + return x; + else + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + /* quick fix up */ + SPSIGN(y) ^= 1; + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + fallthrough; + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* flip sign of y and handle as add */ + ys ^= 1; + + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + + /* provide guard,round and stick bit space */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * have to shift y fraction right to align + */ + s = xe - ye; + ym = XSPSRS(ym, s); + ye += s; + } else if (ye > xe) { + /* + * have to shift x fraction right to align + */ + s = ye - xe; + xm = XSPSRS(xm, s); + xe += s; + } + assert(xe == ye); + assert(xe <= SP_EMAX); + + if (xs == ys) { + /* generate 28 bit result of adding two 27 bit numbers + */ + xm = xm + ym; + + if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */ + SPXSRSX1(); /* shift preserving sticky */ + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) { + if (ieee754_csr.rm == FPU_CSR_RD) + return ieee754sp_zero(1); /* round negative inf. => sign = -1 */ + else + return ieee754sp_zero(0); /* other round modes => sign = 1 */ + } + /* normalize to rounding precision + */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754sp_format(xs, xe, xm); +} diff --git a/arch/mips/math-emu/sp_tint.c b/arch/mips/math-emu/sp_tint.c new file mode 100644 index 000000000..f7a5cf5e1 --- /dev/null +++ b/arch/mips/math-emu/sp_tint.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +int ieee754sp_tint(union ieee754sp x) +{ + u32 residue; + int round; + int sticky; + int odd; + + COMPXSP; + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe >= 31) { + /* look for valid corner case */ + if (xe == 31 && xs && xm == SP_HIDDEN_BIT) + return -0x80000000; + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + } + /* oh gawd */ + if (xe > SP_FBITS) { + xm <<= xe - SP_FBITS; + } else { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + /* Shifting a u32 32 times does not work, + * so we do it in two steps. Be aware that xe + * may be -1 */ + residue = xm << (xe + 1); + residue <<= 31 - SP_FBITS; + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + if ((xm >> 31) != 0) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c new file mode 100644 index 000000000..adc191304 --- /dev/null +++ b/arch/mips/math-emu/sp_tlong.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + */ + +#include "ieee754sp.h" + +s64 ieee754sp_tlong(union ieee754sp x) +{ + u32 residue; + int round; + int sticky; + int odd; + + COMPXDP; /* <-- need 64-bit mantissa tmp */ + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe >= 63) { + /* look for valid corner case */ + if (xe == 63 && xs && xm == SP_HIDDEN_BIT) + return -0x8000000000000000LL; + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + } + /* oh gawd */ + if (xe > SP_FBITS) { + xm <<= xe - SP_FBITS; + } else if (xe < SP_FBITS) { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (32 - SP_FBITS + xe); + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + if ((xm >> 63) != 0) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} |