summaryrefslogtreecommitdiffstats
path: root/src/math/exp_s390x.s
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/math/exp_s390x.s177
1 files changed, 177 insertions, 0 deletions
diff --git a/src/math/exp_s390x.s b/src/math/exp_s390x.s
new file mode 100644
index 0000000..e0ec823
--- /dev/null
+++ b/src/math/exp_s390x.s
@@ -0,0 +1,177 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial approximation and other constants
+DATA ·exprodataL22<> + 0(SB)/8, $800.0E+00 // offset 0: upper cutoff; expAsm returns 0 or +Inf directly when |x| is not below this
+DATA ·exprodataL22<> + 8(SB)/8, $1.0000000000000022e+00 // offset 8: polynomial coefficient, approximately 1
+DATA ·exprodataL22<> + 16(SB)/8, $0.500000000000004237e+00 // offset 16: polynomial coefficient, approximately 1/2
+DATA ·exprodataL22<> + 24(SB)/8, $0.166666666630345592e+00 // offset 24: polynomial coefficient, approximately 1/6
+DATA ·exprodataL22<> + 32(SB)/8, $0.138926439368309441e-02 // offset 32: polynomial coefficient, approximately 1/720
+DATA ·exprodataL22<> + 40(SB)/8, $0.833349307718286047e-02 // offset 40: polynomial coefficient, approximately 1/120
+DATA ·exprodataL22<> + 48(SB)/8, $0.416666664838056960e-01 // offset 48: polynomial coefficient, approximately 1/24
+DATA ·exprodataL22<> + 56(SB)/8, $-.231904681384629956E-16 // offset 56: low-order part of -ln(2), applied after the high part
+DATA ·exprodataL22<> + 64(SB)/8, $-.693147180559945286E+00 // offset 64: high-order part of -ln(2)
+DATA ·exprodataL22<> + 72(SB)/8, $0.144269504088896339E+01 // offset 72: log2(e) = 1/ln(2)
+DATA ·exprodataL22<> + 80(SB)/8, $704.0E+00 // offset 80: lower cutoff; expAsm leaves its fast path when |x| is not below this
+GLOBL ·exprodataL22<> + 0(SB), RODATA, $88 // 11 float64 values (88 bytes), read-only; expAsm addresses them through R5
+
+DATA ·expxinf<> + 0(SB)/8, $0x7ff0000000000000 // IEEE 754 binary64 bit pattern of +Inf
+GLOBL ·expxinf<> + 0(SB), RODATA, $8
+DATA ·expx4ff<> + 0(SB)/8, $0x4ff0000000000000 // biased exponent 0x4ff, zero fraction: the double 2**256
+GLOBL ·expx4ff<> + 0(SB), RODATA, $8
+DATA ·expx2ff<> + 0(SB)/8, $0x2ff0000000000000 // biased exponent 0x2ff, zero fraction: the double 2**-256
+GLOBL ·expx2ff<> + 0(SB), RODATA, $8
+DATA ·expxaddexp<> + 0(SB)/8, $0xc2f0000100003fef // negative double of magnitude slightly above 2**48; expAsm subtracts it and adds it back. NOTE(review): presumably a rounding/bias constant whose low bits seed the result exponent - confirm
+GLOBL ·expxaddexp<> + 0(SB), RODATA, $8
+
+// Log multipliers table
+DATA ·exptexp<> + 0(SB)/8, $0.442737824274138381E-01 // entry 0; expAsm selects entry (R1 & 0xF) via byte offset (R1 & 0xF) << 3
+DATA ·exptexp<> + 8(SB)/8, $0.263602189790660309E-01
+DATA ·exptexp<> + 16(SB)/8, $0.122565642281703586E-01
+DATA ·exptexp<> + 24(SB)/8, $0.143757052860721398E-02
+DATA ·exptexp<> + 32(SB)/8, $-.651375034121276075E-02
+DATA ·exptexp<> + 40(SB)/8, $-.119317678849450159E-01
+DATA ·exptexp<> + 48(SB)/8, $-.150868749549871069E-01
+DATA ·exptexp<> + 56(SB)/8, $-.161992609578469234E-01
+DATA ·exptexp<> + 64(SB)/8, $-.154492360403337917E-01
+DATA ·exptexp<> + 72(SB)/8, $-.129850717389178721E-01
+DATA ·exptexp<> + 80(SB)/8, $-.892902649276657891E-02
+DATA ·exptexp<> + 88(SB)/8, $-.338202636596794887E-02
+DATA ·exptexp<> + 96(SB)/8, $0.357266307045684762E-02
+DATA ·exptexp<> + 104(SB)/8, $0.118665304327406698E-01
+DATA ·exptexp<> + 112(SB)/8, $0.214434994118118914E-01
+DATA ·exptexp<> + 120(SB)/8, $0.322580645161290314E-01 // entry 15
+GLOBL ·exptexp<> + 0(SB), RODATA, $128 // 16 float64 values (128 bytes), read-only; a loaded entry t is combined as t + (r + t*r)*P in expAsm
+
+// Exp returns e**x, the base-e exponential of x.
+//
+// Special cases are:
+// Exp(+Inf) = +Inf
+// Exp(NaN) = NaN
+// Values of very large magnitude overflow to +Inf (x > 0) or underflow to 0 (x < 0).
+// Values of very small magnitude yield 1.
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT ·expAsm(SB), NOSPLIT, $0-16 // float64 argument at x+0(FP), float64 result at ret+8(FP); no local stack frame
+ FMOVD x+0(FP), F0 // F0 = x
+ MOVD $·exprodataL22<>+0(SB), R5 // R5 = base address of the constant table
+ LTDBR F0, F0 // test x; sets the condition code from its sign / NaN status
+ BLTU L20 // x < 0 or x is NaN: go compute F2 = -x
+ FMOVD F0, F2 // otherwise F2 = x
+L2: // here F2 = |x| (a NaN stays a NaN) and F0 still holds x
+ WORD $0xED205050 //cdb %f2,.L23-.L22(%r5): compare F2 with 704.0 stored at 80(R5)
+ BYTE $0x00 // remaining two bytes of the 6-byte CDB encoding
+ BYTE $0x19
+ BGE L16 // |x| >= 704: handled on the slow path
+ BVS L16 // unordered compare (NaN): also handled at L16
+ WFCEDBS V2, V2, V2 // compare F2 with itself; only a NaN compares unequal (compare mask lands in V2, which is reloaded below)
+ BVS LEXITTAGexp // NaN: return x unchanged
+ MOVD $·expxaddexp<>+0(SB), R1 // R1 = address of the constant C
+ FMOVD 72(R5), F6 // F6 = log2(e)
+ FMOVD 0(R1), F2 // F2 = C
+ WFMSDB V0, V6, V2, V6 // V6 = x*log2(e) - C
+ FMOVD 64(R5), F4 // F4 = high part of -ln(2)
+ FADD F6, F2 // F2 = C + V6 = n, i.e. x*log2(e) after the round trip through C
+ FMOVD 56(R5), F1 // F1 = low part of -ln(2)
+ FMADD F4, F2, F0 // F0 = x + n*(-ln2_hi)
+ FMOVD 48(R5), F3 // F3 = coefficient ~1/24
+ WFMADB V2, V1, V0, V2 // V2 = r = F0 + n*(-ln2_lo), the reduced argument
+ FMOVD 40(R5), F1 // F1 = coefficient ~1/120
+ FMOVD 32(R5), F4 // F4 = coefficient ~1/720
+ FMUL F0, F0 // F0 = F0*F0, used below in the role of r*r
+ WFMADB V2, V4, V1, V4 // V4 = r*(~1/720) + (~1/120)
+ LGDR F6, R1 // R1 = raw 64-bit pattern of V6 (x*log2(e) - C)
+ FMOVD 24(R5), F1 // F1 = coefficient ~1/6
+ WFMADB V2, V3, V1, V3 // V3 = r*(~1/24) + (~1/6)
+ FMOVD 16(R5), F1 // F1 = coefficient ~1/2
+ WFMADB V0, V4, V3, V4 // V4 = F0*V4 + V3 (upper terms of the polynomial)
+ FMOVD 8(R5), F3 // F3 = coefficient ~1
+ WFMADB V2, V1, V3, V1 // V1 = r*(~1/2) + (~1)
+ RISBGZ $57, $60, $3, R1, R3 // R3 = (R1 & 0xF) << 3: byte offset of one of the 16 exptexp entries
+ WFMADB V0, V4, V1, V0 // V0 = P = F0*V4 + V1
+ MOVD $·exptexp<>+0(SB), R2 // R2 = base address of exptexp
+ WORD $0x68432000 //ld %f4,0(%r3,%r2): F4 = t, the selected exptexp entry
+ FMADD F4, F2, F2 // F2 = r + t*r
+ SLD $48, R1, R2 // R2 = R1 << 48: the low 16 bits of R1 become the sign/exponent/top-fraction bits of a double
+ WFMADB V2, V0, V4, V2 // V2 = F2*P + t
+ LDGR R2, F0 // F0 = s, the double whose bit pattern is R2
+ FMADD F0, F2, F0 // F0 = s + s*V2, the result
+ FMOVD F0, ret+8(FP)
+ RET
+L16: // reached when |x| >= 704 or x is NaN
+ WFCEDBS V2, V2, V4 // NaN test on F2; mask goes to V4 so V2 keeps |x|
+ BVS LEXITTAGexp // NaN: return x unchanged
+ WORD $0xED205000 //cdb %f2,.L33-.L22(%r5): compare F2 with 800.0 stored at 0(R5)
+ BYTE $0x00 // remaining two bytes of the 6-byte CDB encoding
+ BYTE $0x19
+ BLT L6 // 704 <= |x| < 800: evaluate with extra scaling
+ WFCEDBS V2, V0, V0 // is |x| equal to x? (V0 is overwritten; it is reloaded on both outcomes)
+ BVS L13 // not equal, so x is negative: result is 0
+ MOVD $·expxinf<>+0(SB), R1
+ FMOVD 0(R1), F0 // x is positive and >= 800: result is +Inf
+ FMOVD F0, ret+8(FP)
+ RET
+L20: // reached when x < 0 or x is NaN
+ WORD $0xB3130020 //lcdbr %f2,%f0: F2 = -F0
+ BR L2
+L6: // 704 <= |x| < 800: same evaluation as the fast path, using different registers, plus a 2**256 rescale
+ MOVD $·expxaddexp<>+0(SB), R1 // R1 = address of the constant C
+ FMOVD 72(R5), F3 // F3 = log2(e)
+ FMOVD 0(R1), F4 // F4 = C
+ WFMSDB V0, V3, V4, V3 // V3 = x*log2(e) - C
+ FMOVD 64(R5), F6 // F6 = high part of -ln(2)
+ FADD F3, F4 // F4 = C + V3 = n
+ FMOVD 56(R5), F5 // F5 = low part of -ln(2)
+ WFMADB V4, V6, V0, V6 // V6 = x + n*(-ln2_hi)
+ FMOVD 32(R5), F1 // F1 = coefficient ~1/720
+ WFMADB V4, V5, V6, V4 // V4 = r = V6 + n*(-ln2_lo)
+ FMOVD 40(R5), F5 // F5 = coefficient ~1/120
+ FMUL F6, F6 // F6 = F6*F6, used below in the role of r*r
+ WFMADB V4, V1, V5, V1 // V1 = r*(~1/720) + (~1/120)
+ FMOVD 48(R5), F7 // F7 = coefficient ~1/24
+ LGDR F3, R1 // R1 = raw 64-bit pattern of V3 (x*log2(e) - C)
+ FMOVD 24(R5), F5 // F5 = coefficient ~1/6
+ WFMADB V4, V7, V5, V7 // V7 = r*(~1/24) + (~1/6)
+ FMOVD 16(R5), F5 // F5 = coefficient ~1/2
+ WFMADB V6, V1, V7, V1 // V1 = F6*V1 + V7 (upper terms of the polynomial)
+ FMOVD 8(R5), F7 // F7 = coefficient ~1
+ WFMADB V4, V5, V7, V5 // V5 = r*(~1/2) + (~1)
+ RISBGZ $57, $60, $3, R1, R3 // R3 = (R1 & 0xF) << 3: byte offset of one of the 16 exptexp entries
+ WFMADB V6, V1, V5, V6 // V6 = P = F6*V1 + V5
+ MOVD $·exptexp<>+0(SB), R2 // R2 = base address of exptexp
+ WFCHDBS V2, V0, V0 // compare |x| against x; the condition code is consumed by BEQ below (V0 is overwritten)
+ WORD $0x68132000 //ld %f1,0(%r3,%r2): F1 = t, the selected exptexp entry
+ FMADD F1, F4, F4 // F4 = r + t*r
+ MOVD $0x4086000000000000, R2 // R2's low 48 bits are zero; its top 16 bits are replaced by RISBGN below
+ WFMADB V4, V6, V1, V4 // V4 = F4*P + t
+ BEQ L21 // NOTE(review): appears to be taken when |x| > x, i.e. x is negative - confirm WFCHDBS operand order
+ ADDW $0xF000, R1 // in the low 16 bits this equals subtracting 0x1000, i.e. lowering the exponent field by 256
+ RISBGN $0, $15, $48, R1, R2 // place the low 16 bits of R1 into the top 16 bits of R2
+ LDGR R2, F0 // F0 = s, the double whose bit pattern is R2
+ FMADD F0, F4, F0 // F0 = s + s*V4
+ MOVD $·expx4ff<>+0(SB), R3
+ FMOVD 0(R3), F2 // F2 = 2**256
+ FMUL F2, F0 // multiply by 2**256 to undo the exponent adjustment above
+ FMOVD F0, ret+8(FP)
+ RET
+L13: // |x| >= 800 and x is negative
+ FMOVD $0, F0 // result is 0
+ FMOVD F0, ret+8(FP)
+ RET
+L21: // 704 <= |x| < 800 on the branch taken from BEQ above
+ ADDW $0x1000, R1 // raise the exponent field carried in the low 16 bits by 256
+ RISBGN $0, $15, $48, R1, R2 // place the low 16 bits of R1 into the top 16 bits of R2
+ LDGR R2, F0 // F0 = s, the double whose bit pattern is R2
+ FMADD F0, F4, F0 // F0 = s + s*V4
+ MOVD $·expx2ff<>+0(SB), R3
+ FMOVD 0(R3), F2 // F2 = 2**-256
+ FMUL F2, F0 // multiply by 2**-256 to undo the exponent adjustment above
+ FMOVD F0, ret+8(FP)
+ RET
+LEXITTAGexp: // NaN input: F0 still holds x, which is returned as-is
+ FMOVD F0, ret+8(FP)
+ RET