summaryrefslogtreecommitdiffstats
path: root/src/math/expm1_s390x.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/expm1_s390x.s')
-rw-r--r--src/math/expm1_s390x.s194
1 files changed, 194 insertions, 0 deletions
diff --git a/src/math/expm1_s390x.s b/src/math/expm1_s390x.s
new file mode 100644
index 0000000..16c861b
--- /dev/null
+++ b/src/math/expm1_s390x.s
@@ -0,0 +1,194 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial approximation and other constants. expm1Asm keeps the base of this table in R5 and reads entries as N(R5).
+DATA ·expm1rodataL22<> + 0(SB)/8, $-1.0 // -1.0: added last on the rescaled paths (labels L6 and L21)
+DATA ·expm1rodataL22<> + 8(SB)/8, $800.0E+00 // 800.0: |x| at or above this returns the constant +Inf or -1
+DATA ·expm1rodataL22<> + 16(SB)/8, $1.0 // 1.0
+DATA ·expm1rodataL22<> + 24(SB)/8, $-.231904681384629956E-16 // appears to be the low-order tail of -ln 2 (pairs with offset 80) — TODO confirm
+DATA ·expm1rodataL22<> + 32(SB)/8, $0.50000000000000029671E+00 // coefficient, approx. 1/2
+DATA ·expm1rodataL22<> + 40(SB)/8, $0.16666666666666676570E+00 // coefficient, approx. 1/6
+DATA ·expm1rodataL22<> + 48(SB)/8, $0.83333333323590973444E-02 // coefficient, approx. 1/120
+DATA ·expm1rodataL22<> + 56(SB)/8, $0.13889096526400683566E-02 // coefficient, approx. 1/720
+DATA ·expm1rodataL22<> + 64(SB)/8, $0.41666666661701152924E-01 // coefficient, approx. 1/24
+DATA ·expm1rodataL22<> + 72(SB)/8, $0.19841562053987360264E-03 // coefficient, approx. 1/5040
+DATA ·expm1rodataL22<> + 80(SB)/8, $-.693147180559945286E+00 // approx. -ln 2
+DATA ·expm1rodataL22<> + 88(SB)/8, $0.144269504088896339E+01 // approx. 1/ln 2
+DATA ·expm1rodataL22<> + 96(SB)/8, $704.0E+00 // 704.0: |x| below this takes the fast path in expm1Asm
+GLOBL ·expm1rodataL22<> + 0(SB), RODATA, $104 // 13 entries * 8 bytes, read-only
+
+DATA ·expm1xmone<> + 0(SB)/8, $0xbff0000000000000 // float64 bit pattern of -1.0; returned at label L7 of expm1Asm
+GLOBL ·expm1xmone<> + 0(SB), RODATA, $8
+DATA ·expm1xinf<> + 0(SB)/8, $0x7ff0000000000000 // float64 bit pattern of +Inf; returned from the L16 path of expm1Asm
+GLOBL ·expm1xinf<> + 0(SB), RODATA, $8
+DATA ·expm1x4ff<> + 0(SB)/8, $0x4ff0000000000000 // biased exponent 0x4ff, zero mantissa: 2**256; final multiplier on the L6 fall-through path
+GLOBL ·expm1x4ff<> + 0(SB), RODATA, $8
+DATA ·expm1x2ff<> + 0(SB)/8, $0x2ff0000000000000 // biased exponent 0x2ff, zero mantissa: 2**-256; final multiplier at label L21
+GLOBL ·expm1x2ff<> + 0(SB), RODATA, $8
+DATA ·expm1xaddexp<> + 0(SB)/8, $0xc2f0000100003ff0 // negative double with biased exponent 0x42f; expm1Asm subtracts it from x*(1/ln 2) and adds it back, presumably to round the quotient and leave index/exponent bits in the low bits — TODO confirm bit layout
+GLOBL ·expm1xaddexp<> + 0(SB), RODATA, $8
+
+// Log multipliers table: 16 doubles, indexed in expm1Asm by (R1 & 0xF)*8. Entry j appears to equal 2**(j/16)/(1+j/16) - 1 (spot-checked for j = 1 and j = 8) — TODO confirm.
+DATA ·expm1tab<> + 0(SB)/8, $0.0
+DATA ·expm1tab<> + 8(SB)/8, $-.171540871271399150E-01
+DATA ·expm1tab<> + 16(SB)/8, $-.306597931864376363E-01
+DATA ·expm1tab<> + 24(SB)/8, $-.410200970469965021E-01
+DATA ·expm1tab<> + 32(SB)/8, $-.486343079978231466E-01
+DATA ·expm1tab<> + 40(SB)/8, $-.538226193725835820E-01
+DATA ·expm1tab<> + 48(SB)/8, $-.568439602538111520E-01
+DATA ·expm1tab<> + 56(SB)/8, $-.579091847395528847E-01
+DATA ·expm1tab<> + 64(SB)/8, $-.571909584179366341E-01
+DATA ·expm1tab<> + 72(SB)/8, $-.548312665987204407E-01
+DATA ·expm1tab<> + 80(SB)/8, $-.509471843643441085E-01
+DATA ·expm1tab<> + 88(SB)/8, $-.456353588448863359E-01
+DATA ·expm1tab<> + 96(SB)/8, $-.389755254243262365E-01
+DATA ·expm1tab<> + 104(SB)/8, $-.310332908285244231E-01
+DATA ·expm1tab<> + 112(SB)/8, $-.218623539150173528E-01
+DATA ·expm1tab<> + 120(SB)/8, $-.115062908917949451E-01
+GLOBL ·expm1tab<> + 0(SB), RODATA, $128 // 16 entries * 8 bytes, read-only
+
+// Expm1 returns e**x - 1, the base-e exponential of x minus 1.
+// It is more accurate than Exp(x) - 1 when x is near zero.
+//
+// Special cases are:
+// Expm1(+Inf) = +Inf
+// Expm1(-Inf) = -1
+// Expm1(NaN) = NaN
+// Very large values overflow to -1 or +Inf (this code returns those constants once |x| >= 800).
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+// NOTE(review): the arithmetic comments below assume Go's s390x operand order (destination last; FMADD a, b, c means c += a*b) — confirm against the assembler.
+TEXT ·expm1Asm(SB), NOSPLIT, $0-16 // no local frame; 8-byte argument x at 0(FP), 8-byte result at 8(FP)
+ FMOVD x+0(FP), F0 // F0 = x
+ MOVD $·expm1rodataL22<>+0(SB), R5 // R5 = base of the constant table; every N(R5) load below indexes it
+ LTDBR F0, F0 // test x to set the condition code
+ BLTU L20 // x < 0 or unordered: L20 puts -x in F2
+ FMOVD F0, F2 // otherwise F2 = x
+L2: // F2 = x with a negative sign flipped; F0 still holds x
+ WORD $0xED205060 //cdb %f2,.L23-.L22(%r5): compare F2 with 96(R5) = 704.0
+ BYTE $0x00
+ BYTE $0x19
+ BGE L16 // F2 >= 704: leave the fast path
+ BVS L16 // unordered compare also goes to L16
+ WFCEDBS V2, V2, V2 // self-compare of F2; note V2 is overwritten with the compare result (F2 is reloaded below)
+ BVS LEXITTAGexpm1 // presumably a NaN guard (NaN != NaN): return F0 unchanged
+ MOVD $·expm1xaddexp<>+0(SB), R1 // R1 = address of the magic addend
+ FMOVD 88(R5), F1 // F1 = approx. 1/ln 2
+ FMOVD 0(R1), F2 // F2 = magic addend
+ WFMSDB V0, V1, V2, V1 // F1 = x*(1/ln 2) - magic
+ FMOVD 80(R5), F6 // F6 = approx. -ln 2
+ WFADB V1, V2, V4 // F4 = F1 + magic, i.e. the rounded quotient n
+ FMOVD 72(R5), F2 // F2 = coefficient 72(R5)
+ FMADD F6, F4, F0 // F0 = x + (-ln 2)*n: reduced argument r
+ FMOVD 64(R5), F3 // F3 = coefficient 64(R5)
+ FMOVD 56(R5), F6 // F6 = coefficient 56(R5)
+ FMOVD 48(R5), F5 // F5 = coefficient 48(R5)
+ FMADD F2, F0, F6 // F6 = 72(R5)*r + 56(R5)
+ WFMADB V0, V5, V3, V5 // F5 = r*48(R5) + 64(R5)
+ WFMDB V0, V0, V2 // F2 = r*r
+ LGDR F1, R1 // R1 = raw bits of F1 (x/ln 2 minus magic); low bits are used for index and scale below
+ WFMADB V6, V2, V5, V6 // F6 = F6*r*r + F5
+ FMOVD 40(R5), F3 // F3 = coefficient 40(R5)
+ FMOVD 32(R5), F5 // F5 = coefficient 32(R5)
+ WFMADB V0, V3, V5, V3 // F3 = r*40(R5) + 32(R5)
+ FMOVD 24(R5), F5 // F5 = 24(R5), the small correction paired with -ln 2
+ WFMADB V2, V6, V3, V2 // F2 = r*r*F6 + F3: polynomial P
+ FMADD F5, F4, F0 // r += 24(R5)*n
+ FMOVD 16(R5), F6 // F6 = 1.0
+ WFMADB V0, V2, V6, V2 // F2 = r*P + 1.0
+ RISBGZ $57, $60, $3, R1, R3 // R3 = (R1 & 0xF) << 3: byte offset into expm1tab
+ WORD $0xB3130022 //lcdbr %f2,%f2: F2 = -F2
+ MOVD $·expm1tab<>+0(SB), R2 // R2 = base of expm1tab
+ WORD $0x68432000 //ld %f4,0(%r3,%r2): F4 = table entry t at R2+R3
+ FMADD F4, F0, F0 // F0 = t*r + r
+ SLD $48, R1, R2 // R2 = R1 << 48: low 16 bits of R1 become the sign/exponent/top-mantissa bits
+ WFMSDB V2, V0, V4, V0 // F0 = F2*F0 - t
+ LDGR R2, F4 // F4 = R2 reinterpreted as a double: the scale factor
+ WORD $0xB3130000 //lcdbr %f0,%f0: F0 = -F0
+ FSUB F4, F6 // F6 = 1.0 - scale
+ WFMSDB V0, V4, V6, V0 // result = F0*scale - (1.0 - scale)
+ FMOVD F0, ret+8(FP) // store result
+ RET
+L16: // reached when F2 >= 704 or the compare at L2 was unordered
+ WFCEDBS V2, V2, V4 // self-compare of F2, result mask in V4
+ BVS LEXITTAGexpm1 // presumably NaN: return F0 unchanged
+ WORD $0xED205008 //cdb %f2,.L34-.L22(%r5): compare F2 with 8(R5) = 800.0
+ BYTE $0x00
+ BYTE $0x19
+ BLT L6 // 704 <= F2 < 800: compute with rescaling
+ WFCEDBS V2, V0, V0 // compare F2 with x (V0 is overwritten; both exits reload F0)
+ BVS L7 // not equal, i.e. x was negated at L20: return -1
+ MOVD $·expm1xinf<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = +Inf
+ FMOVD F0, ret+8(FP) // store result
+ RET
+L20: // entry-test branch target for x < 0 or unordered
+ WORD $0xB3130020 //lcdbr %f2,%f0: F2 = -F0
+ BR L2
+L6: // 704 <= F2 < 800: same reduction and polynomial as the fast path, then an exponent-adjusted scale
+ MOVD $·expm1xaddexp<>+0(SB), R1 // R1 = address of the magic addend
+ FMOVD 88(R5), F5 // F5 = approx. 1/ln 2
+ FMOVD 0(R1), F4 // F4 = magic addend
+ WFMSDB V0, V5, V4, V5 // F5 = x*(1/ln 2) - magic
+ FMOVD 80(R5), F3 // F3 = approx. -ln 2
+ WFADB V5, V4, V1 // F1 = F5 + magic, i.e. the rounded quotient n
+ VLEG $0, 48(R5), V16 // V16 element 0 = coefficient 48(R5)
+ WFMADB V1, V3, V0, V3 // F3 = n*(-ln 2) + x: reduced argument r
+ FMOVD 56(R5), F4 // F4 = coefficient 56(R5)
+ FMOVD 64(R5), F7 // F7 = coefficient 64(R5)
+ FMOVD 72(R5), F6 // F6 = coefficient 72(R5)
+ WFMADB V3, V16, V7, V16 // V16 = r*48(R5) + 64(R5)
+ WFMADB V3, V6, V4, V6 // F6 = r*72(R5) + 56(R5)
+ WFMDB V3, V3, V4 // F4 = r*r
+ MOVD $·expm1tab<>+0(SB), R2 // R2 = base of expm1tab
+ WFMADB V6, V4, V16, V6 // F6 = F6*r*r + V16
+ VLEG $0, 32(R5), V16 // V16 element 0 = coefficient 32(R5)
+ FMOVD 40(R5), F7 // F7 = coefficient 40(R5)
+ WFMADB V3, V7, V16, V7 // F7 = r*40(R5) + 32(R5)
+ VLEG $0, 24(R5), V16 // V16 element 0 = 24(R5), the small correction paired with -ln 2
+ WFMADB V4, V6, V7, V4 // F4 = r*r*F6 + F7: polynomial P
+ WFMADB V1, V16, V3, V1 // F1 = n*24(R5) + r: corrected r
+ FMOVD 16(R5), F6 // F6 = 1.0
+ FMADD F4, F1, F6 // F6 = P*r + 1.0
+ LGDR F5, R1 // R1 = raw bits of F5 (x/ln 2 minus magic)
+ WORD $0xB3130066 //lcdbr %f6,%f6: F6 = -F6
+ RISBGZ $57, $60, $3, R1, R3 // R3 = (R1 & 0xF) << 3: byte offset into expm1tab
+ WORD $0x68432000 //ld %f4,0(%r3,%r2): F4 = table entry t at R2+R3
+ FMADD F4, F1, F1 // F1 = t*r + r
+ MOVD $0x4086000000000000, R2 // preset R2; the RISBGN below replaces its top 16 bits, and its low 48 bits are zero
+ FMSUB F1, F6, F4 // F4 = F1*F6 - t
+ WORD $0xB3130044 //lcdbr %f4,%f4: F4 = -F4
+ WFCHDBS V2, V0, V0 // compare F2 against x (V0 is overwritten; F0 is rebuilt below)
+ BEQ L21 // taken when F2 > x, i.e. x was negated at L20
+ ADDW $0xF000, R1 // adjust the low 16 bits of R1: subtracts 0x1000 modulo 2**16 (exponent field lowered by 256)
+ RISBGN $0, $15, $48, R1, R2 // insert the low 16 bits of R1 into the top 16 bits of R2
+ LDGR R2, F0 // F0 = R2 reinterpreted as a double: the adjusted scale
+ FMADD F0, F4, F0 // F0 = scale*F4 + scale
+ MOVD $·expm1x4ff<>+0(SB), R3
+ FMOVD 0(R5), F4 // F4 = -1.0
+ FMOVD 0(R3), F2 // F2 = 2**256, compensating for the lowered exponent
+ WFMADB V2, V0, V4, V0 // result = 2**256*F0 - 1.0
+ FMOVD F0, ret+8(FP) // store result
+ RET
+L7: // F2 >= 800 and x differs from F2: result is -1
+ MOVD $·expm1xmone<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = -1.0
+ FMOVD F0, ret+8(FP) // store result
+ RET
+L21: // rescaled path for x that was negated at L20; F4 carries the value computed before the branch
+ ADDW $0x1000, R1 // adjust the low 16 bits of R1: exponent field raised by 256
+ RISBGN $0, $15, $48, R1, R2 // insert the low 16 bits of R1 into the top 16 bits of R2
+ LDGR R2, F0 // F0 = R2 reinterpreted as a double: the adjusted scale
+ FMADD F0, F4, F0 // F0 = scale*F4 + scale
+ MOVD $·expm1x2ff<>+0(SB), R3
+ FMOVD 0(R5), F4 // F4 = -1.0
+ FMOVD 0(R3), F2 // F2 = 2**-256, compensating for the raised exponent
+ WFMADB V2, V0, V4, V0 // result = 2**-256*F0 - 1.0
+ FMOVD F0, ret+8(FP) // store result
+ RET
+LEXITTAGexpm1: // reached from the self-compare checks: return the input unchanged
+ FMOVD F0, ret+8(FP)
+ RET