summaryrefslogtreecommitdiffstats
path: root/src/math/sin_s390x.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/sin_s390x.s')
-rw-r--r--src/math/sin_s390x.s356
1 files changed, 356 insertions, 0 deletions
diff --git a/src/math/sin_s390x.s b/src/math/sin_s390x.s
new file mode 100644
index 0000000..7eb2206
--- /dev/null
+++ b/src/math/sin_s390x.s
@@ -0,0 +1,356 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Various constants
+DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000
+GLOBL sincosxnan<>+0(SB), RODATA, $8
+DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19
+GLOBL sincosxlim<>+0(SB), RODATA, $8
+DATA sincosxadd<>+0(SB)/8, $0xc338000000000000
+GLOBL sincosxadd<>+0(SB), RODATA, $8
+DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31
+GLOBL sincosxpi2l<>+0(SB), RODATA, $8
+DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16
+GLOBL sincosxpi2m<>+0(SB), RODATA, $8
+DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01
+GLOBL sincosxpi2h<>+0(SB), RODATA, $8
+DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00
+GLOBL sincosrpi2<>+0(SB), RODATA, $8
+
+// Minimax polynomial approximations
+DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincosc0<>+0(SB), RODATA, $8
+DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00
+GLOBL sincosc1<>+0(SB), RODATA, $8
+DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01
+GLOBL sincosc2<>+0(SB), RODATA, $8
+DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02
+GLOBL sincosc3<>+0(SB), RODATA, $8
+DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04
+GLOBL sincosc4<>+0(SB), RODATA, $8
+DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06
+GLOBL sincosc5<>+0(SB), RODATA, $8
+DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08
+GLOBL sincosc6<>+0(SB), RODATA, $8
+DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10
+GLOBL sincosc7<>+0(SB), RODATA, $8
+DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincoss0<>+0(SB), RODATA, $8
+DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00
+GLOBL sincoss1<>+0(SB), RODATA, $8
+DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02
+GLOBL sincoss2<>+0(SB), RODATA, $8
+DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03
+GLOBL sincoss3<>+0(SB), RODATA, $8
+DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05
+GLOBL sincoss4<>+0(SB), RODATA, $8
+DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07
+GLOBL sincoss5<>+0(SB), RODATA, $8
+DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09
+GLOBL sincoss6<>+0(SB), RODATA, $8
+DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12
+GLOBL sincoss7<>+0(SB), RODATA, $8
+
+// Sin returns the sine of the radian argument x.
+//
+// Special cases are:
+// Sin(±0) = ±0
+// Sin(±Inf) = NaN
+// Sin(NaN) = NaN
+// The algorithm used is minimax polynomial approximation.
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·sinAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ //special case Sin(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ sinIsZero
+ LTDBR F0, F0
+ BLTU L17
+ FMOVD F0, F5
+L2:
+ MOVD $sincoss7<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $sincoss6<>+0(SB), R1
+ FMOVD 0(R1), F1
+ MOVD $sincoss5<>+0(SB), R1
+ VLEG $0, 0(R1), V18
+ MOVD $sincoss4<>+0(SB), R1
+ FMOVD 0(R1), F6
+ MOVD $sincoss2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sincoss3<>+0(SB), R1
+ FMOVD 0(R1), F7
+ MOVD $sincoss1<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincoss0<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFCHDBS V2, V5, V2
+ BEQ L18
+ MOVD $sincosrpi2<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincosxadd<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFMSDB V0, V3, V2, V3
+ FMOVD 0(R1), F6
+ FADD F3, F6
+ MOVD $sincosxpi2h<>+0(SB), R1
+ FMOVD 0(R1), F2
+ FMSUB F2, F6, F0
+ MOVD $sincosxpi2m<>+0(SB), R1
+ FMOVD 0(R1), F4
+ FMADD F4, F6, F0
+ MOVD $sincosxpi2l<>+0(SB), R1
+ WFMDB V0, V0, V1
+ FMOVD 0(R1), F7
+ WFMDB V1, V1, V2
+ LGDR F3, R1
+ MOVD $sincosxlim<>+0(SB), R2
+ TMLL R1, $1
+ BEQ L6
+ FMOVD 0(R2), F0
+ WFCHDBS V0, V5, V0
+ BNE L14
+ MOVD $sincosc7<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD $sincosc6<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincosc5<>+0(SB), R2
+ WFMADB V1, V0, V4, V0
+ FMOVD 0(R2), F6
+ MOVD $sincosc4<>+0(SB), R2
+ WFMADB V1, V0, V6, V0
+ FMOVD 0(R2), F4
+ MOVD $sincosc2<>+0(SB), R2
+ FMOVD 0(R2), F6
+ WFMADB V2, V4, V6, V4
+ MOVD $sincosc3<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincosc1<>+0(SB), R2
+ WFMADB V2, V0, V3, V0
+ FMOVD 0(R2), F6
+ WFMADB V1, V4, V6, V4
+ TMLL R1, $2
+ WFMADB V2, V0, V4, V0
+ MOVD $sincosc0<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFMADB V1, V0, V2, V0
+ BNE L15
+ FMOVD F0, ret+8(FP)
+ RET
+
+L6:
+ FMOVD 0(R2), F4
+ WFCHDBS V4, V5, V4
+ BNE L14
+ MOVD $sincoss7<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincoss6<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincoss5<>+0(SB), R2
+ WFMADB V1, V4, V3, V4
+ WFMADB V6, V7, V0, V6
+ FMOVD 0(R2), F0
+ MOVD $sincoss4<>+0(SB), R2
+ FMADD F4, F1, F0
+ FMOVD 0(R2), F3
+ MOVD $sincoss2<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincoss3<>+0(SB), R2
+ WFMADB V2, V3, V4, V3
+ FMOVD 0(R2), F4
+ MOVD $sincoss1<>+0(SB), R2
+ WFMADB V2, V0, V4, V0
+ FMOVD 0(R2), F4
+ WFMADB V1, V3, V4, V3
+ FNEG F6, F4
+ WFMADB V2, V0, V3, V2
+ WFMDB V4, V1, V0
+ TMLL R1, $2
+ WFMSDB V0, V2, V6, V0
+ BNE L15
+ FMOVD F0, ret+8(FP)
+ RET
+
+L14:
+ MOVD $sincosxnan<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L18:
+ WFMDB V0, V0, V2
+ WFMADB V2, V4, V1, V4
+ WFMDB V2, V2, V1
+ WFMADB V2, V4, V18, V4
+ WFMADB V1, V6, V16, V6
+ WFMADB V1, V4, V7, V4
+ WFMADB V2, V6, V3, V6
+ FMUL F0, F2
+ WFMADB V1, V4, V6, V4
+ FMADD F4, F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L17:
+ FNEG F0, F5
+ BR L2
+L15:
+ FNEG F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+
+sinIsZero:
+ FMOVD F0, ret+8(FP)
+ RET
+
+// Cos returns the cosine of the radian argument.
+//
+// Special cases are:
+// Cos(±Inf) = NaN
+// Cos(NaN) = NaN
+// The algorithm used is minimax polynomial approximation.
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·cosAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0
+ LTDBR F0, F0
+ BLTU L35
+ FMOVD F0, F1
+L21:
+ MOVD $sincosc7<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $sincosc6<>+0(SB), R1
+ VLEG $0, 0(R1), V20
+ MOVD $sincosc5<>+0(SB), R1
+ VLEG $0, 0(R1), V18
+ MOVD $sincosc4<>+0(SB), R1
+ FMOVD 0(R1), F6
+ MOVD $sincosc2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sincosc3<>+0(SB), R1
+ FMOVD 0(R1), F7
+ MOVD $sincosc1<>+0(SB), R1
+ FMOVD 0(R1), F5
+ MOVD $sincosrpi2<>+0(SB), R1
+ FMOVD 0(R1), F2
+ MOVD $sincosxadd<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincoss0<>+0(SB), R1
+ WFMSDB V0, V2, V3, V2
+ FMOVD 0(R1), F3
+ WFCHDBS V3, V1, V3
+ LGDR F2, R1
+ BEQ L36
+ MOVD $sincosxadd<>+0(SB), R2
+ FMOVD 0(R2), F4
+ FADD F2, F4
+ MOVD $sincosxpi2h<>+0(SB), R2
+ FMOVD 0(R2), F2
+ WFMSDB V4, V2, V0, V2
+ MOVD $sincosxpi2m<>+0(SB), R2
+ FMOVD 0(R2), F0
+ WFMADB V4, V0, V2, V0
+ MOVD $sincosxpi2l<>+0(SB), R2
+ WFMDB V0, V0, V2
+ FMOVD 0(R2), F5
+ WFMDB V2, V2, V6
+ MOVD $sincosxlim<>+0(SB), R2
+ TMLL R1, $1
+ BNE L25
+ FMOVD 0(R2), F0
+ WFCHDBS V0, V1, V0
+ BNE L33
+ MOVD $sincosc7<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD $sincosc6<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincosc5<>+0(SB), R2
+ WFMADB V2, V0, V4, V0
+ FMOVD 0(R2), F1
+ MOVD $sincosc4<>+0(SB), R2
+ WFMADB V2, V0, V1, V0
+ FMOVD 0(R2), F4
+ MOVD $sincosc2<>+0(SB), R2
+ FMOVD 0(R2), F1
+ WFMADB V6, V4, V1, V4
+ MOVD $sincosc3<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincosc1<>+0(SB), R2
+ WFMADB V6, V0, V3, V0
+ FMOVD 0(R2), F1
+ WFMADB V2, V4, V1, V4
+ TMLL R1, $2
+ WFMADB V6, V0, V4, V0
+ MOVD $sincosc0<>+0(SB), R1
+ FMOVD 0(R1), F4
+ WFMADB V2, V0, V4, V0
+ BNE L34
+ FMOVD F0, ret+8(FP)
+ RET
+
+L25:
+ FMOVD 0(R2), F3
+ WFCHDBS V3, V1, V1
+ BNE L33
+ MOVD $sincoss7<>+0(SB), R2
+ FMOVD 0(R2), F1
+ MOVD $sincoss6<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincoss5<>+0(SB), R2
+ WFMADB V2, V1, V3, V1
+ FMOVD 0(R2), F3
+ MOVD $sincoss4<>+0(SB), R2
+ WFMADB V2, V1, V3, V1
+ FMOVD 0(R2), F3
+ MOVD $sincoss2<>+0(SB), R2
+ FMOVD 0(R2), F7
+ WFMADB V6, V3, V7, V3
+ MOVD $sincoss3<>+0(SB), R2
+ FMADD F5, F4, F0
+ FMOVD 0(R2), F4
+ MOVD $sincoss1<>+0(SB), R2
+ FMADD F1, F6, F4
+ FMOVD 0(R2), F1
+ FMADD F3, F2, F1
+ FMUL F0, F2
+ WFMADB V6, V4, V1, V6
+ TMLL R1, $2
+ FMADD F6, F2, F0
+ BNE L34
+ FMOVD F0, ret+8(FP)
+ RET
+
+L33:
+ MOVD $sincosxnan<>+0(SB), R1
+ FMOVD 0(R1), F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L36:
+ FMUL F0, F0
+ MOVD $sincosc0<>+0(SB), R1
+ WFMDB V0, V0, V1
+ WFMADB V0, V4, V20, V4
+ WFMADB V1, V6, V16, V6
+ WFMADB V0, V4, V18, V4
+ WFMADB V0, V6, V5, V6
+ WFMADB V1, V4, V7, V4
+ FMOVD 0(R1), F2
+ WFMADB V1, V4, V6, V4
+ WFMADB V0, V4, V2, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L35:
+ FNEG F0, F1
+ BR L21
+L34:
+ FNEG F0, F0
+ FMOVD F0, ret+8(FP)
+ RET