summaryrefslogtreecommitdiffstats
path: root/src/math/atan2_s390x.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/atan2_s390x.s')
-rw-r--r--src/math/atan2_s390x.s297
1 files changed, 297 insertions, 0 deletions
diff --git a/src/math/atan2_s390x.s b/src/math/atan2_s390x.s
new file mode 100644
index 0000000..587b89e
--- /dev/null
+++ b/src/math/atan2_s390x.s
@@ -0,0 +1,297 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000 // IEEE-754 binary64 bit pattern of +Inf
+#define NegInf 0xFFF0000000000000 // bit pattern of -Inf
+#define NegZero 0x8000000000000000 // bit pattern of -0 (only the sign bit set)
+#define Pi 0x400921FB54442D18 // bit pattern of Pi
+#define NegPi 0xC00921FB54442D18 // bit pattern of -Pi
+#define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4
+#define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4
+#define PiDiv4 0x3FE921FB54442D18 // Pi/4
+#define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4
+
+// Minimax polynomial coefficients and other constants: 20 float64 values (160 bytes) that atan2Asm addresses through R9.
+DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
+DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
+DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
+DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
+DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
+DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
+DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
+DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
+DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
+DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
+DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
+DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
+DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
+DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
+DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
+DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
+DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
+DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
+DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
+DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
+GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160 // file-local, read-only, 160 bytes
+
+DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b // entry 0: +a; the table is indexed by R2*8 at label L18 of atan2Asm
+DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b // entry 1: -a (same bits as entry 0 with the sign bit set)
+DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b // entry 2: +2a (same mantissa, exponent one higher)
+DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b // entry 3: -2a
+GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32 // NOTE(review): a * atan2xpim is approximately 1.5708, so the entries presumably encode +-Pi/2 and +-Pi divided by atan2xpim - confirm
+DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000 // multiplier applied to the selected atan2xpi2h entry (madb at label L18)
+GLOBL ·atan2xpim<> + 0(SB), RODATA, $8 // file-local, read-only, 8 bytes
+
+// Atan2 returns the arc tangent of y/x, using
+// the signs of the two to determine the quadrant
+// of the return value.
+//
+// Special cases are (in order):
+// Atan2(y, NaN) = NaN
+// Atan2(NaN, x) = NaN
+// Atan2(+0, x>=0) = +0
+// Atan2(-0, x>=0) = -0
+// Atan2(+0, x<=-0) = +Pi
+// Atan2(-0, x<=-0) = -Pi
+// Atan2(y>0, 0) = +Pi/2
+// Atan2(y<0, 0) = -Pi/2
+// Atan2(+Inf, +Inf) = +Pi/4
+// Atan2(-Inf, +Inf) = -Pi/4
+// Atan2(+Inf, -Inf) = 3Pi/4
+// Atan2(-Inf, -Inf) = -3Pi/4
+// Atan2(y, +Inf) = 0
+// Atan2(y>0, -Inf) = +Pi
+// Atan2(y<0, -Inf) = -Pi
+// Atan2(+Inf, x) = +Pi/2
+// Atan2(-Inf, x) = -Pi/2
+// The algorithm used is a minimax polynomial approximation
+// with coefficients determined by a Remez exchange algorithm.
+
+TEXT ·atan2Asm(SB), NOSPLIT, $0-24 // no local frame; 24 bytes of arguments and result: inputs at 0(FP) and 8(FP), result at 16(FP)
+ // Special cases are decided on the raw 64-bit patterns. Naming in this function: "x" is the first argument (offset 0), "y" the second (offset 8).
+ MOVD x+0(FP), R1 // R1 = bit pattern of x
+ MOVD y+8(FP), R2 // R2 = bit pattern of y
+
+ // special case Atan2(NaN, y) = NaN
+ MOVD $~(1<<63), R5 // R5 = mask of every bit except the sign bit
+ AND R1, R5 // R5 = bits of |x| (R1 itself is left unchanged)
+ MOVD $PosInf, R3
+ CMPUBLT R3, R5, returnX // unsigned: bits of |x| above the +Inf pattern means x is NaN
+
+ // special case Atan2(x, NaN) = NaN
+ MOVD $~(1<<63), R5
+ AND R2, R5 // R5 = bits of |y|
+ CMPUBLT R3, R5, returnY // R3 still holds PosInf; y is NaN
+
+ MOVD $NegZero, R3
+ CMPUBEQ R3, R1, xIsNegZero // x is exactly -0
+
+ MOVD $0, R3
+ CMPUBEQ R3, R1, xIsPosZero // x is exactly +0
+
+ MOVD $PosInf, R4
+ CMPUBEQ R4, R2, yIsPosInf // y is +Inf
+
+ MOVD $NegInf, R4
+ CMPUBEQ R4, R2, yIsNegInf // y is -Inf
+ BR Normal // no special case matched
+xIsNegZero:
+ // special case Atan2(-0, y>=0) = -0
+ MOVD $0, R4
+ CMPBLE R4, R2, returnX // signed 0 <= bits of y, i.e. sign bit of y clear: return x (-0)
+
+ //special case Atan2(-0, y<=-0) = -Pi
+ MOVD $NegZero, R4
+ CMPBGE R4, R2, returnNegPi // NOTE(review): signed compare against the minimum int64 - appears to be taken only when y is exactly -0; other negative y continue to Normal
+ BR Normal
+xIsPosZero:
+ //special case Atan2(0, 0) = 0
+ MOVD $0, R4
+ CMPUBEQ R4, R2, returnX // y is exactly +0: return x (+0)
+
+ //special case Atan2(0, y<=-0) = Pi
+ MOVD $NegZero, R4
+ CMPBGE R4, R2, returnPi // NOTE(review): as above, appears to match only y == -0; every other y goes to Normal
+ BR Normal
+yIsNegInf: // reached when y (second argument) is -Inf
+ //special case Atan2(+Inf, -Inf) = 3Pi/4
+ MOVD $PosInf, R3
+ CMPUBEQ R3, R1, posInfNegInf
+
+ //special case Atan2(-Inf, -Inf) = -3Pi/4
+ MOVD $NegInf, R3
+ CMPUBEQ R3, R1, negInfNegInf
+ BR Normal // finite x with y = -Inf is left to the general path
+yIsPosInf: // reached when y (second argument) is +Inf
+ //special case Atan2(+Inf, +Inf) = Pi/4
+ MOVD $PosInf, R3
+ CMPUBEQ R3, R1, posInfPosInf
+
+ //special case Atan2(-Inf, +Inf) = -Pi/4
+ MOVD $NegInf, R3
+ CMPUBEQ R3, R1, negInfPosInf
+
+ //special case Atan2(x, +Inf) = Copysign(0, x)
+ CMPBLT R1, $0, returnNegZero // bits of x negative as a signed integer, i.e. sign bit set
+ BR returnPosZero
+
+Normal: // general case: pick a quotient of the arguments, approximate with the polynomial, then add a table correction if needed
+ FMOVD x+0(FP), F0 // F0 = x
+ FMOVD y+8(FP), F2 // F2 = y
+ MOVD $·atan2rodataL25<>+0(SB), R9 // R9 = base address of the coefficient table
+ LGDR F0, R2 // R2 = bit pattern of x
+ LGDR F2, R1 // R1 = bit pattern of y
+ RISBGNZ $32, $63, $32, R2, R2 // rotate by 32 and keep bits 32-63: R2 = high word of x (sign bit included)
+ RISBGNZ $32, $63, $32, R1, R1 // R1 = high word of y (sign bit included)
+ WORD $0xB9170032 //llgtr %r3,%r2 : R3 = low 31 bits of R2, i.e. high word of |x|
+ RISBGZ $63, $63, $33, R2, R5 // R5 = bit 31 of R2 moved to the lowest bit: sign of x as 0 or 1
+ WORD $0xB9170041 //llgtr %r4,%r1 : R4 = high word of |y|
+ WFLCDB V0, V20 // V20 = -x, kept so that labels L22/L24 can form |x|
+ MOVW R4, R6
+ MOVW R3, R7
+ CMPUBLT R6, R7, L17 // high word of |y| < high word of |x|: take the L17 path
+ WFDDB V2, V0, V3 // V3 = quotient of x and y; NOTE(review): confirm operand order - L17 computes the quotient with the operands swapped
+ ADDW $2, R5, R2 // R2 = R5 + 2: selects entry 2 or 3 of atan2xpi2h at label L18
+ MOVW R4, R6
+ MOVW R3, R7
+ CMPUBLE R6, R7, L20 // "less" was excluded above, so this is taken only when the high words are equal
+L3: // V3 = reduced argument z; the result built below is z + z^3 * P(z^2)
+ WFMDB V3, V3, V4 // V4 = z^2
+ VLEG $0, 152(R9), V18 // coefficient loads below use byte offsets into atan2rodataL25
+ VLEG $0, 144(R9), V16
+ FMOVD 136(R9), F1
+ FMOVD 128(R9), F5
+ FMOVD 120(R9), F6
+ WFMADB V4, V16, V5, V16
+ WFMADB V4, V6, V1, V6
+ FMOVD 112(R9), F7
+ WFMDB V4, V4, V1 // V1 = z^4
+ WFMADB V4, V7, V18, V7
+ VLEG $0, 104(R9), V18
+ WFMADB V1, V6, V16, V6
+ CMPWU R4, R3 // unsigned compare of the high words of |y| and |x|; the condition code is consumed by BLT/BGT after the polynomial (assumes the loads and FP multiply/adds in between leave it intact)
+ FMOVD 96(R9), F5
+ VLEG $0, 88(R9), V16
+ WFMADB V4, V5, V18, V5
+ VLEG $0, 80(R9), V18
+ VLEG $0, 72(R9), V22
+ WFMADB V4, V16, V18, V16
+ VLEG $0, 64(R9), V18
+ WFMADB V1, V7, V5, V7
+ WFMADB V4, V18, V22, V18
+ WFMDB V1, V1, V5 // V5 = z^8
+ WFMADB V1, V16, V18, V16
+ VLEG $0, 56(R9), V18
+ WFMADB V5, V6, V7, V6
+ VLEG $0, 48(R9), V22
+ FMOVD 40(R9), F7
+ WFMADB V4, V7, V18, V7
+ VLEG $0, 32(R9), V18
+ WFMADB V5, V6, V16, V6
+ WFMADB V4, V18, V22, V18
+ VLEG $0, 24(R9), V16
+ WFMADB V1, V7, V18, V7
+ VLEG $0, 16(R9), V18
+ VLEG $0, 8(R9), V22
+ WFMADB V4, V18, V16, V18
+ VLEG $0, 0(R9), V16
+ WFMADB V5, V6, V7, V6
+ WFMADB V4, V16, V22, V16
+ FMUL F3, F4 // F4 = z^2 * z = z^3
+ WFMADB V1, V18, V16, V1
+ FMADD F6, F5, F1 // F1 = F6*F5 + F1: final combination of the partial sums into P(z^2)
+ WFMADB V4, V1, V3, V4 // V4 = z^3 * P(z^2) + z
+ BLT L18 // from CMPWU: high word of |y| below that of |x| - always add the table correction
+ BGT L7 // high word of |y| above that of |x| - correction only when y is negative
+ LTDBR F2, F2 // high words equal: test the sign of y before a full compare
+ BLTU L21 // y negative: L21 negates F2 in place
+L8:
+ LTDBR F0, F0 // test the sign of x
+ BLTU L22 // x negative: L22 copies V20 (-x) into V0
+L9:
+ WFCHDBS V2, V0, V0 // floating-point compare-high between |y| and |x|; sets the condition code (V0 is overwritten)
+ BNE L18 // compare did not hold: add the table correction
+L7:
+ MOVW R1, R6 // R1 still holds the high word of y with its sign bit; relies on MOVW sign-extending
+ CMPBGE R6, $0, L1 // y not negative: the polynomial value is the result
+L18: // F4 += atan2xpim * atan2xpi2h[R2]
+ RISBGZ $58, $60, $3, R2, R2 // R2 = table index * 8 (byte offset, kept to 3 bits of index)
+ MOVD $·atan2xpi2h<>+0(SB), R1 // R1 = base address of atan2xpi2h
+ MOVD ·atan2xpim<>+0(SB), R3 // R3 = bit pattern of the multiplier constant
+ LDGR R3, F0 // F0 = multiplier
+ WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1) : F4 = F0 * (float64 at R1+R2) + F4; the two BYTE lines complete the 6-byte encoding
+ BYTE $0x40
+ BYTE $0x1E
+L1: // store the result and return
+ FMOVD F4, ret+16(FP)
+ RET
+
+L20: // high words of |x| and |y| are equal: decide with a full floating-point compare of the magnitudes
+ LTDBR F2, F2 // test the sign of y
+ BLTU L23 // y negative: L23 sets F6 = -y
+ FMOVD F2, F6 // F6 = y (already non-negative)
+L4:
+ LTDBR F0, F0 // test the sign of x
+ BLTU L24 // x negative: L24 sets V4 = V20 (-x)
+ FMOVD F0, F4 // F4 = x (already non-negative)
+L5:
+ WFCHDBS V6, V4, V4 // compare-high between |y| (V6) and |x| (V4); sets the condition code
+ BEQ L3 // compare held: keep the quotient already in V3 and index R5+2
+L17: // other orientation: swapped quotient, negated, with table entry 0 or 1
+ WFDDB V0, V2, V4 // V4 = quotient with the operands swapped relative to the WFDDB before L3
+ BYTE $0x18 //lr %r2,%r5 : R2 = R5, selecting entry 0 or 1 of atan2xpi2h
+ BYTE $0x25
+ WORD $0xB3130034 //lcdbr %f3,%f4 : F3 = -F4, the polynomial argument for this path
+ BR L3
+L23:
+ WORD $0xB3130062 //lcdbr %f6,%f2 : F6 = -y
+ BR L4
+L22:
+ VLR V20, V0 // V0 = -x (computed at the top of Normal)
+ BR L9
+L21:
+ WORD $0xB3130022 //lcdbr %f2,%f2 : F2 = -F2
+ BR L8
+L24:
+ VLR V20, V4 // V4 = -x
+ BR L5
+returnX: //the result is same as the first argument
+ MOVD R1, ret+16(FP)
+ RET
+returnY: //the result is same as the second argument
+ MOVD R2, ret+16(FP)
+ RET
+returnPi: // result = Pi
+ MOVD $Pi, R1
+ MOVD R1, ret+16(FP)
+ RET
+returnNegPi: // result = -Pi
+ MOVD $NegPi, R1
+ MOVD R1, ret+16(FP)
+ RET
+posInfNegInf: // first argument +Inf, second -Inf: result = 3Pi/4
+ MOVD $Pi3Div4, R1
+ MOVD R1, ret+16(FP)
+ RET
+negInfNegInf: // both arguments -Inf: result = -3Pi/4
+ MOVD $NegPi3Div4, R1
+ MOVD R1, ret+16(FP)
+ RET
+posInfPosInf: // both arguments +Inf: result = Pi/4
+ MOVD $PiDiv4, R1
+ MOVD R1, ret+16(FP)
+ RET
+negInfPosInf: // first argument -Inf, second +Inf: result = -Pi/4
+ MOVD $NegPiDiv4, R1
+ MOVD R1, ret+16(FP)
+ RET
+returnNegZero: // result = -0
+ MOVD $NegZero, R1
+ MOVD R1, ret+16(FP)
+ RET
+returnPosZero: // result = +0
+ MOVD $0, ret+16(FP)
+ RET