1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
|
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
#define PosInf 0x7FF0000000000000
#define NegInf 0xFFF0000000000000
#define NegZero 0x8000000000000000
#define Pi 0x400921FB54442D18
#define NegPi 0xC00921FB54442D18
#define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4
#define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4
#define PiDiv4 0x3FE921FB54442D18 // Pi/4
#define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4
// Minimax polynomial coefficients and other constants
DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160
DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b
GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32
DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000
GLOBL ·atan2xpim<> + 0(SB), RODATA, $8
// Atan2 returns the arc tangent of y/x, using
// the signs of the two to determine the quadrant
// of the return value.
//
// Special cases are (in order):
// Atan2(y, NaN) = NaN
// Atan2(NaN, x) = NaN
// Atan2(+0, x>=0) = +0
// Atan2(-0, x>=0) = -0
// Atan2(+0, x<=-0) = +Pi
// Atan2(-0, x<=-0) = -Pi
// Atan2(y>0, 0) = +Pi/2
// Atan2(y<0, 0) = -Pi/2
// Atan2(+Inf, +Inf) = +Pi/4
// Atan2(-Inf, +Inf) = -Pi/4
// Atan2(+Inf, -Inf) = 3Pi/4
// Atan2(-Inf, -Inf) = -3Pi/4
// Atan2(y, +Inf) = 0
// Atan2(y>0, -Inf) = +Pi
// Atan2(y<0, -Inf) = -Pi
// Atan2(+Inf, x) = +Pi/2
// Atan2(-Inf, x) = -Pi/2
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·atan2Asm(SB), NOSPLIT, $0-24
// special case
MOVD x+0(FP), R1
MOVD y+8(FP), R2
// special case Atan2(NaN, y) = NaN
MOVD $~(1<<63), R5
AND R1, R5 // x = |x|
MOVD $PosInf, R3
CMPUBLT R3, R5, returnX
// special case Atan2(x, NaN) = NaN
MOVD $~(1<<63), R5
AND R2, R5
CMPUBLT R3, R5, returnY
MOVD $NegZero, R3
CMPUBEQ R3, R1, xIsNegZero
MOVD $0, R3
CMPUBEQ R3, R1, xIsPosZero
MOVD $PosInf, R4
CMPUBEQ R4, R2, yIsPosInf
MOVD $NegInf, R4
CMPUBEQ R4, R2, yIsNegInf
BR Normal
xIsNegZero:
// special case Atan(-0, y>=0) = -0
MOVD $0, R4
CMPBLE R4, R2, returnX
//special case Atan2(-0, y<=-0) = -Pi
MOVD $NegZero, R4
CMPBGE R4, R2, returnNegPi
BR Normal
xIsPosZero:
//special case Atan2(0, 0) = 0
MOVD $0, R4
CMPUBEQ R4, R2, returnX
//special case Atan2(0, y<=-0) = Pi
MOVD $NegZero, R4
CMPBGE R4, R2, returnPi
BR Normal
yIsNegInf:
//special case Atan2(+Inf, -Inf) = 3Pi/4
MOVD $PosInf, R3
CMPUBEQ R3, R1, posInfNegInf
//special case Atan2(-Inf, -Inf) = -3Pi/4
MOVD $NegInf, R3
CMPUBEQ R3, R1, negInfNegInf
BR Normal
yIsPosInf:
//special case Atan2(+Inf, +Inf) = Pi/4
MOVD $PosInf, R3
CMPUBEQ R3, R1, posInfPosInf
//special case Atan2(-Inf, +Inf) = -Pi/4
MOVD $NegInf, R3
CMPUBEQ R3, R1, negInfPosInf
//special case Atan2(x, +Inf) = Copysign(0, x)
CMPBLT R1, $0, returnNegZero
BR returnPosZero
Normal:
FMOVD x+0(FP), F0
FMOVD y+8(FP), F2
MOVD $·atan2rodataL25<>+0(SB), R9
LGDR F0, R2
LGDR F2, R1
RISBGNZ $32, $63, $32, R2, R2
RISBGNZ $32, $63, $32, R1, R1
WORD $0xB9170032 //llgtr %r3,%r2
RISBGZ $63, $63, $33, R2, R5
WORD $0xB9170041 //llgtr %r4,%r1
WFLCDB V0, V20
MOVW R4, R6
MOVW R3, R7
CMPUBLT R6, R7, L17
WFDDB V2, V0, V3
ADDW $2, R5, R2
MOVW R4, R6
MOVW R3, R7
CMPUBLE R6, R7, L20
L3:
WFMDB V3, V3, V4
VLEG $0, 152(R9), V18
VLEG $0, 144(R9), V16
FMOVD 136(R9), F1
FMOVD 128(R9), F5
FMOVD 120(R9), F6
WFMADB V4, V16, V5, V16
WFMADB V4, V6, V1, V6
FMOVD 112(R9), F7
WFMDB V4, V4, V1
WFMADB V4, V7, V18, V7
VLEG $0, 104(R9), V18
WFMADB V1, V6, V16, V6
CMPWU R4, R3
FMOVD 96(R9), F5
VLEG $0, 88(R9), V16
WFMADB V4, V5, V18, V5
VLEG $0, 80(R9), V18
VLEG $0, 72(R9), V22
WFMADB V4, V16, V18, V16
VLEG $0, 64(R9), V18
WFMADB V1, V7, V5, V7
WFMADB V4, V18, V22, V18
WFMDB V1, V1, V5
WFMADB V1, V16, V18, V16
VLEG $0, 56(R9), V18
WFMADB V5, V6, V7, V6
VLEG $0, 48(R9), V22
FMOVD 40(R9), F7
WFMADB V4, V7, V18, V7
VLEG $0, 32(R9), V18
WFMADB V5, V6, V16, V6
WFMADB V4, V18, V22, V18
VLEG $0, 24(R9), V16
WFMADB V1, V7, V18, V7
VLEG $0, 16(R9), V18
VLEG $0, 8(R9), V22
WFMADB V4, V18, V16, V18
VLEG $0, 0(R9), V16
WFMADB V5, V6, V7, V6
WFMADB V4, V16, V22, V16
FMUL F3, F4
WFMADB V1, V18, V16, V1
FMADD F6, F5, F1
WFMADB V4, V1, V3, V4
BLT L18
BGT L7
LTDBR F2, F2
BLTU L21
L8:
LTDBR F0, F0
BLTU L22
L9:
WFCHDBS V2, V0, V0
BNE L18
L7:
MOVW R1, R6
CMPBGE R6, $0, L1
L18:
RISBGZ $58, $60, $3, R2, R2
MOVD $·atan2xpi2h<>+0(SB), R1
MOVD ·atan2xpim<>+0(SB), R3
LDGR R3, F0
WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1)
BYTE $0x40
BYTE $0x1E
L1:
FMOVD F4, ret+16(FP)
RET
L20:
LTDBR F2, F2
BLTU L23
FMOVD F2, F6
L4:
LTDBR F0, F0
BLTU L24
FMOVD F0, F4
L5:
WFCHDBS V6, V4, V4
BEQ L3
L17:
WFDDB V0, V2, V4
BYTE $0x18 //lr %r2,%r5
BYTE $0x25
WORD $0xB3130034 //lcdbr %f3,%f4
BR L3
L23:
WORD $0xB3130062 //lcdbr %f6,%f2
BR L4
L22:
VLR V20, V0
BR L9
L21:
WORD $0xB3130022 //lcdbr %f2,%f2
BR L8
L24:
VLR V20, V4
BR L5
returnX: //the result is same as the first argument
MOVD R1, ret+16(FP)
RET
returnY: //the result is same as the second argument
MOVD R2, ret+16(FP)
RET
returnPi:
MOVD $Pi, R1
MOVD R1, ret+16(FP)
RET
returnNegPi:
MOVD $NegPi, R1
MOVD R1, ret+16(FP)
RET
posInfNegInf:
MOVD $Pi3Div4, R1
MOVD R1, ret+16(FP)
RET
negInfNegInf:
MOVD $NegPi3Div4, R1
MOVD R1, ret+16(FP)
RET
posInfPosInf:
MOVD $PiDiv4, R1
MOVD R1, ret+16(FP)
RET
negInfPosInf:
MOVD $NegPiDiv4, R1
MOVD R1, ret+16(FP)
RET
returnNegZero:
MOVD $NegZero, R1
MOVD R1, ret+16(FP)
RET
returnPosZero:
MOVD $0, ret+16(FP)
RET
|