summaryrefslogtreecommitdiffstats
path: root/src/math/cbrt_s390x.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/cbrt_s390x.s')
-rw-r--r--src/math/cbrt_s390x.s156
1 files changed, 156 insertions, 0 deletions
diff --git a/src/math/cbrt_s390x.s b/src/math/cbrt_s390x.s
new file mode 100644
index 0000000..87bba53
--- /dev/null
+++ b/src/math/cbrt_s390x.s
@@ -0,0 +1,156 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
+DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
+DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
+DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
+DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
+DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
+DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
+DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
+DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
+GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72
+
+// Index tables
+DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
+DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
+DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
+DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
+DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
+DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
+DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
+DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
+DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
+DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
+GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80
+
+DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
+DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
+DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
+DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
+DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
+DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
+DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
+DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
+DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
+DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
+DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
+DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
+DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
+DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
+DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
+DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
+GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128
+
+DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
+DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
+DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
+DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
+DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
+DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
+DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
+DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
+DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
+DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
+DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
+DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
+DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
+DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
+DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
+DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
+GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128
+
+// Cbrt returns the cube root of the argument.
+//
+// Special cases are:
+// Cbrt(±0) = ±0
+// Cbrt(±Inf) = ±Inf
+// Cbrt(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·cbrtAsm(SB), NOSPLIT, $0-16
+ FMOVD x+0(FP), F0
+ MOVD $·cbrtrodataL9<>+0(SB), R9
+ LGDR F0, R2
+ WORD $0xC039000F //iilf %r3,1048575
+ BYTE $0xFF
+ BYTE $0xFF
+ SRAD $32, R2
+ WORD $0xB9170012 //llgtr %r1,%r2
+ MOVW R1, R6
+ MOVW R3, R7
+ CMPBLE R6, R7, L2
+ WORD $0xC0397FEF //iilf %r3,2146435071
+ BYTE $0xFF
+ BYTE $0xFF
+ MOVW R3, R7
+ CMPBLE R6, R7, L8
+L1:
+ FMOVD F0, ret+8(FP)
+ RET
+L3:
+L2:
+ LTDBR F0, F0
+ BEQ L1
+ FMOVD F0, F2
+ WORD $0xED209040 //mdb %f2,.L10-.L9(%r9)
+ BYTE $0x00
+ BYTE $0x1C
+ MOVH $0x200, R4
+ LGDR F2, R2
+ SRAD $32, R2
+L4:
+ RISBGZ $57, $62, $39, R2, R3
+ MOVD $·cbrttab12067<>+0(SB), R1
+ WORD $0x48131000 //lh %r1,0(%r3,%r1)
+ RISBGZ $57, $62, $45, R2, R3
+ MOVD $·cbrttab22068<>+0(SB), R5
+ RISBGNZ $60, $63, $48, R2, R2
+ WORD $0x4A135000 //ah %r1,0(%r3,%r5)
+ BYTE $0x18 //lr %r3,%r1
+ BYTE $0x31
+ MOVD $·cbrttab32069<>+0(SB), R1
+ FMOVD 56(R9), F1
+ FMOVD 48(R9), F5
+ WORD $0xEC23393B //rosbg %r2,%r3,57,59,4
+ BYTE $0x04
+ BYTE $0x56
+ WORD $0xE3121000 //llc %r1,0(%r2,%r1)
+ BYTE $0x00
+ BYTE $0x94
+ ADDW R3, R1
+ ADDW R4, R1
+ SLW $16, R1, R1
+ SLD $32, R1, R1
+ LDGR R1, F2
+ WFMDB V2, V2, V4
+ WFMDB V4, V0, V6
+ WFMSDB V4, V6, V2, V4
+ FMOVD 40(R9), F6
+ FMSUB F1, F4, F2
+ FMOVD 32(R9), F4
+ WFMDB V2, V2, V3
+ FMOVD 24(R9), F1
+ FMUL F3, F0
+ FMOVD 16(R9), F3
+ WFMADB V2, V0, V5, V2
+ FMOVD 8(R9), F5
+ FMADD F6, F2, F4
+ WFMADB V2, V1, V3, V1
+ WFMDB V2, V2, V6
+ FMOVD 0(R9), F3
+ WFMADB V4, V6, V1, V4
+ WFMADB V2, V5, V3, V2
+ FMADD F4, F6, F2
+ FMADD F2, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+L8:
+ MOVH $0x0, R4
+ BR L4