diff options
Diffstat (limited to 'src/math/cbrt_s390x.s')
-rw-r--r-- | src/math/cbrt_s390x.s | 156 |
1 files changed, 156 insertions, 0 deletions
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants.
// Nine 8-byte floating-point entries (72 bytes in total). cbrtAsm
// addresses them by byte offset from the start of this symbol.
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625                    // 1 + 2^-12
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00        // 1/3
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.    // 2^96
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72

// Index tables
//
// cbrttab32069: 80 bytes; cbrtAsm reads it one byte at a time (llc).
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80

// cbrttab22068: 128 bytes; cbrtAsm adds a halfword from it (ah).
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128

// cbrttab12067: 128 bytes; cbrtAsm loads a halfword from it (lh).
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128

// Cbrt returns the cube root of the argument.
//
// Special cases are:
//      Cbrt(±0) = ±0
//      Cbrt(±Inf) = ±Inf
//      Cbrt(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
// Implementation notes for cbrtAsm.
//
// Frame $0-16, NOSPLIT: the 8-byte argument x is read from x+0(FP) and the
// 8-byte result is stored to ret+8(FP). No stack locals are used.
//
// Several z/Architecture instructions are emitted as raw WORD/BYTE
// encodings; the comment on the first line of each gives the mnemonic.
// A WORD followed by two BYTEs forms one 6-byte instruction, so those
// lines must stay together and in order.
//
// Register roles:
//   F0  argument x (returned unchanged on the L1 path)
//   R9  base address of cbrtrodataL9
//   R2  high 32 bits of x (or of the rescaled x on the L2 path)
//   R4  additive term for the starting estimate: 0x200 on the L2 path,
//       0 on the L8 path
//
// Dispatch on the high word of x with its sign bit cleared (R1):
//   R1 <= 0x000FFFFF -> L2: the exponent field is zero. A zero input is
//                           returned as is; otherwise x is multiplied by
//                           the constant at 64(R9) before joining L4.
//   R1 <= 0x7FEFFFFF -> L8: finite input with a non-zero exponent field.
//   otherwise        -> L1: Inf or NaN, the argument is returned unchanged.
TEXT	·cbrtAsm(SB), NOSPLIT, $0-16
	FMOVD	x+0(FP), F0
	MOVD	$·cbrtrodataL9<>+0(SB), R9
	LGDR	F0, R2				// R2 = raw bits of x
	WORD	$0xC039000F	//iilf	%r3,1048575
	BYTE	$0xFF
	BYTE	$0xFF
	SRAD	$32, R2				// keep the high word of x
	WORD	$0xB9170012	//llgtr	%r1,%r2
	MOVW	R1, R6
	MOVW	R3, R7
	CMPBLE	R6, R7, L2			// exponent field is zero
	WORD	$0xC0397FEF	//iilf	%r3,2146435071
	BYTE	$0xFF
	BYTE	$0xFF
	MOVW	R3, R7
	CMPBLE	R6, R7, L8			// ordinary finite input
L1:
	// Inf, NaN or zero: hand the argument back untouched.
	FMOVD	F0, ret+8(FP)
	RET
L2:
	LTDBR	F0, F0
	BEQ	L1				// x == 0
	FMOVD	F0, F2
	WORD	$0xED209040	//mdb	%f2,.L10-.L9(%r9)
	BYTE	$0x00
	BYTE	$0x1C
	MOVH	$0x200, R4
	LGDR	F2, R2
	SRAD	$32, R2				// high word of the rescaled value
L4:
	// Starting estimate: bit fields of R2 index the three lookup tables.
	// The looked-up values and R4 are summed, and the low 16 bits of the
	// sum are moved into the top 16 bits of F2.
	RISBGZ	$57, $62, $39, R2, R3
	MOVD	$·cbrttab12067<>+0(SB), R1
	WORD	$0x48131000	//lh	%r1,0(%r3,%r1)
	RISBGZ	$57, $62, $45, R2, R3
	MOVD	$·cbrttab22068<>+0(SB), R5
	RISBGNZ	$60, $63, $48, R2, R2
	WORD	$0x4A135000	//ah	%r1,0(%r3,%r5)
	BYTE	$0x18		//lr	%r3,%r1
	BYTE	$0x31
	MOVD	$·cbrttab32069<>+0(SB), R1
	FMOVD	56(R9), F1
	FMOVD	48(R9), F5
	WORD	$0xEC23393B	//rosbg	%r2,%r3,57,59,4
	BYTE	$0x04
	BYTE	$0x56
	WORD	$0xE3121000	//llc	%r1,0(%r2,%r1)
	BYTE	$0x00
	BYTE	$0x94
	ADDW	R3, R1
	ADDW	R4, R1
	SLW	$16, R1, R1
	SLD	$32, R1, R1
	LDGR	R1, F2

	// Refinement with the coefficients at 0(R9)..56(R9). Loads are
	// interleaved with the multiply-add chain, so the order is significant.
	WFMDB	V2, V2, V4
	WFMDB	V4, V0, V6
	WFMSDB	V4, V6, V2, V4
	FMOVD	40(R9), F6
	FMSUB	F1, F4, F2
	FMOVD	32(R9), F4
	WFMDB	V2, V2, V3
	FMOVD	24(R9), F1
	FMUL	F3, F0
	FMOVD	16(R9), F3
	WFMADB	V2, V0, V5, V2
	FMOVD	8(R9), F5
	FMADD	F6, F2, F4
	WFMADB	V2, V1, V3, V1
	WFMDB	V2, V2, V6
	FMOVD	0(R9), F3
	WFMADB	V4, V6, V1, V4
	WFMADB	V2, V5, V3, V2
	FMADD	F4, F6, F2
	FMADD	F2, F0, F0
	FMOVD	F0, ret+8(FP)
	RET
L8:
	MOVH	$0x0, R4
	BR	L4