diff options
Diffstat (limited to 'src/math')
215 files changed, 48092 insertions, 0 deletions
diff --git a/src/math/abs.go b/src/math/abs.go new file mode 100644 index 0000000..df83add --- /dev/null +++ b/src/math/abs.go @@ -0,0 +1,14 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Abs returns the absolute value of x. +// +// Special cases are: +// Abs(±Inf) = +Inf +// Abs(NaN) = NaN +func Abs(x float64) float64 { + return Float64frombits(Float64bits(x) &^ (1 << 63)) +} diff --git a/src/math/acos_s390x.s b/src/math/acos_s390x.s new file mode 100644 index 0000000..d2288b8 --- /dev/null +++ b/src/math/acos_s390x.s @@ -0,0 +1,144 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·acosrodataL13<> + 0(SB)/8, $0.314159265358979323E+01 //pi +DATA ·acosrodataL13<> + 8(SB)/8, $-0.0 +DATA ·acosrodataL13<> + 16(SB)/8, $0x7ff8000000000000 //NaN +DATA ·acosrodataL13<> + 24(SB)/8, $-1.0 +DATA ·acosrodataL13<> + 32(SB)/8, $1.0 +DATA ·acosrodataL13<> + 40(SB)/8, $0.166666666666651626E+00 +DATA ·acosrodataL13<> + 48(SB)/8, $0.750000000042621169E-01 +DATA ·acosrodataL13<> + 56(SB)/8, $0.446428567178116477E-01 +DATA ·acosrodataL13<> + 64(SB)/8, $0.303819660378071894E-01 +DATA ·acosrodataL13<> + 72(SB)/8, $0.223715011892010405E-01 +DATA ·acosrodataL13<> + 80(SB)/8, $0.173659424522364952E-01 +DATA ·acosrodataL13<> + 88(SB)/8, $0.137810186504372266E-01 +DATA ·acosrodataL13<> + 96(SB)/8, $0.134066870961173521E-01 +DATA ·acosrodataL13<> + 104(SB)/8, $-.412335502831898721E-02 +DATA ·acosrodataL13<> + 112(SB)/8, $0.867383739532082719E-01 +DATA ·acosrodataL13<> + 120(SB)/8, $-.328765950607171649E+00 +DATA ·acosrodataL13<> + 128(SB)/8, $0.110401073869414626E+01 +DATA ·acosrodataL13<> + 136(SB)/8, $-.270694366992537307E+01 +DATA ·acosrodataL13<> + 
144(SB)/8, $0.500196500770928669E+01 +DATA ·acosrodataL13<> + 152(SB)/8, $-.665866959108585165E+01 +DATA ·acosrodataL13<> + 160(SB)/8, $-.344895269334086578E+01 +DATA ·acosrodataL13<> + 168(SB)/8, $0.927437952918301659E+00 +DATA ·acosrodataL13<> + 176(SB)/8, $0.610487478874645653E+01 +DATA ·acosrodataL13<> + 184(SB)/8, $0.157079632679489656e+01 +DATA ·acosrodataL13<> + 192(SB)/8, $0.0 +GLOBL ·acosrodataL13<> + 0(SB), RODATA, $200 + +// Acos returns the arccosine, in radians, of the argument. +// +// Special case is: +// Acos(x) = NaN if x < -1 or x > 1 +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·acosAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·acosrodataL13<>+0(SB), R9 + LGDR F0, R12 + FMOVD F0, F10 + SRAD $32, R12 + WORD $0xC0293FE6 //iilf %r2,1072079005 + BYTE $0xA0 + BYTE $0x9D + WORD $0xB917001C //llgtr %r1,%r12 + CMPW R1,R2 + BGT L2 + FMOVD 192(R9), F8 + FMADD F0, F0, F8 + FMOVD 184(R9), F1 +L3: + WFMDB V8, V8, V2 + FMOVD 176(R9), F6 + FMOVD 168(R9), F0 + FMOVD 160(R9), F4 + WFMADB V2, V0, V6, V0 + FMOVD 152(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 144(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 136(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 128(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 120(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 112(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 104(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 96(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 88(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 80(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 72(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 64(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 56(R9), F6 + WFMADB V2, V4, V6, V4 + FMOVD 48(R9), F6 + WFMADB V2, V0, V6, V0 + FMOVD 40(R9), F6 + WFMADB V2, V4, V6, V2 + FMOVD 192(R9), F4 + WFMADB V8, V0, V2, V0 + WFMADB V10, V8, V4, V8 + FMADD F0, F8, F10 + WFSDB V10, V1, V10 +L1: + FMOVD F10, ret+8(FP) + RET + +L2: + WORD $0xC0293FEF //iilf %r2,1072693247 + BYTE $0xFF + BYTE $0xFF + CMPW R1, R2 
+ BLE L12 +L4: + WORD $0xED009020 //cdb %f0,.L34-.L13(%r9) + BYTE $0x00 + BYTE $0x19 + BEQ L8 + WORD $0xED009018 //cdb %f0,.L35-.L13(%r9) + BYTE $0x00 + BYTE $0x19 + BEQ L9 + WFCEDBS V10, V10, V0 + BVS L1 + FMOVD 16(R9), F10 + BR L1 +L12: + FMOVD 24(R9), F0 + FMADD F10, F10, F0 + WORD $0xB3130080 //lcdbr %f8,%f0 + WORD $0xED009008 //cdb %f0,.L37-.L13(%r9) + BYTE $0x00 + BYTE $0x19 + FSQRT F8, F10 +L5: + MOVW R12, R4 + CMPBLE R4, $0, L7 + WORD $0xB31300AA //lcdbr %f10,%f10 + FMOVD $0, F1 + BR L3 +L9: + FMOVD 0(R9), F10 + BR L1 +L8: + FMOVD $0, F0 + FMOVD F0, ret+8(FP) + RET +L7: + FMOVD 0(R9), F1 + BR L3 diff --git a/src/math/acosh.go b/src/math/acosh.go new file mode 100644 index 0000000..f74e0b6 --- /dev/null +++ b/src/math/acosh.go @@ -0,0 +1,64 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// __ieee754_acosh(x) +// Method : +// Based on +// acosh(x) = log [ x + sqrt(x*x-1) ] +// we have +// acosh(x) := log(x)+ln2, if x is large; else +// acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else +// acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1. +// +// Special cases: +// acosh(x) is NaN with signal if x<1. +// acosh(NaN) is NaN without signal. +// + +// Acosh returns the inverse hyperbolic cosine of x. 
+// +// Special cases are: +// Acosh(+Inf) = +Inf +// Acosh(x) = NaN if x < 1 +// Acosh(NaN) = NaN +func Acosh(x float64) float64 { + if haveArchAcosh { + return archAcosh(x) + } + return acosh(x) +} + +func acosh(x float64) float64 { + const Large = 1 << 28 // 2**28 + // first case is special case + switch { + case x < 1 || IsNaN(x): + return NaN() + case x == 1: + return 0 + case x >= Large: + return Log(x) + Ln2 // x >= 2**28 + case x > 2: + return Log(2*x - 1/(x+Sqrt(x*x-1))) // 2**28 > x > 2 + } + t := x - 1 + return Log1p(t + Sqrt(2*t+t*t)) // 2 >= x > 1 +} diff --git a/src/math/acosh_s390x.s b/src/math/acosh_s390x.s new file mode 100644 index 0000000..9294c48 --- /dev/null +++ b/src/math/acosh_s390x.s @@ -0,0 +1,158 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·acoshrodataL11<> + 0(SB)/8, $-1.0 +DATA ·acoshrodataL11<> + 8(SB)/8, $.41375273347623353626 +DATA ·acoshrodataL11<> + 16(SB)/8, $.51487302528619766235E+04 +DATA ·acoshrodataL11<> + 24(SB)/8, $-1.67526912689208984375 +DATA ·acoshrodataL11<> + 32(SB)/8, $0.181818181818181826E+00 +DATA ·acoshrodataL11<> + 40(SB)/8, $-.165289256198351540E-01 +DATA ·acoshrodataL11<> + 48(SB)/8, $0.200350613573012186E-02 +DATA ·acoshrodataL11<> + 56(SB)/8, $-.273205381970859341E-03 +DATA ·acoshrodataL11<> + 64(SB)/8, $0.397389654305194527E-04 +DATA ·acoshrodataL11<> + 72(SB)/8, $0.938370938292558173E-06 +DATA ·acoshrodataL11<> + 80(SB)/8, $-.602107458843052029E-05 +DATA ·acoshrodataL11<> + 88(SB)/8, $0.212881813645679599E-07 +DATA ·acoshrodataL11<> + 96(SB)/8, $-.148682720127920854E-06 +DATA ·acoshrodataL11<> + 104(SB)/8, $-5.5 +DATA ·acoshrodataL11<> + 112(SB)/8, $0x7ff8000000000000 //NaN +GLOBL ·acoshrodataL11<> + 0(SB), RODATA, $120 + +// Table of log correction terms +DATA ·acoshtab2068<> + 0(SB)/8, 
$0.585235384085551248E-01 +DATA ·acoshtab2068<> + 8(SB)/8, $0.412206153771168640E-01 +DATA ·acoshtab2068<> + 16(SB)/8, $0.273839003221648339E-01 +DATA ·acoshtab2068<> + 24(SB)/8, $0.166383778368856480E-01 +DATA ·acoshtab2068<> + 32(SB)/8, $0.866678223433169637E-02 +DATA ·acoshtab2068<> + 40(SB)/8, $0.319831684989627514E-02 +DATA ·acoshtab2068<> + 48(SB)/8, $0.0 +DATA ·acoshtab2068<> + 56(SB)/8, $-.113006378583725549E-02 +DATA ·acoshtab2068<> + 64(SB)/8, $-.367979419636602491E-03 +DATA ·acoshtab2068<> + 72(SB)/8, $0.213172484510484979E-02 +DATA ·acoshtab2068<> + 80(SB)/8, $0.623271047682013536E-02 +DATA ·acoshtab2068<> + 88(SB)/8, $0.118140812789696885E-01 +DATA ·acoshtab2068<> + 96(SB)/8, $0.187681358930914206E-01 +DATA ·acoshtab2068<> + 104(SB)/8, $0.269985148668178992E-01 +DATA ·acoshtab2068<> + 112(SB)/8, $0.364186619761331328E-01 +DATA ·acoshtab2068<> + 120(SB)/8, $0.469505379381388441E-01 +GLOBL ·acoshtab2068<> + 0(SB), RODATA, $128 + +// Acosh returns the inverse hyperbolic cosine of the argument. +// +// Special cases are: +// Acosh(+Inf) = +Inf +// Acosh(x) = NaN if x < 1 +// Acosh(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·acoshAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·acoshrodataL11<>+0(SB), R9 + LGDR F0, R1 + WORD $0xC0295FEF //iilf %r2,1609564159 + BYTE $0xFF + BYTE $0xFF + SRAD $32, R1 + CMPW R1, R2 + BGT L2 + WORD $0xC0293FEF //iilf %r2,1072693247 + BYTE $0xFF + BYTE $0xFF + CMPW R1, R2 + BGT L10 +L3: + WFCEDBS V0, V0, V2 + BVS L1 + FMOVD 112(R9), F0 +L1: + FMOVD F0, ret+8(FP) + RET +L2: + WORD $0xC0297FEF //iilf %r2,2146435071 + BYTE $0xFF + BYTE $0xFF + MOVW R1, R6 + MOVW R2, R7 + CMPBGT R6, R7, L1 + FMOVD F0, F8 + FMOVD $0, F0 + WFADB V0, V8, V0 + WORD $0xC0398006 //iilf %r3,2147909631 + BYTE $0x7F + BYTE $0xFF + LGDR F0, R5 + SRAD $32, R5 + MOVH $0x0, R1 + SUBW R5, R3 + FMOVD $0, F10 + RISBGZ $32, $47, $0, R3, R4 + RISBGZ $57, $60, $51, R3, R3 + BYTE $0x18 //lr %r2,%r4 + BYTE $0x24 + RISBGN $0, $31, $32, R4, R1 + SUBW $0x100000, R2 + SRAW $8, R2, R2 + ORW $0x45000000, R2 +L5: + LDGR R1, F0 + FMOVD 104(R9), F2 + FMADD F8, F0, F2 + FMOVD 96(R9), F4 + WFMADB V10, V0, V2, V0 + FMOVD 88(R9), F6 + FMOVD 80(R9), F2 + WFMADB V0, V6, V4, V6 + FMOVD 72(R9), F1 + WFMDB V0, V0, V4 + WFMADB V0, V1, V2, V1 + FMOVD 64(R9), F2 + WFMADB V6, V4, V1, V6 + FMOVD 56(R9), F1 + RISBGZ $57, $60, $0, R3, R3 + WFMADB V0, V2, V1, V2 + FMOVD 48(R9), F1 + WFMADB V4, V6, V2, V6 + FMOVD 40(R9), F2 + WFMADB V0, V1, V2, V1 + VLVGF $0, R2, V2 + WFMADB V4, V6, V1, V4 + LDEBR F2, F2 + FMOVD 32(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 24(R9), F1 + FMOVD 16(R9), F6 + MOVD $·acoshtab2068<>+0(SB), R1 + WFMADB V2, V1, V6, V2 + FMOVD 0(R3)(R1*1), F3 + WFMADB V0, V4, V3, V0 + FMOVD 8(R9), F4 + FMADD F4, F2, F0 + FMOVD F0, ret+8(FP) + RET +L10: + FMOVD F0, F8 + FMOVD 0(R9), F0 + FMADD F8, F8, F0 + LTDBR F0, F0 + FSQRT F0, F10 +L4: + WFADB V10, V8, V0 + WORD $0xC0398006 //iilf %r3,2147909631 + BYTE $0x7F + BYTE $0xFF + LGDR F0, R5 + SRAD $32, R5 + MOVH $0x0, R1 + SUBW R5, R3 + SRAW $8, R3, R2 + RISBGZ $32, $47, $0, R3, R4 + ANDW $0xFFFFFF00, R2 + RISBGZ $57, $60, $51, R3, R3 + ORW $0x45000000, R2 + 
RISBGN $0, $31, $32, R4, R1 + BR L5 diff --git a/src/math/all_test.go b/src/math/all_test.go new file mode 100644 index 0000000..c11d823 --- /dev/null +++ b/src/math/all_test.go @@ -0,0 +1,3855 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math_test + +import ( + "fmt" + . "math" + "testing" + "unsafe" +) + +var vf = []float64{ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + -8.6859247685756013e+00, +} + +// The expected results below were computed by the high precision calculators +// at https://keisan.casio.com/. More exact input values (array vf[], above) +// were obtained by printing them with "%.26f". The answers were calculated +// to 26 digits (by using the "Digit number" drop-down control of each +// calculator). 
+var acos = []float64{ + 1.0496193546107222142571536e+00, + 6.8584012813664425171660692e-01, + 1.5984878714577160325521819e+00, + 2.0956199361475859327461799e+00, + 2.7053008467824138592616927e-01, + 1.2738121680361776018155625e+00, + 1.0205369421140629186287407e+00, + 1.2945003481781246062157835e+00, + 1.3872364345374451433846657e+00, + 2.6231510803970463967294145e+00, +} +var acosh = []float64{ + 2.4743347004159012494457618e+00, + 2.8576385344292769649802701e+00, + 7.2796961502981066190593175e-01, + 2.4796794418831451156471977e+00, + 3.0552020742306061857212962e+00, + 2.044238592688586588942468e+00, + 2.5158701513104513595766636e+00, + 1.99050839282411638174299e+00, + 1.6988625798424034227205445e+00, + 2.9611454842470387925531875e+00, +} +var asin = []float64{ + 5.2117697218417440497416805e-01, + 8.8495619865825236751471477e-01, + -02.769154466281941332086016e-02, + -5.2482360935268931351485822e-01, + 1.3002662421166552333051524e+00, + 2.9698415875871901741575922e-01, + 5.5025938468083370060258102e-01, + 2.7629597861677201301553823e-01, + 1.83559892257451475846656e-01, + -1.0523547536021497774980928e+00, +} +var asinh = []float64{ + 2.3083139124923523427628243e+00, + 2.743551594301593620039021e+00, + -2.7345908534880091229413487e-01, + -2.3145157644718338650499085e+00, + 2.9613652154015058521951083e+00, + 1.7949041616585821933067568e+00, + 2.3564032905983506405561554e+00, + 1.7287118790768438878045346e+00, + 1.3626658083714826013073193e+00, + -2.8581483626513914445234004e+00, +} +var atan = []float64{ + 1.372590262129621651920085e+00, + 1.442290609645298083020664e+00, + -2.7011324359471758245192595e-01, + -1.3738077684543379452781531e+00, + 1.4673921193587666049154681e+00, + 1.2415173565870168649117764e+00, + 1.3818396865615168979966498e+00, + 1.2194305844639670701091426e+00, + 1.0696031952318783760193244e+00, + -1.4561721938838084990898679e+00, +} +var atanh = []float64{ + 5.4651163712251938116878204e-01, + 1.0299474112843111224914709e+00, + 
-2.7695084420740135145234906e-02, + -5.5072096119207195480202529e-01, + 1.9943940993171843235906642e+00, + 3.01448604578089708203017e-01, + 5.8033427206942188834370595e-01, + 2.7987997499441511013958297e-01, + 1.8459947964298794318714228e-01, + -1.3273186910532645867272502e+00, +} +var atan2 = []float64{ + 1.1088291730037004444527075e+00, + 9.1218183188715804018797795e-01, + 1.5984772603216203736068915e+00, + 2.0352918654092086637227327e+00, + 8.0391819139044720267356014e-01, + 1.2861075249894661588866752e+00, + 1.0889904479131695712182587e+00, + 1.3044821793397925293797357e+00, + 1.3902530903455392306872261e+00, + 2.2859857424479142655411058e+00, +} +var cbrt = []float64{ + 1.7075799841925094446722675e+00, + 1.9779982212970353936691498e+00, + -6.5177429017779910853339447e-01, + -1.7111838886544019873338113e+00, + 2.1279920909827937423960472e+00, + 1.4303536770460741452312367e+00, + 1.7357021059106154902341052e+00, + 1.3972633462554328350552916e+00, + 1.2221149580905388454977636e+00, + -2.0556003730500069110343596e+00, +} +var ceil = []float64{ + 5.0000000000000000e+00, + 8.0000000000000000e+00, + Copysign(0, -1), + -5.0000000000000000e+00, + 1.0000000000000000e+01, + 3.0000000000000000e+00, + 6.0000000000000000e+00, + 3.0000000000000000e+00, + 2.0000000000000000e+00, + -8.0000000000000000e+00, +} +var copysign = []float64{ + -4.9790119248836735e+00, + -7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, + -9.6362937071984173e+00, + -2.9263772392439646e+00, + -5.2290834314593066e+00, + -2.7279399104360102e+00, + -1.8253080916808550e+00, + -8.6859247685756013e+00, +} +var cos = []float64{ + 2.634752140995199110787593e-01, + 1.148551260848219865642039e-01, + 9.6191297325640768154550453e-01, + 2.938141150061714816890637e-01, + -9.777138189897924126294461e-01, + -9.7693041344303219127199518e-01, + 4.940088096948647263961162e-01, + -9.1565869021018925545016502e-01, + -2.517729313893103197176091e-01, + -7.39241351595676573201918e-01, +} + +// 
Results for 100000 * Pi + vf[i] +var cosLarge = []float64{ + 2.634752141185559426744e-01, + 1.14855126055543100712e-01, + 9.61912973266488928113e-01, + 2.9381411499556122552e-01, + -9.777138189880161924641e-01, + -9.76930413445147608049e-01, + 4.940088097314976789841e-01, + -9.15658690217517835002e-01, + -2.51772931436786954751e-01, + -7.3924135157173099849e-01, +} + +var cosh = []float64{ + 7.2668796942212842775517446e+01, + 1.1479413465659254502011135e+03, + 1.0385767908766418550935495e+00, + 7.5000957789658051428857788e+01, + 7.655246669605357888468613e+03, + 9.3567491758321272072888257e+00, + 9.331351599270605471131735e+01, + 7.6833430994624643209296404e+00, + 3.1829371625150718153881164e+00, + 2.9595059261916188501640911e+03, +} +var erf = []float64{ + 5.1865354817738701906913566e-01, + 7.2623875834137295116929844e-01, + -3.123458688281309990629839e-02, + -5.2143121110253302920437013e-01, + 8.2704742671312902508629582e-01, + 3.2101767558376376743993945e-01, + 5.403990312223245516066252e-01, + 3.0034702916738588551174831e-01, + 2.0369924417882241241559589e-01, + -7.8069386968009226729944677e-01, +} +var erfc = []float64{ + 4.8134645182261298093086434e-01, + 2.7376124165862704883070156e-01, + 1.0312345868828130999062984e+00, + 1.5214312111025330292043701e+00, + 1.7295257328687097491370418e-01, + 6.7898232441623623256006055e-01, + 4.596009687776754483933748e-01, + 6.9965297083261411448825169e-01, + 7.9630075582117758758440411e-01, + 1.7806938696800922672994468e+00, +} +var erfinv = []float64{ + 4.746037673358033586786350696e-01, + 8.559054432692110956388764172e-01, + -2.45427830571707336251331946e-02, + -4.78116683518973366268905506e-01, + 1.479804430319470983648120853e+00, + 2.654485787128896161882650211e-01, + 5.027444534221520197823192493e-01, + 2.466703532707627818954585670e-01, + 1.632011465103005426240343116e-01, + -1.06672334642196900710000389e+00, +} +var exp = []float64{ + 1.4533071302642137507696589e+02, + 2.2958822575694449002537581e+03, + 
7.5814542574851666582042306e-01, + 6.6668778421791005061482264e-03, + 1.5310493273896033740861206e+04, + 1.8659907517999328638667732e+01, + 1.8662167355098714543942057e+02, + 1.5301332413189378961665788e+01, + 6.2047063430646876349125085e+00, + 1.6894712385826521111610438e-04, +} +var expm1 = []float64{ + 5.105047796122957327384770212e-02, + 8.046199708567344080562675439e-02, + -2.764970978891639815187418703e-03, + -4.8871434888875355394330300273e-02, + 1.0115864277221467777117227494e-01, + 2.969616407795910726014621657e-02, + 5.368214487944892300914037972e-02, + 2.765488851131274068067445335e-02, + 1.842068661871398836913874273e-02, + -8.3193870863553801814961137573e-02, +} +var expm1Large = []float64{ + 4.2031418113550844e+21, + 4.0690789717473863e+33, + -0.9372627915981363e+00, + -1.0, + 7.077694784145933e+41, + 5.117936223839153e+12, + 5.124137759001189e+22, + 7.03546003972584e+11, + 8.456921800389698e+07, + -1.0, +} +var exp2 = []float64{ + 3.1537839463286288034313104e+01, + 2.1361549283756232296144849e+02, + 8.2537402562185562902577219e-01, + 3.1021158628740294833424229e-02, + 7.9581744110252191462569661e+02, + 7.6019905892596359262696423e+00, + 3.7506882048388096973183084e+01, + 6.6250893439173561733216375e+00, + 3.5438267900243941544605339e+00, + 2.4281533133513300984289196e-03, +} +var fabs = []float64{ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + 2.7688005719200159e-01, + 5.0106036182710749e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + 8.6859247685756013e+00, +} +var fdim = []float64{ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + 0.0000000000000000e+00, + 0.0000000000000000e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + 0.0000000000000000e+00, +} +var floor = []float64{ + 4.0000000000000000e+00, + 7.0000000000000000e+00, + -1.0000000000000000e+00, + 
-6.0000000000000000e+00, + 9.0000000000000000e+00, + 2.0000000000000000e+00, + 5.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + -9.0000000000000000e+00, +} +var fmod = []float64{ + 4.197615023265299782906368e-02, + 2.261127525421895434476482e+00, + 3.231794108794261433104108e-02, + 4.989396381728925078391512e+00, + 3.637062928015826201999516e-01, + 1.220868282268106064236690e+00, + 4.770916568540693347699744e+00, + 1.816180268691969246219742e+00, + 8.734595415957246977711748e-01, + 1.314075231424398637614104e+00, +} + +type fi struct { + f float64 + i int +} + +var frexp = []fi{ + {6.2237649061045918750e-01, 3}, + {9.6735905932226306250e-01, 3}, + {-5.5376011438400318000e-01, -1}, + {-6.2632545228388436250e-01, 3}, + {6.02268356699901081250e-01, 4}, + {7.3159430981099115000e-01, 2}, + {6.5363542893241332500e-01, 3}, + {6.8198497760900255000e-01, 2}, + {9.1265404584042750000e-01, 1}, + {-5.4287029803597508250e-01, 4}, +} +var gamma = []float64{ + 2.3254348370739963835386613898e+01, + 2.991153837155317076427529816e+03, + -4.561154336726758060575129109e+00, + 7.719403468842639065959210984e-01, + 1.6111876618855418534325755566e+05, + 1.8706575145216421164173224946e+00, + 3.4082787447257502836734201635e+01, + 1.579733951448952054898583387e+00, + 9.3834586598354592860187267089e-01, + -2.093995902923148389186189429e-05, +} +var j0 = []float64{ + -1.8444682230601672018219338e-01, + 2.27353668906331975435892e-01, + 9.809259936157051116270273e-01, + -1.741170131426226587841181e-01, + -2.1389448451144143352039069e-01, + -2.340905848928038763337414e-01, + -1.0029099691890912094586326e-01, + -1.5466726714884328135358907e-01, + 3.252650187653420388714693e-01, + -8.72218484409407250005360235e-03, +} +var j1 = []float64{ + -3.251526395295203422162967e-01, + 1.893581711430515718062564e-01, + -1.3711761352467242914491514e-01, + 3.287486536269617297529617e-01, + 1.3133899188830978473849215e-01, + 3.660243417832986825301766e-01, + 
-3.4436769271848174665420672e-01, + 4.329481396640773768835036e-01, + 5.8181350531954794639333955e-01, + -2.7030574577733036112996607e-01, +} +var j2 = []float64{ + 5.3837518920137802565192769e-02, + -1.7841678003393207281244667e-01, + 9.521746934916464142495821e-03, + 4.28958355470987397983072e-02, + 2.4115371837854494725492872e-01, + 4.842458532394520316844449e-01, + -3.142145220618633390125946e-02, + 4.720849184745124761189957e-01, + 3.122312022520957042957497e-01, + 7.096213118930231185707277e-02, +} +var jM3 = []float64{ + -3.684042080996403091021151e-01, + 2.8157665936340887268092661e-01, + 4.401005480841948348343589e-04, + 3.629926999056814081597135e-01, + 3.123672198825455192489266e-02, + -2.958805510589623607540455e-01, + -3.2033177696533233403289416e-01, + -2.592737332129663376736604e-01, + -1.0241334641061485092351251e-01, + -2.3762660886100206491674503e-01, +} +var lgamma = []fi{ + {3.146492141244545774319734e+00, 1}, + {8.003414490659126375852113e+00, 1}, + {1.517575735509779707488106e+00, -1}, + {-2.588480028182145853558748e-01, 1}, + {1.1989897050205555002007985e+01, 1}, + {6.262899811091257519386906e-01, 1}, + {3.5287924899091566764846037e+00, 1}, + {4.5725644770161182299423372e-01, 1}, + {-6.363667087767961257654854e-02, 1}, + {-1.077385130910300066425564e+01, -1}, +} +var log = []float64{ + 1.605231462693062999102599e+00, + 2.0462560018708770653153909e+00, + -1.2841708730962657801275038e+00, + 1.6115563905281545116286206e+00, + 2.2655365644872016636317461e+00, + 1.0737652208918379856272735e+00, + 1.6542360106073546632707956e+00, + 1.0035467127723465801264487e+00, + 6.0174879014578057187016475e-01, + 2.161703872847352815363655e+00, +} +var logb = []float64{ + 2.0000000000000000e+00, + 2.0000000000000000e+00, + -2.0000000000000000e+00, + 2.0000000000000000e+00, + 3.0000000000000000e+00, + 1.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + 0.0000000000000000e+00, + 3.0000000000000000e+00, +} +var log10 = []float64{ + 
6.9714316642508290997617083e-01, + 8.886776901739320576279124e-01, + -5.5770832400658929815908236e-01, + 6.998900476822994346229723e-01, + 9.8391002850684232013281033e-01, + 4.6633031029295153334285302e-01, + 7.1842557117242328821552533e-01, + 4.3583479968917773161304553e-01, + 2.6133617905227038228626834e-01, + 9.3881606348649405716214241e-01, +} +var log1p = []float64{ + 4.8590257759797794104158205e-02, + 7.4540265965225865330849141e-02, + -2.7726407903942672823234024e-03, + -5.1404917651627649094953380e-02, + 9.1998280672258624681335010e-02, + 2.8843762576593352865894824e-02, + 5.0969534581863707268992645e-02, + 2.6913947602193238458458594e-02, + 1.8088493239630770262045333e-02, + -9.0865245631588989681559268e-02, +} +var log2 = []float64{ + 2.3158594707062190618898251e+00, + 2.9521233862883917703341018e+00, + -1.8526669502700329984917062e+00, + 2.3249844127278861543568029e+00, + 3.268478366538305087466309e+00, + 1.5491157592596970278166492e+00, + 2.3865580889631732407886495e+00, + 1.447811865817085365540347e+00, + 8.6813999540425116282815557e-01, + 3.118679457227342224364709e+00, +} +var modf = [][2]float64{ + {4.0000000000000000e+00, 9.7901192488367350108546816e-01}, + {7.0000000000000000e+00, 7.3887247457810456552351752e-01}, + {Copysign(0, -1), -2.7688005719200159404635997e-01}, + {-5.0000000000000000e+00, -1.060361827107492160848778e-02}, + {9.0000000000000000e+00, 6.3629370719841737980004837e-01}, + {2.0000000000000000e+00, 9.2637723924396464525443662e-01}, + {5.0000000000000000e+00, 2.2908343145930665230025625e-01}, + {2.0000000000000000e+00, 7.2793991043601025126008608e-01}, + {1.0000000000000000e+00, 8.2530809168085506044576505e-01}, + {-8.0000000000000000e+00, -6.8592476857560136238589621e-01}, +} +var nextafter32 = []float32{ + 4.979012489318848e+00, + 7.738873004913330e+00, + -2.768800258636475e-01, + -5.010602951049805e+00, + 9.636294364929199e+00, + 2.926377534866333e+00, + 5.229084014892578e+00, + 2.727940082550049e+00, + 1.825308203697205e+00, + 
-8.685923576354980e+00, +} +var nextafter64 = []float64{ + 4.97901192488367438926388786e+00, + 7.73887247457810545370193722e+00, + -2.7688005719200153853520874e-01, + -5.01060361827107403343006808e+00, + 9.63629370719841915615688777e+00, + 2.92637723924396508934364647e+00, + 5.22908343145930754047867595e+00, + 2.72793991043601069534929593e+00, + 1.82530809168085528249036997e+00, + -8.68592476857559958602905681e+00, +} +var pow = []float64{ + 9.5282232631648411840742957e+04, + 5.4811599352999901232411871e+07, + 5.2859121715894396531132279e-01, + 9.7587991957286474464259698e-06, + 4.328064329346044846740467e+09, + 8.4406761805034547437659092e+02, + 1.6946633276191194947742146e+05, + 5.3449040147551939075312879e+02, + 6.688182138451414936380374e+01, + 2.0609869004248742886827439e-09, +} +var remainder = []float64{ + 4.197615023265299782906368e-02, + 2.261127525421895434476482e+00, + 3.231794108794261433104108e-02, + -2.120723654214984321697556e-02, + 3.637062928015826201999516e-01, + 1.220868282268106064236690e+00, + -4.581668629186133046005125e-01, + -9.117596417440410050403443e-01, + 8.734595415957246977711748e-01, + 1.314075231424398637614104e+00, +} +var round = []float64{ + 5, + 8, + Copysign(0, -1), + -5, + 10, + 3, + 5, + 3, + 2, + -9, +} +var signbit = []bool{ + false, + false, + true, + true, + false, + false, + false, + false, + false, + true, +} +var sin = []float64{ + -9.6466616586009283766724726e-01, + 9.9338225271646545763467022e-01, + -2.7335587039794393342449301e-01, + 9.5586257685042792878173752e-01, + -2.099421066779969164496634e-01, + 2.135578780799860532750616e-01, + -8.694568971167362743327708e-01, + 4.019566681155577786649878e-01, + 9.6778633541687993721617774e-01, + -6.734405869050344734943028e-01, +} + +// Results for 100000 * Pi + vf[i] +var sinLarge = []float64{ + -9.646661658548936063912e-01, + 9.933822527198506903752e-01, + -2.7335587036246899796e-01, + 9.55862576853689321268e-01, + -2.099421066862688873691e-01, + 
2.13557878070308981163e-01, + -8.694568970959221300497e-01, + 4.01956668098863248917e-01, + 9.67786335404528727927e-01, + -6.7344058693131973066e-01, +} +var sinh = []float64{ + 7.2661916084208532301448439e+01, + 1.1479409110035194500526446e+03, + -2.8043136512812518927312641e-01, + -7.499429091181587232835164e+01, + 7.6552466042906758523925934e+03, + 9.3031583421672014313789064e+00, + 9.330815755828109072810322e+01, + 7.6179893137269146407361477e+00, + 3.021769180549615819524392e+00, + -2.95950575724449499189888e+03, +} +var sqrt = []float64{ + 2.2313699659365484748756904e+00, + 2.7818829009464263511285458e+00, + 5.2619393496314796848143251e-01, + 2.2384377628763938724244104e+00, + 3.1042380236055381099288487e+00, + 1.7106657298385224403917771e+00, + 2.286718922705479046148059e+00, + 1.6516476350711159636222979e+00, + 1.3510396336454586262419247e+00, + 2.9471892997524949215723329e+00, +} +var tan = []float64{ + -3.661316565040227801781974e+00, + 8.64900232648597589369854e+00, + -2.8417941955033612725238097e-01, + 3.253290185974728640827156e+00, + 2.147275640380293804770778e-01, + -2.18600910711067004921551e-01, + -1.760002817872367935518928e+00, + -4.389808914752818126249079e-01, + -3.843885560201130679995041e+00, + 9.10988793377685105753416e-01, +} + +// Results for 100000 * Pi + vf[i] +var tanLarge = []float64{ + -3.66131656475596512705e+00, + 8.6490023287202547927e+00, + -2.841794195104782406e-01, + 3.2532901861033120983e+00, + 2.14727564046880001365e-01, + -2.18600910700688062874e-01, + -1.760002817699722747043e+00, + -4.38980891453536115952e-01, + -3.84388555942723509071e+00, + 9.1098879344275101051e-01, +} +var tanh = []float64{ + 9.9990531206936338549262119e-01, + 9.9999962057085294197613294e-01, + -2.7001505097318677233756845e-01, + -9.9991110943061718603541401e-01, + 9.9999999146798465745022007e-01, + 9.9427249436125236705001048e-01, + 9.9994257600983138572705076e-01, + 9.9149409509772875982054701e-01, + 9.4936501296239685514466577e-01, + 
-9.9999994291374030946055701e-01, +} +var trunc = []float64{ + 4.0000000000000000e+00, + 7.0000000000000000e+00, + Copysign(0, -1), + -5.0000000000000000e+00, + 9.0000000000000000e+00, + 2.0000000000000000e+00, + 5.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + -8.0000000000000000e+00, +} +var y0 = []float64{ + -3.053399153780788357534855e-01, + 1.7437227649515231515503649e-01, + -8.6221781263678836910392572e-01, + -3.100664880987498407872839e-01, + 1.422200649300982280645377e-01, + 4.000004067997901144239363e-01, + -3.3340749753099352392332536e-01, + 4.5399790746668954555205502e-01, + 4.8290004112497761007536522e-01, + 2.7036697826604756229601611e-01, +} +var y1 = []float64{ + 0.15494213737457922210218611, + -0.2165955142081145245075746, + -2.4644949631241895201032829, + 0.1442740489541836405154505, + 0.2215379960518984777080163, + 0.3038800915160754150565448, + 0.0691107642452362383808547, + 0.2380116417809914424860165, + -0.20849492979459761009678934, + 0.0242503179793232308250804, +} +var y2 = []float64{ + 0.3675780219390303613394936, + -0.23034826393250119879267257, + -16.939677983817727205631397, + 0.367653980523052152867791, + -0.0962401471767804440353136, + -0.1923169356184851105200523, + 0.35984072054267882391843766, + -0.2794987252299739821654982, + -0.7113490692587462579757954, + -0.2647831587821263302087457, +} +var yM3 = []float64{ + -0.14035984421094849100895341, + -0.097535139617792072703973, + 242.25775994555580176377379, + -0.1492267014802818619511046, + 0.26148702629155918694500469, + 0.56675383593895176530394248, + -0.206150264009006981070575, + 0.64784284687568332737963658, + 1.3503631555901938037008443, + 0.1461869756579956803341844, +} + +// arguments and expected results for special cases +var vfacosSC = []float64{ + -Pi, + 1, + Pi, + NaN(), +} +var acosSC = []float64{ + NaN(), + 0, + NaN(), + NaN(), +} + +var vfacoshSC = []float64{ + Inf(-1), + 0.5, + 1, + Inf(1), + NaN(), +} +var acoshSC = []float64{ + NaN(), + 
NaN(), + 0, + Inf(1), + NaN(), +} + +var vfasinSC = []float64{ + -Pi, + Copysign(0, -1), + 0, + Pi, + NaN(), +} +var asinSC = []float64{ + NaN(), + Copysign(0, -1), + 0, + NaN(), + NaN(), +} + +var vfasinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var asinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfatanSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var atanSC = []float64{ + -Pi / 2, + Copysign(0, -1), + 0, + Pi / 2, + NaN(), +} + +var vfatanhSC = []float64{ + Inf(-1), + -Pi, + -1, + Copysign(0, -1), + 0, + 1, + Pi, + Inf(1), + NaN(), +} +var atanhSC = []float64{ + NaN(), + NaN(), + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), + NaN(), + NaN(), +} +var vfatan2SC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), -Pi}, + {Inf(-1), 0}, + {Inf(-1), +Pi}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {-Pi, Inf(-1)}, + {-Pi, 0}, + {-Pi, Inf(1)}, + {-Pi, NaN()}, + {Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), -Pi}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), +Pi}, + {Copysign(0, -1), Inf(1)}, + {Copysign(0, -1), NaN()}, + {0, Inf(-1)}, + {0, -Pi}, + {0, Copysign(0, -1)}, + {0, 0}, + {0, +Pi}, + {0, Inf(1)}, + {0, NaN()}, + {+Pi, Inf(-1)}, + {+Pi, 0}, + {+Pi, Inf(1)}, + {1.0, Inf(1)}, + {-1.0, Inf(1)}, + {+Pi, NaN()}, + {Inf(1), Inf(-1)}, + {Inf(1), -Pi}, + {Inf(1), 0}, + {Inf(1), +Pi}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), NaN()}, +} +var atan2SC = []float64{ + -3 * Pi / 4, // atan2(-Inf, -Inf) + -Pi / 2, // atan2(-Inf, -Pi) + -Pi / 2, // atan2(-Inf, +0) + -Pi / 2, // atan2(-Inf, +Pi) + -Pi / 4, // atan2(-Inf, +Inf) + NaN(), // atan2(-Inf, NaN) + -Pi, // atan2(-Pi, -Inf) + -Pi / 2, // atan2(-Pi, +0) + Copysign(0, -1), // atan2(-Pi, Inf) + NaN(), // atan2(-Pi, NaN) + -Pi, // atan2(-0, -Inf) + -Pi, // atan2(-0, -Pi) + -Pi, // atan2(-0, -0) + Copysign(0, -1), // atan2(-0, +0) + Copysign(0, -1), // atan2(-0, +Pi) + Copysign(0, 
-1), // atan2(-0, +Inf) + NaN(), // atan2(-0, NaN) + Pi, // atan2(+0, -Inf) + Pi, // atan2(+0, -Pi) + Pi, // atan2(+0, -0) + 0, // atan2(+0, +0) + 0, // atan2(+0, +Pi) + 0, // atan2(+0, +Inf) + NaN(), // atan2(+0, NaN) + Pi, // atan2(+Pi, -Inf) + Pi / 2, // atan2(+Pi, +0) + 0, // atan2(+Pi, +Inf) + 0, // atan2(+1, +Inf) + Copysign(0, -1), // atan2(-1, +Inf) + NaN(), // atan2(+Pi, NaN) + 3 * Pi / 4, // atan2(+Inf, -Inf) + Pi / 2, // atan2(+Inf, -Pi) + Pi / 2, // atan2(+Inf, +0) + Pi / 2, // atan2(+Inf, +Pi) + Pi / 4, // atan2(+Inf, +Inf) + NaN(), // atan2(+Inf, NaN) + NaN(), // atan2(NaN, NaN) +} + +var vfcbrtSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var cbrtSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfceilSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var ceilSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfcopysignSC = []float64{ + Inf(-1), + Inf(1), + NaN(), +} +var copysignSC = []float64{ + Inf(-1), + Inf(-1), + NaN(), +} + +var vfcosSC = []float64{ + Inf(-1), + Inf(1), + NaN(), +} +var cosSC = []float64{ + NaN(), + NaN(), + NaN(), +} + +var vfcoshSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var coshSC = []float64{ + Inf(1), + 1, + 1, + Inf(1), + NaN(), +} + +var vferfSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), + -1000, + 1000, +} +var erfSC = []float64{ + -1, + Copysign(0, -1), + 0, + 1, + NaN(), + -1, + 1, +} + +var vferfcSC = []float64{ + Inf(-1), + Inf(1), + NaN(), + -1000, + 1000, +} +var erfcSC = []float64{ + 2, + 0, + NaN(), + 2, + 0, +} + +var vferfinvSC = []float64{ + 1, + -1, + 0, + Inf(-1), + Inf(1), + NaN(), +} +var erfinvSC = []float64{ + Inf(+1), + Inf(-1), + 0, + NaN(), + NaN(), + NaN(), +} + +var vferfcinvSC = []float64{ + 0, + 2, + 1, + Inf(1), + Inf(-1), + NaN(), +} +var erfcinvSC = []float64{ + Inf(+1), + Inf(-1), + 0, + NaN(), + NaN(), + NaN(), +} + +var vfexpSC 
= []float64{ + Inf(-1), + -2000, + 2000, + Inf(1), + NaN(), + // smallest float64 that overflows Exp(x) + 7.097827128933841e+02, + // Issue 18912 + 1.48852223e+09, + 1.4885222e+09, + 1, + // near zero + 3.725290298461915e-09, + // denormal + -740, +} +var expSC = []float64{ + 0, + 0, + Inf(1), + Inf(1), + NaN(), + Inf(1), + Inf(1), + Inf(1), + 2.718281828459045, + 1.0000000037252903, + 4.2e-322, +} + +var vfexp2SC = []float64{ + Inf(-1), + -2000, + 2000, + Inf(1), + NaN(), + // smallest float64 that overflows Exp2(x) + 1024, + // near underflow + -1.07399999999999e+03, + // near zero + 3.725290298461915e-09, +} +var exp2SC = []float64{ + 0, + 0, + Inf(1), + Inf(1), + NaN(), + Inf(1), + 5e-324, + 1.0000000025821745, +} + +var vfexpm1SC = []float64{ + Inf(-1), + -710, + Copysign(0, -1), + 0, + 710, + Inf(1), + NaN(), +} +var expm1SC = []float64{ + -1, + -1, + Copysign(0, -1), + 0, + Inf(1), + Inf(1), + NaN(), +} + +var vffabsSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var fabsSC = []float64{ + Inf(1), + 0, + 0, + Inf(1), + NaN(), +} + +var vffdimSC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {0, Copysign(0, -1)}, + {0, 0}, + {Inf(1), Inf(-1)}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), Inf(-1)}, + {NaN(), Copysign(0, -1)}, + {NaN(), 0}, + {NaN(), Inf(1)}, + {NaN(), NaN()}, +} +var nan = Float64frombits(0xFFF8000000000000) // SSE2 DIVSD 0/0 +var vffdim2SC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), Inf(1)}, + {Inf(-1), nan}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {0, Copysign(0, -1)}, + {0, 0}, + {Inf(1), Inf(-1)}, + {Inf(1), Inf(1)}, + {Inf(1), nan}, + {nan, Inf(-1)}, + {nan, Copysign(0, -1)}, + {nan, 0}, + {nan, Inf(1)}, + {nan, nan}, +} +var fdimSC = []float64{ + NaN(), + 0, + NaN(), + 0, + 0, + 0, + 0, + Inf(1), + NaN(), + NaN(), + NaN(), + NaN(), + NaN(), + NaN(), + NaN(), +} +var fmaxSC = []float64{ 
+ Inf(-1), + Inf(1), + NaN(), + Copysign(0, -1), + 0, + 0, + 0, + Inf(1), + Inf(1), + Inf(1), + NaN(), + NaN(), + NaN(), + Inf(1), + NaN(), +} +var fminSC = []float64{ + Inf(-1), + Inf(-1), + Inf(-1), + Copysign(0, -1), + Copysign(0, -1), + Copysign(0, -1), + 0, + Inf(-1), + Inf(1), + NaN(), + Inf(-1), + NaN(), + NaN(), + NaN(), + NaN(), +} + +var vffmodSC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), -Pi}, + {Inf(-1), 0}, + {Inf(-1), Pi}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {-Pi, Inf(-1)}, + {-Pi, 0}, + {-Pi, Inf(1)}, + {-Pi, NaN()}, + {Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), Inf(1)}, + {Copysign(0, -1), NaN()}, + {0, Inf(-1)}, + {0, 0}, + {0, Inf(1)}, + {0, NaN()}, + {Pi, Inf(-1)}, + {Pi, 0}, + {Pi, Inf(1)}, + {Pi, NaN()}, + {Inf(1), Inf(-1)}, + {Inf(1), -Pi}, + {Inf(1), 0}, + {Inf(1), Pi}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), Inf(-1)}, + {NaN(), -Pi}, + {NaN(), 0}, + {NaN(), Pi}, + {NaN(), Inf(1)}, + {NaN(), NaN()}, +} +var fmodSC = []float64{ + NaN(), // fmod(-Inf, -Inf) + NaN(), // fmod(-Inf, -Pi) + NaN(), // fmod(-Inf, 0) + NaN(), // fmod(-Inf, Pi) + NaN(), // fmod(-Inf, +Inf) + NaN(), // fmod(-Inf, NaN) + -Pi, // fmod(-Pi, -Inf) + NaN(), // fmod(-Pi, 0) + -Pi, // fmod(-Pi, +Inf) + NaN(), // fmod(-Pi, NaN) + Copysign(0, -1), // fmod(-0, -Inf) + NaN(), // fmod(-0, 0) + Copysign(0, -1), // fmod(-0, Inf) + NaN(), // fmod(-0, NaN) + 0, // fmod(0, -Inf) + NaN(), // fmod(0, 0) + 0, // fmod(0, +Inf) + NaN(), // fmod(0, NaN) + Pi, // fmod(Pi, -Inf) + NaN(), // fmod(Pi, 0) + Pi, // fmod(Pi, +Inf) + NaN(), // fmod(Pi, NaN) + NaN(), // fmod(+Inf, -Inf) + NaN(), // fmod(+Inf, -Pi) + NaN(), // fmod(+Inf, 0) + NaN(), // fmod(+Inf, Pi) + NaN(), // fmod(+Inf, +Inf) + NaN(), // fmod(+Inf, NaN) + NaN(), // fmod(NaN, -Inf) + NaN(), // fmod(NaN, -Pi) + NaN(), // fmod(NaN, 0) + NaN(), // fmod(NaN, Pi) + NaN(), // fmod(NaN, +Inf) + NaN(), // fmod(NaN, NaN) +} + +var vffrexpSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + 
Inf(1), + NaN(), +} +var frexpSC = []fi{ + {Inf(-1), 0}, + {Copysign(0, -1), 0}, + {0, 0}, + {Inf(1), 0}, + {NaN(), 0}, +} + +var vfgamma = [][2]float64{ + {Inf(1), Inf(1)}, + {Inf(-1), NaN()}, + {0, Inf(1)}, + {Copysign(0, -1), Inf(-1)}, + {NaN(), NaN()}, + {-1, NaN()}, + {-2, NaN()}, + {-3, NaN()}, + {-1e16, NaN()}, + {-1e300, NaN()}, + {1.7e308, Inf(1)}, + + // Test inputs inspired by Python test suite. + // Outputs computed at high precision by PARI/GP. + // If recomputing table entries, be careful to use + // high-precision (%.1000g) formatting of the float64 inputs. + // For example, -2.0000000000000004 is the float64 with exact value + // -2.00000000000000044408920985626161695, and + // gamma(-2.0000000000000004) = -1249999999999999.5386078562728167651513, while + // gamma(-2.00000000000000044408920985626161695) = -1125899906826907.2044875028130093136826. + // Thus the table lists -1.1258999068426235e+15 as the answer. + {0.5, 1.772453850905516}, + {1.5, 0.886226925452758}, + {2.5, 1.329340388179137}, + {3.5, 3.3233509704478426}, + {-0.5, -3.544907701811032}, + {-1.5, 2.363271801207355}, + {-2.5, -0.9453087204829419}, + {-3.5, 0.2700882058522691}, + {0.1, 9.51350769866873}, + {0.01, 99.4325851191506}, + {1e-08, 9.999999942278434e+07}, + {1e-16, 1e+16}, + {0.001, 999.4237724845955}, + {1e-16, 1e+16}, + {1e-308, 1e+308}, + {5.6e-309, 1.7857142857142864e+308}, + {5.5e-309, Inf(1)}, + {1e-309, Inf(1)}, + {1e-323, Inf(1)}, + {5e-324, Inf(1)}, + {-0.1, -10.686287021193193}, + {-0.01, -100.58719796441078}, + {-1e-08, -1.0000000057721567e+08}, + {-1e-16, -1e+16}, + {-0.001, -1000.5782056293586}, + {-1e-16, -1e+16}, + {-1e-308, -1e+308}, + {-5.6e-309, -1.7857142857142864e+308}, + {-5.5e-309, Inf(-1)}, + {-1e-309, Inf(-1)}, + {-1e-323, Inf(-1)}, + {-5e-324, Inf(-1)}, + {-0.9999999999999999, -9.007199254740992e+15}, + {-1.0000000000000002, 4.5035996273704955e+15}, + {-1.9999999999999998, 2.2517998136852485e+15}, + {-2.0000000000000004, -1.1258999068426235e+15}, + 
{-100.00000000000001, -7.540083334883109e-145}, + {-99.99999999999999, 7.540083334884096e-145}, + {17, 2.0922789888e+13}, + {171, 7.257415615307999e+306}, + {171.6, 1.5858969096672565e+308}, + {171.624, 1.7942117599248104e+308}, + {171.625, Inf(1)}, + {172, Inf(1)}, + {2000, Inf(1)}, + {-100.5, -3.3536908198076787e-159}, + {-160.5, -5.255546447007829e-286}, + {-170.5, -3.3127395215386074e-308}, + {-171.5, 1.9316265431712e-310}, + {-176.5, -1.196e-321}, + {-177.5, 5e-324}, + {-178.5, Copysign(0, -1)}, + {-179.5, 0}, + {-201.0001, 0}, + {-202.9999, Copysign(0, -1)}, + {-1000.5, Copysign(0, -1)}, + {-1.0000000003e+09, Copysign(0, -1)}, + {-4.5035996273704955e+15, 0}, + {-63.349078729022985, 4.177797167776188e-88}, + {-127.45117632943295, 1.183111089623681e-214}, +} + +var vfhypotSC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), 0}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {0, Copysign(0, -1)}, + {0, 0}, // +0, +0 + {0, Inf(-1)}, + {0, Inf(1)}, + {0, NaN()}, + {Inf(1), Inf(-1)}, + {Inf(1), 0}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), Inf(-1)}, + {NaN(), 0}, + {NaN(), Inf(1)}, + {NaN(), NaN()}, +} +var hypotSC = []float64{ + Inf(1), + Inf(1), + Inf(1), + Inf(1), + 0, + 0, + 0, + 0, + Inf(1), + Inf(1), + NaN(), + Inf(1), + Inf(1), + Inf(1), + Inf(1), + Inf(1), + NaN(), + Inf(1), + NaN(), +} + +var ilogbSC = []int{ + MaxInt32, + MinInt32, + MaxInt32, + MaxInt32, +} + +var vfj0SC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), +} +var j0SC = []float64{ + 0, + 1, + 0, + NaN(), +} +var j1SC = []float64{ + 0, + 0, + 0, + NaN(), +} +var j2SC = []float64{ + 0, + 0, + 0, + NaN(), +} +var jM3SC = []float64{ + 0, + 0, + 0, + NaN(), +} + +var vfldexpSC = []fi{ + {0, 0}, + {0, -1075}, + {0, 1024}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), -1075}, + {Copysign(0, -1), 1024}, + {Inf(1), 0}, + {Inf(1), -1024}, + {Inf(-1), 0}, + {Inf(-1), -1024}, + {NaN(), -1024}, + {10, int(1) << (uint64(unsafe.Sizeof(0)-1) * 
8)}, + {10, -(int(1) << (uint64(unsafe.Sizeof(0)-1) * 8))}, +} +var ldexpSC = []float64{ + 0, + 0, + 0, + Copysign(0, -1), + Copysign(0, -1), + Copysign(0, -1), + Inf(1), + Inf(1), + Inf(-1), + Inf(-1), + NaN(), + Inf(1), + 0, +} + +var vflgammaSC = []float64{ + Inf(-1), + -3, + 0, + 1, + 2, + Inf(1), + NaN(), +} +var lgammaSC = []fi{ + {Inf(-1), 1}, + {Inf(1), 1}, + {Inf(1), 1}, + {0, 1}, + {0, 1}, + {Inf(1), 1}, + {NaN(), 1}, +} + +var vflogSC = []float64{ + Inf(-1), + -Pi, + Copysign(0, -1), + 0, + 1, + Inf(1), + NaN(), +} +var logSC = []float64{ + NaN(), + NaN(), + Inf(-1), + Inf(-1), + 0, + Inf(1), + NaN(), +} + +var vflogbSC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), +} +var logbSC = []float64{ + Inf(1), + Inf(-1), + Inf(1), + NaN(), +} + +var vflog1pSC = []float64{ + Inf(-1), + -Pi, + -1, + Copysign(0, -1), + 0, + Inf(1), + NaN(), + 4503599627370496.5, // Issue #29488 +} +var log1pSC = []float64{ + NaN(), + NaN(), + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), + 36.04365338911715, // Issue #29488 +} + +var vfmodfSC = []float64{ + Inf(-1), + Copysign(0, -1), + Inf(1), + NaN(), +} +var modfSC = [][2]float64{ + {Inf(-1), NaN()}, // [2]float64{Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), Copysign(0, -1)}, + {Inf(1), NaN()}, // [2]float64{0, Inf(1)}, + {NaN(), NaN()}, +} + +var vfnextafter32SC = [][2]float32{ + {0, 0}, + {0, float32(Copysign(0, -1))}, + {0, -1}, + {0, float32(NaN())}, + {float32(Copysign(0, -1)), 1}, + {float32(Copysign(0, -1)), 0}, + {float32(Copysign(0, -1)), float32(Copysign(0, -1))}, + {float32(Copysign(0, -1)), -1}, + {float32(NaN()), 0}, + {float32(NaN()), float32(NaN())}, +} +var nextafter32SC = []float32{ + 0, + 0, + -1.401298464e-45, // Float32frombits(0x80000001) + float32(NaN()), + 1.401298464e-45, // Float32frombits(0x00000001) + float32(Copysign(0, -1)), + float32(Copysign(0, -1)), + -1.401298464e-45, // Float32frombits(0x80000001) + float32(NaN()), + float32(NaN()), +} + +var vfnextafter64SC = [][2]float64{ + {0, 0}, + 
{0, Copysign(0, -1)}, + {0, -1}, + {0, NaN()}, + {Copysign(0, -1), 1}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), -1}, + {NaN(), 0}, + {NaN(), NaN()}, +} +var nextafter64SC = []float64{ + 0, + 0, + -4.9406564584124654418e-324, // Float64frombits(0x8000000000000001) + NaN(), + 4.9406564584124654418e-324, // Float64frombits(0x0000000000000001) + Copysign(0, -1), + Copysign(0, -1), + -4.9406564584124654418e-324, // Float64frombits(0x8000000000000001) + NaN(), + NaN(), +} + +var vfpowSC = [][2]float64{ + {Inf(-1), -Pi}, + {Inf(-1), -3}, + {Inf(-1), Copysign(0, -1)}, + {Inf(-1), 0}, + {Inf(-1), 1}, + {Inf(-1), 3}, + {Inf(-1), Pi}, + {Inf(-1), 0.5}, + {Inf(-1), NaN()}, + + {-Pi, Inf(-1)}, + {-Pi, -Pi}, + {-Pi, Copysign(0, -1)}, + {-Pi, 0}, + {-Pi, 1}, + {-Pi, Pi}, + {-Pi, Inf(1)}, + {-Pi, NaN()}, + + {-1, Inf(-1)}, + {-1, Inf(1)}, + {-1, NaN()}, + {-1 / 2, Inf(-1)}, + {-1 / 2, Inf(1)}, + {Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), -Pi}, + {Copysign(0, -1), -0.5}, + {Copysign(0, -1), -3}, + {Copysign(0, -1), 3}, + {Copysign(0, -1), Pi}, + {Copysign(0, -1), 0.5}, + {Copysign(0, -1), Inf(1)}, + + {0, Inf(-1)}, + {0, -Pi}, + {0, -3}, + {0, Copysign(0, -1)}, + {0, 0}, + {0, 3}, + {0, Pi}, + {0, Inf(1)}, + {0, NaN()}, + + {1 / 2, Inf(-1)}, + {1 / 2, Inf(1)}, + {1, Inf(-1)}, + {1, Inf(1)}, + {1, NaN()}, + + {Pi, Inf(-1)}, + {Pi, Copysign(0, -1)}, + {Pi, 0}, + {Pi, 1}, + {Pi, Inf(1)}, + {Pi, NaN()}, + {Inf(1), -Pi}, + {Inf(1), Copysign(0, -1)}, + {Inf(1), 0}, + {Inf(1), 1}, + {Inf(1), Pi}, + {Inf(1), NaN()}, + {NaN(), -Pi}, + {NaN(), Copysign(0, -1)}, + {NaN(), 0}, + {NaN(), 1}, + {NaN(), Pi}, + {NaN(), NaN()}, + + // Issue #7394 overflow checks + {2, float64(1 << 32)}, + {2, -float64(1 << 32)}, + {-2, float64(1<<32 + 1)}, + {1 / 2, float64(1 << 45)}, + {1 / 2, -float64(1 << 45)}, + {Nextafter(1, 2), float64(1 << 63)}, + {Nextafter(1, -2), float64(1 << 63)}, + {Nextafter(-1, 2), float64(1 << 63)}, + {Nextafter(-1, -2), float64(1 << 
63)}, +} +var powSC = []float64{ + 0, // pow(-Inf, -Pi) + Copysign(0, -1), // pow(-Inf, -3) + 1, // pow(-Inf, -0) + 1, // pow(-Inf, +0) + Inf(-1), // pow(-Inf, 1) + Inf(-1), // pow(-Inf, 3) + Inf(1), // pow(-Inf, Pi) + Inf(1), // pow(-Inf, 0.5) + NaN(), // pow(-Inf, NaN) + 0, // pow(-Pi, -Inf) + NaN(), // pow(-Pi, -Pi) + 1, // pow(-Pi, -0) + 1, // pow(-Pi, +0) + -Pi, // pow(-Pi, 1) + NaN(), // pow(-Pi, Pi) + Inf(1), // pow(-Pi, +Inf) + NaN(), // pow(-Pi, NaN) + 1, // pow(-1, -Inf) IEEE 754-2008 + 1, // pow(-1, +Inf) IEEE 754-2008 + NaN(), // pow(-1, NaN) + Inf(1), // pow(-1/2, -Inf) + 0, // pow(-1/2, +Inf) + Inf(1), // pow(-0, -Inf) + Inf(1), // pow(-0, -Pi) + Inf(1), // pow(-0, -0.5) + Inf(-1), // pow(-0, -3) IEEE 754-2008 + Copysign(0, -1), // pow(-0, 3) IEEE 754-2008 + 0, // pow(-0, +Pi) + 0, // pow(-0, 0.5) + 0, // pow(-0, +Inf) + Inf(1), // pow(+0, -Inf) + Inf(1), // pow(+0, -Pi) + Inf(1), // pow(+0, -3) + 1, // pow(+0, -0) + 1, // pow(+0, +0) + 0, // pow(+0, 3) + 0, // pow(+0, +Pi) + 0, // pow(+0, +Inf) + NaN(), // pow(+0, NaN) + Inf(1), // pow(1/2, -Inf) + 0, // pow(1/2, +Inf) + 1, // pow(1, -Inf) IEEE 754-2008 + 1, // pow(1, +Inf) IEEE 754-2008 + 1, // pow(1, NaN) IEEE 754-2008 + 0, // pow(+Pi, -Inf) + 1, // pow(+Pi, -0) + 1, // pow(+Pi, +0) + Pi, // pow(+Pi, 1) + Inf(1), // pow(+Pi, +Inf) + NaN(), // pow(+Pi, NaN) + 0, // pow(+Inf, -Pi) + 1, // pow(+Inf, -0) + 1, // pow(+Inf, +0) + Inf(1), // pow(+Inf, 1) + Inf(1), // pow(+Inf, Pi) + NaN(), // pow(+Inf, NaN) + NaN(), // pow(NaN, -Pi) + 1, // pow(NaN, -0) + 1, // pow(NaN, +0) + NaN(), // pow(NaN, 1) + NaN(), // pow(NaN, +Pi) + NaN(), // pow(NaN, NaN) + + // Issue #7394 overflow checks + Inf(1), // pow(2, float64(1 << 32)) + 0, // pow(2, -float64(1 << 32)) + Inf(-1), // pow(-2, float64(1<<32 + 1)) + 0, // pow(1/2, float64(1 << 45)) + Inf(1), // pow(1/2, -float64(1 << 45)) + Inf(1), // pow(Nextafter(1, 2), float64(1 << 63)) + 0, // pow(Nextafter(1, -2), float64(1 << 63)) + 0, // pow(Nextafter(-1, 2), 
float64(1 << 63)) + Inf(1), // pow(Nextafter(-1, -2), float64(1 << 63)) +} + +var vfpow10SC = []int{ + MinInt32, + -324, + -323, + -50, + -22, + -1, + 0, + 1, + 22, + 50, + 100, + 200, + 308, + 309, + MaxInt32, +} + +var pow10SC = []float64{ + 0, // pow10(MinInt32) + 0, // pow10(-324) + 1.0e-323, // pow10(-323) + 1.0e-50, // pow10(-50) + 1.0e-22, // pow10(-22) + 1.0e-1, // pow10(-1) + 1.0e0, // pow10(0) + 1.0e1, // pow10(1) + 1.0e22, // pow10(22) + 1.0e50, // pow10(50) + 1.0e100, // pow10(100) + 1.0e200, // pow10(200) + 1.0e308, // pow10(308) + Inf(1), // pow10(309) + Inf(1), // pow10(MaxInt32) +} + +var vfroundSC = [][2]float64{ + {0, 0}, + {1.390671161567e-309, 0}, // denormal + {0.49999999999999994, 0}, // 0.5-epsilon + {0.5, 1}, + {0.5000000000000001, 1}, // 0.5+epsilon + {-1.5, -2}, + {-2.5, -3}, + {NaN(), NaN()}, + {Inf(1), Inf(1)}, + {2251799813685249.5, 2251799813685250}, // 1 bit fraction + {2251799813685250.5, 2251799813685251}, + {4503599627370495.5, 4503599627370496}, // 1 bit fraction, rounding to 0 bit fraction + {4503599627370497, 4503599627370497}, // large integer +} +var vfroundEvenSC = [][2]float64{ + {0, 0}, + {1.390671161567e-309, 0}, // denormal + {0.49999999999999994, 0}, // 0.5-epsilon + {0.5, 0}, + {0.5000000000000001, 1}, // 0.5+epsilon + {-1.5, -2}, + {-2.5, -2}, + {NaN(), NaN()}, + {Inf(1), Inf(1)}, + {2251799813685249.5, 2251799813685250}, // 1 bit fraction + {2251799813685250.5, 2251799813685250}, + {4503599627370495.5, 4503599627370496}, // 1 bit fraction, rounding to 0 bit fraction + {4503599627370497, 4503599627370497}, // large integer +} + +var vfsignbitSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var signbitSC = []bool{ + true, + true, + false, + false, + false, +} + +var vfsinSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var sinSC = []float64{ + NaN(), + Copysign(0, -1), + 0, + NaN(), + NaN(), +} + +var vfsinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + 
NaN(), +} +var sinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfsqrtSC = []float64{ + Inf(-1), + -Pi, + Copysign(0, -1), + 0, + Inf(1), + NaN(), + Float64frombits(2), // subnormal; see https://golang.org/issue/13013 +} +var sqrtSC = []float64{ + NaN(), + NaN(), + Copysign(0, -1), + 0, + Inf(1), + NaN(), + 3.1434555694052576e-162, +} + +var vftanhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var tanhSC = []float64{ + -1, + Copysign(0, -1), + 0, + 1, + NaN(), +} + +var vfy0SC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), + -1, +} +var y0SC = []float64{ + NaN(), + Inf(-1), + 0, + NaN(), + NaN(), +} +var y1SC = []float64{ + NaN(), + Inf(-1), + 0, + NaN(), + NaN(), +} +var y2SC = []float64{ + NaN(), + Inf(-1), + 0, + NaN(), + NaN(), +} +var yM3SC = []float64{ + NaN(), + Inf(1), + 0, + NaN(), + NaN(), +} + +// arguments and expected results for boundary cases +const ( + SmallestNormalFloat64 = 2.2250738585072014e-308 // 2**-1022 + LargestSubnormalFloat64 = SmallestNormalFloat64 - SmallestNonzeroFloat64 +) + +var vffrexpBC = []float64{ + SmallestNormalFloat64, + LargestSubnormalFloat64, + SmallestNonzeroFloat64, + MaxFloat64, + -SmallestNormalFloat64, + -LargestSubnormalFloat64, + -SmallestNonzeroFloat64, + -MaxFloat64, +} +var frexpBC = []fi{ + {0.5, -1021}, + {0.99999999999999978, -1022}, + {0.5, -1073}, + {0.99999999999999989, 1024}, + {-0.5, -1021}, + {-0.99999999999999978, -1022}, + {-0.5, -1073}, + {-0.99999999999999989, 1024}, +} + +var vfldexpBC = []fi{ + {SmallestNormalFloat64, -52}, + {LargestSubnormalFloat64, -51}, + {SmallestNonzeroFloat64, 1074}, + {MaxFloat64, -(1023 + 1074)}, + {1, -1075}, + {-1, -1075}, + {1, 1024}, + {-1, 1024}, + {1.0000000000000002, -1075}, + {1, -1075}, +} +var ldexpBC = []float64{ + SmallestNonzeroFloat64, + 1e-323, // 2**-1073 + 1, + 1e-323, // 2**-1073 + 0, + Copysign(0, -1), + Inf(1), + Inf(-1), + SmallestNonzeroFloat64, + 0, +} + +var logbBC = []float64{ + -1022, + 
-1023, + -1074, + 1023, + -1022, + -1023, + -1074, + 1023, +} + +// Test cases were generated with Berkeley TestFloat-3e/testfloat_gen. +// http://www.jhauser.us/arithmetic/TestFloat.html. +// The default rounding mode is selected (nearest/even), and exception flags are ignored. +var fmaC = []struct{ x, y, z, want float64 }{ + // Large exponent spread + {-3.999999999999087, -1.1123914289620494e-16, -7.999877929687506, -7.999877929687505}, + {-262112.0000004768, -0.06251525855623184, 1.1102230248837136e-16, 16385.99945072085}, + {-6.462348523533467e-27, -2.3763644720331857e-211, 4.000000000931324, 4.000000000931324}, + + // Effective addition + {-2.0000000037252907, 6.7904383376e-313, -3.3951933161e-313, -1.697607001654e-312}, + {-0.12499999999999999, 512.007568359375, -1.4193627164960366e-16, -64.00094604492188}, + {-2.7550648847397148e-39, -3.4028301595800694e+38, 0.9960937495343386, 1.9335955376735676}, + {5.723369164769208e+24, 3.8149300927159385e-06, 1.84489958778182e+19, 4.028324913621874e+19}, + {-0.4843749999990904, -3.6893487872543293e+19, 9.223653786709391e+18, 2.7093936974938993e+19}, + {-3.8146972665201165e-06, 4.2949672959999385e+09, -2.2204460489938386e-16, -16384.000003844263}, + {6.98156394130982e-309, -1.1072962560000002e+09, -4.4414561548793455e-308, -7.73065965765153e-300}, + + // Effective subtraction + {5e-324, 4.5, -2e-323, 0}, + {5e-324, 7, -3.5e-323, 0}, + {5e-324, 0.5000000000000001, -5e-324, Copysign(0, -1)}, + {-2.1240680525e-314, -1.233647078189316e+308, -0.25781249999954525, -0.25780987964919844}, + {8.579992955364441e-308, 0.6037391876780558, -4.4501307410480706e-308, 7.29947236107098e-309}, + {-4.450143471986689e-308, -0.9960937499927239, -4.450419332475649e-308, -1.7659233458788e-310}, + {1.4932076393918112, -2.2248022430460833e-308, 4.449875571054211e-308, 1.127783865601762e-308}, + + // Overflow + {-2.288020632214759e+38, -8.98846570988901e+307, 1.7696041796300924e+308, Inf(0)}, + {1.4888652783208255e+308, -9.007199254742012e+15, 
-6.807282911929205e+38, Inf(-1)}, + {9.142703268902826e+192, -1.3504889569802838e+296, -1.9082200803806996e-89, Inf(-1)}, + + // Finite x and y, but non-finite z. + {31.99218749627471, -1.7976930544991702e+308, Inf(0), Inf(0)}, + {-1.7976931281784667e+308, -2.0009765625002265, Inf(-1), Inf(-1)}, + + // Special + {0, 0, 0, 0}, + {-1.1754226043408471e-38, NaN(), Inf(0), NaN()}, + {0, 0, 2.22507385643494e-308, 2.22507385643494e-308}, + {-8.65697792e+09, NaN(), -7.516192799999999e+09, NaN()}, + {-0.00012207403779029757, 3.221225471996093e+09, NaN(), NaN()}, + {Inf(-1), 0.1252441407414153, -1.387184532981584e-76, Inf(-1)}, + {Inf(0), 1.525878907671432e-05, -9.214364835452549e+18, Inf(0)}, + + // Random + {0.1777916152213626, -32.000015266239636, -2.2204459148334633e-16, -5.689334401293007}, + {-2.0816681711722314e-16, -0.4997558592585846, -0.9465627129124969, -0.9465627129124968}, + {-1.9999997615814211, 1.8518819259933516e+19, 16.874999999999996, -3.703763410463646e+19}, + {-0.12499994039717421, 32767.99999976135, -2.0752587082923246e+19, -2.075258708292325e+19}, + {7.705600568510257e-34, -1.801432979000528e+16, -0.17224197722973714, -0.17224197722973716}, + {3.8988133103758913e-308, -0.9848632812499999, 3.893879244098556e-308, 5.40811742605814e-310}, + {-0.012651981190687427, 6.911985574912436e+38, 6.669240527007144e+18, -8.745031148409496e+36}, + {4.612811918325842e+18, 1.4901161193847641e-08, 2.6077032311277997e-08, 6.873625395187494e+10}, + {-9.094947033611148e-13, 4.450691014249257e-308, 2.086006742350485e-308, 2.086006742346437e-308}, + {-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308}, +} + +var sqrt32 = []float32{ + 0, + float32(Copysign(0, -1)), + float32(NaN()), + float32(Inf(1)), + float32(Inf(-1)), + 1, + 2, + -2, + 4.9790119248836735e+00, + 7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, +} + +func tolerance(a, b, e float64) bool { + // Multiplying by e here can underflow 
denormal values to zero. + // Check a==b so that at least if a and b are small and identical + // we say they match. + if a == b { + return true + } + d := a - b + if d < 0 { + d = -d + } + + // note: b is correct (expected) value, a is actual value. + // make error tolerance a fraction of b, not a. + if b != 0 { + e = e * b + if e < 0 { + e = -e + } + } + return d < e +} +func close(a, b float64) bool { return tolerance(a, b, 1e-14) } +func veryclose(a, b float64) bool { return tolerance(a, b, 4e-16) } +func soclose(a, b, e float64) bool { return tolerance(a, b, e) } +func alike(a, b float64) bool { + switch { + case IsNaN(a) && IsNaN(b): + return true + case a == b: + return Signbit(a) == Signbit(b) + } + return false +} + +func TestNaN(t *testing.T) { + f64 := NaN() + if f64 == f64 { + t.Fatalf("NaN() returns %g, expected NaN", f64) + } + f32 := float32(f64) + if f32 == f32 { + t.Fatalf("float32(NaN()) is %g, expected NaN", f32) + } +} + +func TestAcos(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Acos(a); !close(acos[i], f) { + t.Errorf("Acos(%g) = %g, want %g", a, f, acos[i]) + } + } + for i := 0; i < len(vfacosSC); i++ { + if f := Acos(vfacosSC[i]); !alike(acosSC[i], f) { + t.Errorf("Acos(%g) = %g, want %g", vfacosSC[i], f, acosSC[i]) + } + } +} + +func TestAcosh(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := 1 + Abs(vf[i]) + if f := Acosh(a); !veryclose(acosh[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", a, f, acosh[i]) + } + } + for i := 0; i < len(vfacoshSC); i++ { + if f := Acosh(vfacoshSC[i]); !alike(acoshSC[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", vfacoshSC[i], f, acoshSC[i]) + } + } +} + +func TestAsin(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Asin(a); !veryclose(asin[i], f) { + t.Errorf("Asin(%g) = %g, want %g", a, f, asin[i]) + } + } + for i := 0; i < len(vfasinSC); i++ { + if f := Asin(vfasinSC[i]); !alike(asinSC[i], f) { + t.Errorf("Asin(%g) = %g, want %g", 
vfasinSC[i], f, asinSC[i]) + } + } +} + +func TestAsinh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Asinh(vf[i]); !veryclose(asinh[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vf[i], f, asinh[i]) + } + } + for i := 0; i < len(vfasinhSC); i++ { + if f := Asinh(vfasinhSC[i]); !alike(asinhSC[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vfasinhSC[i], f, asinhSC[i]) + } + } +} + +func TestAtan(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Atan(vf[i]); !veryclose(atan[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vf[i], f, atan[i]) + } + } + for i := 0; i < len(vfatanSC); i++ { + if f := Atan(vfatanSC[i]); !alike(atanSC[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vfatanSC[i], f, atanSC[i]) + } + } +} + +func TestAtanh(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Atanh(a); !veryclose(atanh[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", a, f, atanh[i]) + } + } + for i := 0; i < len(vfatanhSC); i++ { + if f := Atanh(vfatanhSC[i]); !alike(atanhSC[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", vfatanhSC[i], f, atanhSC[i]) + } + } +} + +func TestAtan2(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Atan2(10, vf[i]); !veryclose(atan2[i], f) { + t.Errorf("Atan2(10, %g) = %g, want %g", vf[i], f, atan2[i]) + } + } + for i := 0; i < len(vfatan2SC); i++ { + if f := Atan2(vfatan2SC[i][0], vfatan2SC[i][1]); !alike(atan2SC[i], f) { + t.Errorf("Atan2(%g, %g) = %g, want %g", vfatan2SC[i][0], vfatan2SC[i][1], f, atan2SC[i]) + } + } +} + +func TestCbrt(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Cbrt(vf[i]); !veryclose(cbrt[i], f) { + t.Errorf("Cbrt(%g) = %g, want %g", vf[i], f, cbrt[i]) + } + } + for i := 0; i < len(vfcbrtSC); i++ { + if f := Cbrt(vfcbrtSC[i]); !alike(cbrtSC[i], f) { + t.Errorf("Cbrt(%g) = %g, want %g", vfcbrtSC[i], f, cbrtSC[i]) + } + } +} + +func TestCeil(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Ceil(vf[i]); !alike(ceil[i], f) { + t.Errorf("Ceil(%g) = %g, want 
%g", vf[i], f, ceil[i]) + } + } + for i := 0; i < len(vfceilSC); i++ { + if f := Ceil(vfceilSC[i]); !alike(ceilSC[i], f) { + t.Errorf("Ceil(%g) = %g, want %g", vfceilSC[i], f, ceilSC[i]) + } + } +} + +func TestCopysign(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Copysign(vf[i], -1); copysign[i] != f { + t.Errorf("Copysign(%g, -1) = %g, want %g", vf[i], f, copysign[i]) + } + } + for i := 0; i < len(vf); i++ { + if f := Copysign(vf[i], 1); -copysign[i] != f { + t.Errorf("Copysign(%g, 1) = %g, want %g", vf[i], f, -copysign[i]) + } + } + for i := 0; i < len(vfcopysignSC); i++ { + if f := Copysign(vfcopysignSC[i], -1); !alike(copysignSC[i], f) { + t.Errorf("Copysign(%g, -1) = %g, want %g", vfcopysignSC[i], f, copysignSC[i]) + } + } +} + +func TestCos(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Cos(vf[i]); !veryclose(cos[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vf[i], f, cos[i]) + } + } + for i := 0; i < len(vfcosSC); i++ { + if f := Cos(vfcosSC[i]); !alike(cosSC[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vfcosSC[i], f, cosSC[i]) + } + } +} + +func TestCosh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Cosh(vf[i]); !close(cosh[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vf[i], f, cosh[i]) + } + } + for i := 0; i < len(vfcoshSC); i++ { + if f := Cosh(vfcoshSC[i]); !alike(coshSC[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vfcoshSC[i], f, coshSC[i]) + } + } +} + +func TestErf(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Erf(a); !veryclose(erf[i], f) { + t.Errorf("Erf(%g) = %g, want %g", a, f, erf[i]) + } + } + for i := 0; i < len(vferfSC); i++ { + if f := Erf(vferfSC[i]); !alike(erfSC[i], f) { + t.Errorf("Erf(%g) = %g, want %g", vferfSC[i], f, erfSC[i]) + } + } +} + +func TestErfc(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Erfc(a); !veryclose(erfc[i], f) { + t.Errorf("Erfc(%g) = %g, want %g", a, f, erfc[i]) + } + } + for i := 0; i < len(vferfcSC); i++ { + 
if f := Erfc(vferfcSC[i]); !alike(erfcSC[i], f) { + t.Errorf("Erfc(%g) = %g, want %g", vferfcSC[i], f, erfcSC[i]) + } + } +} + +func TestErfinv(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Erfinv(a); !veryclose(erfinv[i], f) { + t.Errorf("Erfinv(%g) = %g, want %g", a, f, erfinv[i]) + } + } + for i := 0; i < len(vferfinvSC); i++ { + if f := Erfinv(vferfinvSC[i]); !alike(erfinvSC[i], f) { + t.Errorf("Erfinv(%g) = %g, want %g", vferfinvSC[i], f, erfinvSC[i]) + } + } + for x := -0.9; x <= 0.90; x += 1e-2 { + if f := Erf(Erfinv(x)); !close(x, f) { + t.Errorf("Erf(Erfinv(%g)) = %g, want %g", x, f, x) + } + } + for x := -0.9; x <= 0.90; x += 1e-2 { + if f := Erfinv(Erf(x)); !close(x, f) { + t.Errorf("Erfinv(Erf(%g)) = %g, want %g", x, f, x) + } + } +} + +func TestErfcinv(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := 1.0 - (vf[i] / 10) + if f := Erfcinv(a); !veryclose(erfinv[i], f) { + t.Errorf("Erfcinv(%g) = %g, want %g", a, f, erfinv[i]) + } + } + for i := 0; i < len(vferfcinvSC); i++ { + if f := Erfcinv(vferfcinvSC[i]); !alike(erfcinvSC[i], f) { + t.Errorf("Erfcinv(%g) = %g, want %g", vferfcinvSC[i], f, erfcinvSC[i]) + } + } + for x := 0.1; x <= 1.9; x += 1e-2 { + if f := Erfc(Erfcinv(x)); !close(x, f) { + t.Errorf("Erfc(Erfcinv(%g)) = %g, want %g", x, f, x) + } + } + for x := 0.1; x <= 1.9; x += 1e-2 { + if f := Erfcinv(Erfc(x)); !close(x, f) { + t.Errorf("Erfcinv(Erfc(%g)) = %g, want %g", x, f, x) + } + } +} + +func TestExp(t *testing.T) { + testExp(t, Exp, "Exp") + testExp(t, ExpGo, "ExpGo") +} + +func testExp(t *testing.T, Exp func(float64) float64, name string) { + for i := 0; i < len(vf); i++ { + if f := Exp(vf[i]); !veryclose(exp[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i]) + } + } + for i := 0; i < len(vfexpSC); i++ { + if f := Exp(vfexpSC[i]); !alike(expSC[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vfexpSC[i], f, expSC[i]) + } + } +} + +func TestExpm1(t *testing.T) { + for i := 0; i < 
len(vf); i++ { + a := vf[i] / 100 + if f := Expm1(a); !veryclose(expm1[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1[i]) + } + } + for i := 0; i < len(vf); i++ { + a := vf[i] * 10 + if f := Expm1(a); !close(expm1Large[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1Large[i]) + } + } + for i := 0; i < len(vfexpm1SC); i++ { + if f := Expm1(vfexpm1SC[i]); !alike(expm1SC[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", vfexpm1SC[i], f, expm1SC[i]) + } + } +} + +func TestExp2(t *testing.T) { + testExp2(t, Exp2, "Exp2") + testExp2(t, Exp2Go, "Exp2Go") +} + +func testExp2(t *testing.T, Exp2 func(float64) float64, name string) { + for i := 0; i < len(vf); i++ { + if f := Exp2(vf[i]); !close(exp2[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp2[i]) + } + } + for i := 0; i < len(vfexp2SC); i++ { + if f := Exp2(vfexp2SC[i]); !alike(exp2SC[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vfexp2SC[i], f, exp2SC[i]) + } + } + for n := -1074; n < 1024; n++ { + f := Exp2(float64(n)) + vf := Ldexp(1, n) + if f != vf { + t.Errorf("%s(%d) = %g, want %g", name, n, f, vf) + } + } +} + +func TestAbs(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Abs(vf[i]); fabs[i] != f { + t.Errorf("Abs(%g) = %g, want %g", vf[i], f, fabs[i]) + } + } + for i := 0; i < len(vffabsSC); i++ { + if f := Abs(vffabsSC[i]); !alike(fabsSC[i], f) { + t.Errorf("Abs(%g) = %g, want %g", vffabsSC[i], f, fabsSC[i]) + } + } +} + +func TestDim(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Dim(vf[i], 0); fdim[i] != f { + t.Errorf("Dim(%g, %g) = %g, want %g", vf[i], 0.0, f, fdim[i]) + } + } + for i := 0; i < len(vffdimSC); i++ { + if f := Dim(vffdimSC[i][0], vffdimSC[i][1]); !alike(fdimSC[i], f) { + t.Errorf("Dim(%g, %g) = %g, want %g", vffdimSC[i][0], vffdimSC[i][1], f, fdimSC[i]) + } + } + for i := 0; i < len(vffdim2SC); i++ { + if f := Dim(vffdim2SC[i][0], vffdim2SC[i][1]); !alike(fdimSC[i], f) { + t.Errorf("Dim(%g, %g) = %g, want %g", vffdim2SC[i][0], 
vffdim2SC[i][1], f, fdimSC[i]) + } + } +} + +func TestFloor(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Floor(vf[i]); !alike(floor[i], f) { + t.Errorf("Floor(%g) = %g, want %g", vf[i], f, floor[i]) + } + } + for i := 0; i < len(vfceilSC); i++ { + if f := Floor(vfceilSC[i]); !alike(ceilSC[i], f) { + t.Errorf("Floor(%g) = %g, want %g", vfceilSC[i], f, ceilSC[i]) + } + } +} + +func TestMax(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Max(vf[i], ceil[i]); ceil[i] != f { + t.Errorf("Max(%g, %g) = %g, want %g", vf[i], ceil[i], f, ceil[i]) + } + } + for i := 0; i < len(vffdimSC); i++ { + if f := Max(vffdimSC[i][0], vffdimSC[i][1]); !alike(fmaxSC[i], f) { + t.Errorf("Max(%g, %g) = %g, want %g", vffdimSC[i][0], vffdimSC[i][1], f, fmaxSC[i]) + } + } + for i := 0; i < len(vffdim2SC); i++ { + if f := Max(vffdim2SC[i][0], vffdim2SC[i][1]); !alike(fmaxSC[i], f) { + t.Errorf("Max(%g, %g) = %g, want %g", vffdim2SC[i][0], vffdim2SC[i][1], f, fmaxSC[i]) + } + } +} + +func TestMin(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Min(vf[i], floor[i]); floor[i] != f { + t.Errorf("Min(%g, %g) = %g, want %g", vf[i], floor[i], f, floor[i]) + } + } + for i := 0; i < len(vffdimSC); i++ { + if f := Min(vffdimSC[i][0], vffdimSC[i][1]); !alike(fminSC[i], f) { + t.Errorf("Min(%g, %g) = %g, want %g", vffdimSC[i][0], vffdimSC[i][1], f, fminSC[i]) + } + } + for i := 0; i < len(vffdim2SC); i++ { + if f := Min(vffdim2SC[i][0], vffdim2SC[i][1]); !alike(fminSC[i], f) { + t.Errorf("Min(%g, %g) = %g, want %g", vffdim2SC[i][0], vffdim2SC[i][1], f, fminSC[i]) + } + } +} + +func TestMod(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Mod(10, vf[i]); fmod[i] != f { + t.Errorf("Mod(10, %g) = %g, want %g", vf[i], f, fmod[i]) + } + } + for i := 0; i < len(vffmodSC); i++ { + if f := Mod(vffmodSC[i][0], vffmodSC[i][1]); !alike(fmodSC[i], f) { + t.Errorf("Mod(%g, %g) = %g, want %g", vffmodSC[i][0], vffmodSC[i][1], f, fmodSC[i]) + } + } + // verify precision of 
result for extreme inputs + if f := Mod(5.9790119248836734e+200, 1.1258465975523544); 0.6447968302508578 != f { + t.Errorf("Mod(5.9790119248836734e+200, 1.1258465975523544) = %g, want 0.6447968302508578", f) + } +} + +func TestFrexp(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f, j := Frexp(vf[i]); !veryclose(frexp[i].f, f) || frexp[i].i != j { + t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vf[i], f, j, frexp[i].f, frexp[i].i) + } + } + for i := 0; i < len(vffrexpSC); i++ { + if f, j := Frexp(vffrexpSC[i]); !alike(frexpSC[i].f, f) || frexpSC[i].i != j { + t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpSC[i], f, j, frexpSC[i].f, frexpSC[i].i) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if f, j := Frexp(vffrexpBC[i]); !alike(frexpBC[i].f, f) || frexpBC[i].i != j { + t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpBC[i], f, j, frexpBC[i].f, frexpBC[i].i) + } + } +} + +func TestGamma(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Gamma(vf[i]); !close(gamma[i], f) { + t.Errorf("Gamma(%g) = %g, want %g", vf[i], f, gamma[i]) + } + } + for _, g := range vfgamma { + f := Gamma(g[0]) + var ok bool + if IsNaN(g[1]) || IsInf(g[1], 0) || g[1] == 0 || f == 0 { + ok = alike(g[1], f) + } else if g[0] > -50 && g[0] <= 171 { + ok = veryclose(g[1], f) + } else { + ok = close(g[1], f) + } + if !ok { + t.Errorf("Gamma(%g) = %g, want %g", g[0], f, g[1]) + } + } +} + +func TestHypot(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(1e200 * tanh[i] * Sqrt(2)) + if f := Hypot(1e200*tanh[i], 1e200*tanh[i]); !veryclose(a, f) { + t.Errorf("Hypot(%g, %g) = %g, want %g", 1e200*tanh[i], 1e200*tanh[i], f, a) + } + } + for i := 0; i < len(vfhypotSC); i++ { + if f := Hypot(vfhypotSC[i][0], vfhypotSC[i][1]); !alike(hypotSC[i], f) { + t.Errorf("Hypot(%g, %g) = %g, want %g", vfhypotSC[i][0], vfhypotSC[i][1], f, hypotSC[i]) + } + } +} + +func TestHypotGo(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(1e200 * tanh[i] * Sqrt(2)) + if f := 
HypotGo(1e200*tanh[i], 1e200*tanh[i]); !veryclose(a, f) { + t.Errorf("HypotGo(%g, %g) = %g, want %g", 1e200*tanh[i], 1e200*tanh[i], f, a) + } + } + for i := 0; i < len(vfhypotSC); i++ { + if f := HypotGo(vfhypotSC[i][0], vfhypotSC[i][1]); !alike(hypotSC[i], f) { + t.Errorf("HypotGo(%g, %g) = %g, want %g", vfhypotSC[i][0], vfhypotSC[i][1], f, hypotSC[i]) + } + } +} + +func TestIlogb(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := frexp[i].i - 1 // adjust because fr in the interval [½, 1) + if e := Ilogb(vf[i]); a != e { + t.Errorf("Ilogb(%g) = %d, want %d", vf[i], e, a) + } + } + for i := 0; i < len(vflogbSC); i++ { + if e := Ilogb(vflogbSC[i]); ilogbSC[i] != e { + t.Errorf("Ilogb(%g) = %d, want %d", vflogbSC[i], e, ilogbSC[i]) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if e := Ilogb(vffrexpBC[i]); int(logbBC[i]) != e { + t.Errorf("Ilogb(%g) = %d, want %d", vffrexpBC[i], e, int(logbBC[i])) + } + } +} + +func TestJ0(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := J0(vf[i]); !soclose(j0[i], f, 4e-14) { + t.Errorf("J0(%g) = %g, want %g", vf[i], f, j0[i]) + } + } + for i := 0; i < len(vfj0SC); i++ { + if f := J0(vfj0SC[i]); !alike(j0SC[i], f) { + t.Errorf("J0(%g) = %g, want %g", vfj0SC[i], f, j0SC[i]) + } + } +} + +func TestJ1(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := J1(vf[i]); !close(j1[i], f) { + t.Errorf("J1(%g) = %g, want %g", vf[i], f, j1[i]) + } + } + for i := 0; i < len(vfj0SC); i++ { + if f := J1(vfj0SC[i]); !alike(j1SC[i], f) { + t.Errorf("J1(%g) = %g, want %g", vfj0SC[i], f, j1SC[i]) + } + } +} + +func TestJn(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Jn(2, vf[i]); !close(j2[i], f) { + t.Errorf("Jn(2, %g) = %g, want %g", vf[i], f, j2[i]) + } + if f := Jn(-3, vf[i]); !close(jM3[i], f) { + t.Errorf("Jn(-3, %g) = %g, want %g", vf[i], f, jM3[i]) + } + } + for i := 0; i < len(vfj0SC); i++ { + if f := Jn(2, vfj0SC[i]); !alike(j2SC[i], f) { + t.Errorf("Jn(2, %g) = %g, want %g", vfj0SC[i], f, j2SC[i]) + } + 
if f := Jn(-3, vfj0SC[i]); !alike(jM3SC[i], f) { + t.Errorf("Jn(-3, %g) = %g, want %g", vfj0SC[i], f, jM3SC[i]) + } + } +} + +func TestLdexp(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Ldexp(frexp[i].f, frexp[i].i); !veryclose(vf[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", frexp[i].f, frexp[i].i, f, vf[i]) + } + } + for i := 0; i < len(vffrexpSC); i++ { + if f := Ldexp(frexpSC[i].f, frexpSC[i].i); !alike(vffrexpSC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpSC[i].f, frexpSC[i].i, f, vffrexpSC[i]) + } + } + for i := 0; i < len(vfldexpSC); i++ { + if f := Ldexp(vfldexpSC[i].f, vfldexpSC[i].i); !alike(ldexpSC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", vfldexpSC[i].f, vfldexpSC[i].i, f, ldexpSC[i]) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if f := Ldexp(frexpBC[i].f, frexpBC[i].i); !alike(vffrexpBC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpBC[i].f, frexpBC[i].i, f, vffrexpBC[i]) + } + } + for i := 0; i < len(vfldexpBC); i++ { + if f := Ldexp(vfldexpBC[i].f, vfldexpBC[i].i); !alike(ldexpBC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", vfldexpBC[i].f, vfldexpBC[i].i, f, ldexpBC[i]) + } + } +} + +func TestLgamma(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f, s := Lgamma(vf[i]); !close(lgamma[i].f, f) || lgamma[i].i != s { + t.Errorf("Lgamma(%g) = %g, %d, want %g, %d", vf[i], f, s, lgamma[i].f, lgamma[i].i) + } + } + for i := 0; i < len(vflgammaSC); i++ { + if f, s := Lgamma(vflgammaSC[i]); !alike(lgammaSC[i].f, f) || lgammaSC[i].i != s { + t.Errorf("Lgamma(%g) = %g, %d, want %g, %d", vflgammaSC[i], f, s, lgammaSC[i].f, lgammaSC[i].i) + } + } +} + +func TestLog(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log(a); log[i] != f { + t.Errorf("Log(%g) = %g, want %g", a, f, log[i]) + } + } + if f := Log(10); f != Ln10 { + t.Errorf("Log(%g) = %g, want %g", 10.0, f, Ln10) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log(vflogSC[i]); !alike(logSC[i], f) { + 
t.Errorf("Log(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} + +func TestLogb(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Logb(vf[i]); logb[i] != f { + t.Errorf("Logb(%g) = %g, want %g", vf[i], f, logb[i]) + } + } + for i := 0; i < len(vflogbSC); i++ { + if f := Logb(vflogbSC[i]); !alike(logbSC[i], f) { + t.Errorf("Logb(%g) = %g, want %g", vflogbSC[i], f, logbSC[i]) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if f := Logb(vffrexpBC[i]); !alike(logbBC[i], f) { + t.Errorf("Logb(%g) = %g, want %g", vffrexpBC[i], f, logbBC[i]) + } + } +} + +func TestLog10(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log10(a); !veryclose(log10[i], f) { + t.Errorf("Log10(%g) = %g, want %g", a, f, log10[i]) + } + } + if f := Log10(E); f != Log10E { + t.Errorf("Log10(%g) = %g, want %g", E, f, Log10E) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log10(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} + +func TestLog1p(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 100 + if f := Log1p(a); !veryclose(log1p[i], f) { + t.Errorf("Log1p(%g) = %g, want %g", a, f, log1p[i]) + } + } + a := 9.0 + if f := Log1p(a); f != Ln10 { + t.Errorf("Log1p(%g) = %g, want %g", a, f, Ln10) + } + // Bound the special-case loop by vflog1pSC, the table it actually indexes. + for i := 0; i < len(vflog1pSC); i++ { + if f := Log1p(vflog1pSC[i]); !alike(log1pSC[i], f) { + t.Errorf("Log1p(%g) = %g, want %g", vflog1pSC[i], f, log1pSC[i]) + } + } +} + +func TestLog2(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log2(a); !veryclose(log2[i], f) { + t.Errorf("Log2(%g) = %g, want %g", a, f, log2[i]) + } + } + if f := Log2(E); f != Log2E { + t.Errorf("Log2(%g) = %g, want %g", E, f, Log2E) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log2(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log2(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } + for i := -1074; i <= 1023; i++ { + f := Ldexp(1, i) + l := Log2(f) + if l != float64(i) 
{ + t.Errorf("Log2(2**%d) = %g, want %d", i, l, i) + } + } +} + +func TestModf(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f, g := Modf(vf[i]); !veryclose(modf[i][0], f) || !veryclose(modf[i][1], g) { + t.Errorf("Modf(%g) = %g, %g, want %g, %g", vf[i], f, g, modf[i][0], modf[i][1]) + } + } + for i := 0; i < len(vfmodfSC); i++ { + if f, g := Modf(vfmodfSC[i]); !alike(modfSC[i][0], f) || !alike(modfSC[i][1], g) { + t.Errorf("Modf(%g) = %g, %g, want %g, %g", vfmodfSC[i], f, g, modfSC[i][0], modfSC[i][1]) + } + } +} + +func TestNextafter32(t *testing.T) { + for i := 0; i < len(vf); i++ { + vfi := float32(vf[i]) + if f := Nextafter32(vfi, 10); nextafter32[i] != f { + t.Errorf("Nextafter32(%g, %g) = %g want %g", vfi, 10.0, f, nextafter32[i]) + } + } + for i := 0; i < len(vfnextafter32SC); i++ { + if f := Nextafter32(vfnextafter32SC[i][0], vfnextafter32SC[i][1]); !alike(float64(nextafter32SC[i]), float64(f)) { + t.Errorf("Nextafter32(%g, %g) = %g want %g", vfnextafter32SC[i][0], vfnextafter32SC[i][1], f, nextafter32SC[i]) + } + } +} + +func TestNextafter64(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Nextafter(vf[i], 10); nextafter64[i] != f { + t.Errorf("Nextafter64(%g, %g) = %g want %g", vf[i], 10.0, f, nextafter64[i]) + } + } + for i := 0; i < len(vfnextafter64SC); i++ { + if f := Nextafter(vfnextafter64SC[i][0], vfnextafter64SC[i][1]); !alike(nextafter64SC[i], f) { + t.Errorf("Nextafter64(%g, %g) = %g want %g", vfnextafter64SC[i][0], vfnextafter64SC[i][1], f, nextafter64SC[i]) + } + } +} + +func TestPow(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Pow(10, vf[i]); !close(pow[i], f) { + t.Errorf("Pow(10, %g) = %g, want %g", vf[i], f, pow[i]) + } + } + for i := 0; i < len(vfpowSC); i++ { + if f := Pow(vfpowSC[i][0], vfpowSC[i][1]); !alike(powSC[i], f) { + t.Errorf("Pow(%g, %g) = %g, want %g", vfpowSC[i][0], vfpowSC[i][1], f, powSC[i]) + } + } +} + +func TestPow10(t *testing.T) { + for i := 0; i < len(vfpow10SC); i++ { + if f := 
Pow10(vfpow10SC[i]); !alike(pow10SC[i], f) { + t.Errorf("Pow10(%d) = %g, want %g", vfpow10SC[i], f, pow10SC[i]) + } + } +} + +func TestRemainder(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Remainder(10, vf[i]); remainder[i] != f { + t.Errorf("Remainder(10, %g) = %g, want %g", vf[i], f, remainder[i]) + } + } + for i := 0; i < len(vffmodSC); i++ { + if f := Remainder(vffmodSC[i][0], vffmodSC[i][1]); !alike(fmodSC[i], f) { + t.Errorf("Remainder(%g, %g) = %g, want %g", vffmodSC[i][0], vffmodSC[i][1], f, fmodSC[i]) + } + } + // verify precision of result for extreme inputs + if f := Remainder(5.9790119248836734e+200, 1.1258465975523544); -0.4810497673014966 != f { + t.Errorf("Remainder(5.9790119248836734e+200, 1.1258465975523544) = %g, want -0.4810497673014966", f) + } + // verify that sign is correct when r == 0. + test := func(x, y float64) { + if r := Remainder(x, y); r == 0 && Signbit(r) != Signbit(x) { + t.Errorf("Remainder(x=%f, y=%f) = %f, sign of (zero) result should agree with sign of x", x, y, r) + } + } + for x := 0.0; x <= 3.0; x += 1 { + for y := 1.0; y <= 3.0; y += 1 { + test(x, y) + test(x, -y) + test(-x, y) + test(-x, -y) + } + } +} + +func TestRound(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Round(vf[i]); !alike(round[i], f) { + t.Errorf("Round(%g) = %g, want %g", vf[i], f, round[i]) + } + } + for i := 0; i < len(vfroundSC); i++ { + if f := Round(vfroundSC[i][0]); !alike(vfroundSC[i][1], f) { + t.Errorf("Round(%g) = %g, want %g", vfroundSC[i][0], f, vfroundSC[i][1]) + } + } +} + +func TestRoundToEven(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := RoundToEven(vf[i]); !alike(round[i], f) { + t.Errorf("RoundToEven(%g) = %g, want %g", vf[i], f, round[i]) + } + } + for i := 0; i < len(vfroundEvenSC); i++ { + if f := RoundToEven(vfroundEvenSC[i][0]); !alike(vfroundEvenSC[i][1], f) { + t.Errorf("RoundToEven(%g) = %g, want %g", vfroundEvenSC[i][0], f, vfroundEvenSC[i][1]) + } + } +} + +func TestSignbit(t *testing.T) { 
+ for i := 0; i < len(vf); i++ { + if f := Signbit(vf[i]); signbit[i] != f { + t.Errorf("Signbit(%g) = %t, want %t", vf[i], f, signbit[i]) + } + } + for i := 0; i < len(vfsignbitSC); i++ { + if f := Signbit(vfsignbitSC[i]); signbitSC[i] != f { + t.Errorf("Signbit(%g) = %t, want %t", vfsignbitSC[i], f, signbitSC[i]) + } + } +} +func TestSin(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Sin(vf[i]); !veryclose(sin[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vf[i], f, sin[i]) + } + } + for i := 0; i < len(vfsinSC); i++ { + if f := Sin(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestSincos(t *testing.T) { + for i := 0; i < len(vf); i++ { + if s, c := Sincos(vf[i]); !veryclose(sin[i], s) || !veryclose(cos[i], c) { + t.Errorf("Sincos(%g) = %g, %g want %g, %g", vf[i], s, c, sin[i], cos[i]) + } + } +} + +func TestSinh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Sinh(vf[i]); !close(sinh[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vf[i], f, sinh[i]) + } + } + for i := 0; i < len(vfsinhSC); i++ { + if f := Sinh(vfsinhSC[i]); !alike(sinhSC[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vfsinhSC[i], f, sinhSC[i]) + } + } +} + +func TestSqrt(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := SqrtGo(a); sqrt[i] != f { + t.Errorf("SqrtGo(%g) = %g, want %g", a, f, sqrt[i]) + } + a = Abs(vf[i]) + if f := Sqrt(a); sqrt[i] != f { + t.Errorf("Sqrt(%g) = %g, want %g", a, f, sqrt[i]) + } + } + for i := 0; i < len(vfsqrtSC); i++ { + if f := SqrtGo(vfsqrtSC[i]); !alike(sqrtSC[i], f) { + t.Errorf("SqrtGo(%g) = %g, want %g", vfsqrtSC[i], f, sqrtSC[i]) + } + if f := Sqrt(vfsqrtSC[i]); !alike(sqrtSC[i], f) { + t.Errorf("Sqrt(%g) = %g, want %g", vfsqrtSC[i], f, sqrtSC[i]) + } + } +} + +func TestTan(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Tan(vf[i]); !veryclose(tan[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vf[i], f, tan[i]) + } + } + // same special 
cases as Sin + for i := 0; i < len(vfsinSC); i++ { + if f := Tan(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestTanh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Tanh(vf[i]); !veryclose(tanh[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vf[i], f, tanh[i]) + } + } + for i := 0; i < len(vftanhSC); i++ { + if f := Tanh(vftanhSC[i]); !alike(tanhSC[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vftanhSC[i], f, tanhSC[i]) + } + } +} + +func TestTrunc(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Trunc(vf[i]); !alike(trunc[i], f) { + t.Errorf("Trunc(%g) = %g, want %g", vf[i], f, trunc[i]) + } + } + for i := 0; i < len(vfceilSC); i++ { + if f := Trunc(vfceilSC[i]); !alike(ceilSC[i], f) { + t.Errorf("Trunc(%g) = %g, want %g", vfceilSC[i], f, ceilSC[i]) + } + } +} + +func TestY0(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Y0(a); !close(y0[i], f) { + t.Errorf("Y0(%g) = %g, want %g", a, f, y0[i]) + } + } + for i := 0; i < len(vfy0SC); i++ { + if f := Y0(vfy0SC[i]); !alike(y0SC[i], f) { + t.Errorf("Y0(%g) = %g, want %g", vfy0SC[i], f, y0SC[i]) + } + } +} + +func TestY1(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Y1(a); !soclose(y1[i], f, 2e-14) { + t.Errorf("Y1(%g) = %g, want %g", a, f, y1[i]) + } + } + for i := 0; i < len(vfy0SC); i++ { + if f := Y1(vfy0SC[i]); !alike(y1SC[i], f) { + t.Errorf("Y1(%g) = %g, want %g", vfy0SC[i], f, y1SC[i]) + } + } +} + +func TestYn(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Yn(2, a); !close(y2[i], f) { + t.Errorf("Yn(2, %g) = %g, want %g", a, f, y2[i]) + } + if f := Yn(-3, a); !close(yM3[i], f) { + t.Errorf("Yn(-3, %g) = %g, want %g", a, f, yM3[i]) + } + } + for i := 0; i < len(vfy0SC); i++ { + if f := Yn(2, vfy0SC[i]); !alike(y2SC[i], f) { + t.Errorf("Yn(2, %g) = %g, want %g", vfy0SC[i], f, y2SC[i]) + } + if f := Yn(-3, vfy0SC[i]); !alike(yM3SC[i], 
f) { + t.Errorf("Yn(-3, %g) = %g, want %g", vfy0SC[i], f, yM3SC[i]) + } + } + if f := Yn(0, 0); !alike(Inf(-1), f) { + t.Errorf("Yn(0, 0) = %g, want %g", f, Inf(-1)) + } +} + +var PortableFMA = FMA // hide call from compiler intrinsic; falls back to portable code + +func TestFMA(t *testing.T) { + for _, c := range fmaC { + got := FMA(c.x, c.y, c.z) + if !alike(got, c.want) { + t.Errorf("FMA(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want) + } + got = PortableFMA(c.x, c.y, c.z) + if !alike(got, c.want) { + t.Errorf("PortableFMA(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want) + } + } +} + +// Check that math functions of high angle values +// return accurate results. [Since (vf[i] + large) - large != vf[i], +// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is +// a multiple of 2*Pi, is misleading.] +func TestLargeCos(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := cosLarge[i] + f2 := Cos(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeSin(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := sinLarge[i] + f2 := Sin(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeSincos(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1, g1 := sinLarge[i], cosLarge[i] + f2, g2 := Sincos(vf[i] + large) + if !close(f1, f2) || !close(g1, g2) { + t.Errorf("Sincos(%g) = %g, %g, want %g, %g", vf[i]+large, f2, g2, f1, g1) + } + } +} + +func TestLargeTan(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := tanLarge[i] + f2 := Tan(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Tan(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +// Check that trigReduce matches the standard reduction results for input values +// below reduceThreshold. 
+func TestTrigReduce(t *testing.T) { + inputs := make([]float64, len(vf)) + // all of the standard inputs + copy(inputs, vf) + // all of the large inputs + large := float64(100000 * Pi) + for _, v := range vf { + inputs = append(inputs, v+large) + } + // Also test some special inputs, Pi and right below the reduceThreshold + inputs = append(inputs, Pi, Nextafter(ReduceThreshold, 0)) + for _, x := range inputs { + // reduce the value to compare + j, z := TrigReduce(x) + xred := float64(j)*(Pi/4) + z + + if f, fred := Sin(x), Sin(xred); !close(f, fred) { + t.Errorf("Sin(trigReduce(%g)) != Sin(%g), got %g, want %g", x, x, fred, f) + } + if f, fred := Cos(x), Cos(xred); !close(f, fred) { + t.Errorf("Cos(trigReduce(%g)) != Cos(%g), got %g, want %g", x, x, fred, f) + } + if f, fred := Tan(x), Tan(xred); !close(f, fred) { + t.Errorf(" Tan(trigReduce(%g)) != Tan(%g), got %g, want %g", x, x, fred, f) + } + f, g := Sincos(x) + fred, gred := Sincos(xred) + if !close(f, fred) || !close(g, gred) { + t.Errorf(" Sincos(trigReduce(%g)) != Sincos(%g), got %g, %g, want %g, %g", x, x, fred, gred, f, g) + } + } +} + +// Check that math constants are accepted by compiler +// and have right value (assumes strconv.ParseFloat works). 
+// https://golang.org/issue/201 + +type floatTest struct { + val any + name string + str string +} + +var floatTests = []floatTest{ + {float64(MaxFloat64), "MaxFloat64", "1.7976931348623157e+308"}, + {float64(SmallestNonzeroFloat64), "SmallestNonzeroFloat64", "5e-324"}, + {float32(MaxFloat32), "MaxFloat32", "3.4028235e+38"}, + {float32(SmallestNonzeroFloat32), "SmallestNonzeroFloat32", "1e-45"}, +} + +func TestFloatMinMax(t *testing.T) { + for _, tt := range floatTests { + s := fmt.Sprint(tt.val) + if s != tt.str { + t.Errorf("Sprint(%v) = %s, want %s", tt.name, s, tt.str) + } + } +} + +func TestFloatMinima(t *testing.T) { + if q := float32(SmallestNonzeroFloat32 / 2); q != 0 { + t.Errorf("float32(SmallestNonzeroFloat32 / 2) = %g, want 0", q) + } + if q := float64(SmallestNonzeroFloat64 / 2); q != 0 { + t.Errorf("float64(SmallestNonzeroFloat64 / 2) = %g, want 0", q) + } +} + +var indirectSqrt = Sqrt + +// TestFloat32Sqrt checks the correctness of the float32 square root optimization result. +func TestFloat32Sqrt(t *testing.T) { + for _, v := range sqrt32 { + want := float32(indirectSqrt(float64(v))) + got := float32(Sqrt(float64(v))) + if IsNaN(float64(want)) { + if !IsNaN(float64(got)) { + t.Errorf("got=%#v want=NaN, v=%#v", got, v) + } + continue + } + if got != want { + t.Errorf("got=%#v want=%#v, v=%#v", got, want, v) + } + } +} + +// Benchmarks + +// Global exported variables are used to store the +// return values of functions measured in the benchmarks. +// Storing the results in these variables prevents the compiler +// from completely optimizing the benchmarked functions away. 
+var ( + GlobalI int + GlobalB bool + GlobalF float64 +) + +func BenchmarkAcos(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Acos(.5) + } + GlobalF = x +} + +func BenchmarkAcosh(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Acosh(1.5) + } + GlobalF = x +} + +func BenchmarkAsin(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Asin(.5) + } + GlobalF = x +} + +func BenchmarkAsinh(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Asinh(.5) + } + GlobalF = x +} + +func BenchmarkAtan(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Atan(.5) + } + GlobalF = x +} + +func BenchmarkAtanh(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Atanh(.5) + } + GlobalF = x +} + +func BenchmarkAtan2(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Atan2(.5, 1) + } + GlobalF = x +} + +func BenchmarkCbrt(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Cbrt(10) + } + GlobalF = x +} + +func BenchmarkCeil(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Ceil(.5) + } + GlobalF = x +} + +var copysignNeg = -1.0 + +func BenchmarkCopysign(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Copysign(.5, copysignNeg) + } + GlobalF = x +} + +func BenchmarkCos(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Cos(.5) + } + GlobalF = x +} + +func BenchmarkCosh(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Cosh(2.5) + } + GlobalF = x +} + +func BenchmarkErf(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Erf(.5) + } + GlobalF = x +} + +func BenchmarkErfc(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Erfc(.5) + } + GlobalF = x +} + +func BenchmarkErfinv(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Erfinv(.5) + } + GlobalF = x +} + +func BenchmarkErfcinv(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Erfcinv(.5) + } + GlobalF = x +} + +func BenchmarkExp(b *testing.B) { + x := 0.0 + 
for i := 0; i < b.N; i++ { + x = Exp(.5) + } + GlobalF = x +} + +func BenchmarkExpGo(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = ExpGo(.5) + } + GlobalF = x +} + +func BenchmarkExpm1(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Expm1(.5) + } + GlobalF = x +} + +func BenchmarkExp2(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Exp2(.5) + } + GlobalF = x +} + +func BenchmarkExp2Go(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Exp2Go(.5) + } + GlobalF = x +} + +var absPos = .5 + +func BenchmarkAbs(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Abs(absPos) + } + GlobalF = x + +} + +func BenchmarkDim(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Dim(GlobalF, x) + } + GlobalF = x +} + +func BenchmarkFloor(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Floor(.5) + } + GlobalF = x +} + +func BenchmarkMax(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Max(10, 3) + } + GlobalF = x +} + +func BenchmarkMin(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Min(10, 3) + } + GlobalF = x +} + +func BenchmarkMod(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Mod(10, 3) + } + GlobalF = x +} + +func BenchmarkFrexp(b *testing.B) { + x := 0.0 + y := 0 + for i := 0; i < b.N; i++ { + x, y = Frexp(8) + } + GlobalF = x + GlobalI = y +} + +func BenchmarkGamma(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Gamma(2.5) + } + GlobalF = x +} + +func BenchmarkHypot(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Hypot(3, 4) + } + GlobalF = x +} + +func BenchmarkHypotGo(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = HypotGo(3, 4) + } + GlobalF = x +} + +func BenchmarkIlogb(b *testing.B) { + x := 0 + for i := 0; i < b.N; i++ { + x = Ilogb(.5) + } + GlobalI = x +} + +func BenchmarkJ0(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = J0(2.5) + } + GlobalF = x +} + +func BenchmarkJ1(b 
*testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = J1(2.5) + } + GlobalF = x +} + +func BenchmarkJn(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Jn(2, 2.5) + } + GlobalF = x +} + +func BenchmarkLdexp(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Ldexp(.5, 2) + } + GlobalF = x +} + +func BenchmarkLgamma(b *testing.B) { + x := 0.0 + y := 0 + for i := 0; i < b.N; i++ { + x, y = Lgamma(2.5) + } + GlobalF = x + GlobalI = y +} + +func BenchmarkLog(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Log(.5) + } + GlobalF = x +} + +func BenchmarkLogb(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Logb(.5) + } + GlobalF = x +} + +func BenchmarkLog1p(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Log1p(.5) + } + GlobalF = x +} + +func BenchmarkLog10(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Log10(.5) + } + GlobalF = x +} + +func BenchmarkLog2(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Log2(.5) + } + GlobalF += x +} + +func BenchmarkModf(b *testing.B) { + x := 0.0 + y := 0.0 + for i := 0; i < b.N; i++ { + x, y = Modf(1.5) + } + GlobalF += x + GlobalF += y +} + +func BenchmarkNextafter32(b *testing.B) { + x := float32(0.0) + for i := 0; i < b.N; i++ { + x = Nextafter32(.5, 1) + } + GlobalF = float64(x) +} + +func BenchmarkNextafter64(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Nextafter(.5, 1) + } + GlobalF = x +} + +func BenchmarkPowInt(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Pow(2, 2) + } + GlobalF = x +} + +func BenchmarkPowFrac(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Pow(2.5, 1.5) + } + GlobalF = x +} + +var pow10pos = int(300) + +func BenchmarkPow10Pos(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Pow10(pow10pos) + } + GlobalF = x +} + +var pow10neg = int(-300) + +func BenchmarkPow10Neg(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Pow10(pow10neg) + } + GlobalF 
= x +} + +var roundNeg = float64(-2.5) + +func BenchmarkRound(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Round(roundNeg) + } + GlobalF = x +} + +func BenchmarkRoundToEven(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = RoundToEven(roundNeg) + } + GlobalF = x +} + +func BenchmarkRemainder(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Remainder(10, 3) + } + GlobalF = x +} + +var signbitPos = 2.5 + +func BenchmarkSignbit(b *testing.B) { + x := false + for i := 0; i < b.N; i++ { + x = Signbit(signbitPos) + } + GlobalB = x +} + +func BenchmarkSin(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Sin(.5) + } + GlobalF = x +} + +func BenchmarkSincos(b *testing.B) { + x := 0.0 + y := 0.0 + for i := 0; i < b.N; i++ { + x, y = Sincos(.5) + } + GlobalF += x + GlobalF += y +} + +func BenchmarkSinh(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Sinh(2.5) + } + GlobalF = x +} + +func BenchmarkSqrtIndirect(b *testing.B) { + x, y := 0.0, 10.0 + f := Sqrt + for i := 0; i < b.N; i++ { + x += f(y) + } + GlobalF = x +} + +func BenchmarkSqrtLatency(b *testing.B) { + x := 10.0 + for i := 0; i < b.N; i++ { + x = Sqrt(x) + } + GlobalF = x +} + +func BenchmarkSqrtIndirectLatency(b *testing.B) { + x := 10.0 + f := Sqrt + for i := 0; i < b.N; i++ { + x = f(x) + } + GlobalF = x +} + +func BenchmarkSqrtGoLatency(b *testing.B) { + x := 10.0 + for i := 0; i < b.N; i++ { + x = SqrtGo(x) + } + GlobalF = x +} + +func isPrime(i int) bool { + // Yes, this is a dumb way to write this code, + // but calling Sqrt repeatedly in this way demonstrates + // the benefit of using a direct SQRT instruction on systems + // that have one, whereas the obvious loop seems not to + // demonstrate such a benefit. 
+ for j := 2; float64(j) <= Sqrt(float64(i)); j++ { + if i%j == 0 { + return false + } + } + return true +} + +func BenchmarkSqrtPrime(b *testing.B) { + x := false + for i := 0; i < b.N; i++ { + x = isPrime(100003) + } + GlobalB = x +} + +func BenchmarkTan(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Tan(.5) + } + GlobalF = x +} + +func BenchmarkTanh(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Tanh(2.5) + } + GlobalF = x +} +func BenchmarkTrunc(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Trunc(.5) + } + GlobalF = x +} + +func BenchmarkY0(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Y0(2.5) + } + GlobalF = x +} + +func BenchmarkY1(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Y1(2.5) + } + GlobalF = x +} + +func BenchmarkYn(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Yn(2, 2.5) + } + GlobalF = x +} + +func BenchmarkFloat64bits(b *testing.B) { + y := uint64(0) + for i := 0; i < b.N; i++ { + y = Float64bits(roundNeg) + } + GlobalI = int(y) +} + +var roundUint64 = uint64(5) + +func BenchmarkFloat64frombits(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Float64frombits(roundUint64) + } + GlobalF = x +} + +var roundFloat32 = float32(-2.5) + +func BenchmarkFloat32bits(b *testing.B) { + y := uint32(0) + for i := 0; i < b.N; i++ { + y = Float32bits(roundFloat32) + } + GlobalI = int(y) +} + +var roundUint32 = uint32(5) + +func BenchmarkFloat32frombits(b *testing.B) { + x := float32(0.0) + for i := 0; i < b.N; i++ { + x = Float32frombits(roundUint32) + } + GlobalF = float64(x) +} + +func BenchmarkFMA(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = FMA(E, Pi, x) + } + GlobalF = x +} diff --git a/src/math/arith_s390x.go b/src/math/arith_s390x.go new file mode 100644 index 0000000..129156a --- /dev/null +++ b/src/math/arith_s390x.go @@ -0,0 +1,170 @@ +// Copyright 2016 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +import "internal/cpu" + +func expTrampolineSetup(x float64) float64 +func expAsm(x float64) float64 + +func logTrampolineSetup(x float64) float64 +func logAsm(x float64) float64 + +// Below here all functions are grouped in stubs.go for other +// architectures. + +const haveArchLog10 = true + +func archLog10(x float64) float64 +func log10TrampolineSetup(x float64) float64 +func log10Asm(x float64) float64 + +const haveArchCos = true + +func archCos(x float64) float64 +func cosTrampolineSetup(x float64) float64 +func cosAsm(x float64) float64 + +const haveArchCosh = true + +func archCosh(x float64) float64 +func coshTrampolineSetup(x float64) float64 +func coshAsm(x float64) float64 + +const haveArchSin = true + +func archSin(x float64) float64 +func sinTrampolineSetup(x float64) float64 +func sinAsm(x float64) float64 + +const haveArchSinh = true + +func archSinh(x float64) float64 +func sinhTrampolineSetup(x float64) float64 +func sinhAsm(x float64) float64 + +const haveArchTanh = true + +func archTanh(x float64) float64 +func tanhTrampolineSetup(x float64) float64 +func tanhAsm(x float64) float64 + +const haveArchLog1p = true + +func archLog1p(x float64) float64 +func log1pTrampolineSetup(x float64) float64 +func log1pAsm(x float64) float64 + +const haveArchAtanh = true + +func archAtanh(x float64) float64 +func atanhTrampolineSetup(x float64) float64 +func atanhAsm(x float64) float64 + +const haveArchAcos = true + +func archAcos(x float64) float64 +func acosTrampolineSetup(x float64) float64 +func acosAsm(x float64) float64 + +const haveArchAcosh = true + +func archAcosh(x float64) float64 +func acoshTrampolineSetup(x float64) float64 +func acoshAsm(x float64) float64 + +const haveArchAsin = true + +func archAsin(x float64) float64 +func asinTrampolineSetup(x float64) float64 +func asinAsm(x float64) float64 + +const haveArchAsinh = true + 
+func archAsinh(x float64) float64 +func asinhTrampolineSetup(x float64) float64 +func asinhAsm(x float64) float64 + +const haveArchErf = true + +func archErf(x float64) float64 +func erfTrampolineSetup(x float64) float64 +func erfAsm(x float64) float64 + +const haveArchErfc = true + +func archErfc(x float64) float64 +func erfcTrampolineSetup(x float64) float64 +func erfcAsm(x float64) float64 + +const haveArchAtan = true + +func archAtan(x float64) float64 +func atanTrampolineSetup(x float64) float64 +func atanAsm(x float64) float64 + +const haveArchAtan2 = true + +func archAtan2(y, x float64) float64 +func atan2TrampolineSetup(x, y float64) float64 +func atan2Asm(x, y float64) float64 + +const haveArchCbrt = true + +func archCbrt(x float64) float64 +func cbrtTrampolineSetup(x float64) float64 +func cbrtAsm(x float64) float64 + +const haveArchTan = true + +func archTan(x float64) float64 +func tanTrampolineSetup(x float64) float64 +func tanAsm(x float64) float64 + +const haveArchExpm1 = true + +func archExpm1(x float64) float64 +func expm1TrampolineSetup(x float64) float64 +func expm1Asm(x float64) float64 + +const haveArchPow = true + +func archPow(x, y float64) float64 +func powTrampolineSetup(x, y float64) float64 +func powAsm(x, y float64) float64 + +const haveArchFrexp = false + +func archFrexp(x float64) (float64, int) { + panic("not implemented") +} + +const haveArchLdexp = false + +func archLdexp(frac float64, exp int) float64 { + panic("not implemented") +} + +const haveArchLog2 = false + +func archLog2(x float64) float64 { + panic("not implemented") +} + +const haveArchMod = false + +func archMod(x, y float64) float64 { + panic("not implemented") +} + +const haveArchRemainder = false + +func archRemainder(x, y float64) float64 { + panic("not implemented") +} + +// hasVX reports whether the machine has the z/Architecture +// vector facility installed and enabled. 
+var hasVX = cpu.S390X.HasVX diff --git a/src/math/arith_s390x_test.go b/src/math/arith_s390x_test.go new file mode 100644 index 0000000..cfbc7b7 --- /dev/null +++ b/src/math/arith_s390x_test.go @@ -0,0 +1,442 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests whether the non vector routines are working, even when the tests are run on a +// vector-capable machine. +package math_test + +import ( + . "math" + "testing" +) + +func TestCosNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := CosNoVec(vf[i]); !veryclose(cos[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vf[i], f, cos[i]) + } + } + for i := 0; i < len(vfcosSC); i++ { + if f := CosNoVec(vfcosSC[i]); !alike(cosSC[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vfcosSC[i], f, cosSC[i]) + } + } +} + +func TestCoshNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := CoshNoVec(vf[i]); !close(cosh[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vf[i], f, cosh[i]) + } + } + for i := 0; i < len(vfcoshSC); i++ { + if f := CoshNoVec(vfcoshSC[i]); !alike(coshSC[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vfcoshSC[i], f, coshSC[i]) + } + } +} +func TestSinNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := SinNoVec(vf[i]); !veryclose(sin[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vf[i], f, sin[i]) + } + } + for i := 0; i < len(vfsinSC); i++ { + if f := SinNoVec(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestSinhNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := SinhNoVec(vf[i]); !close(sinh[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vf[i], f, sinh[i]) + } + } + for i := 0; i < 
len(vfsinhSC); i++ { + if f := SinhNoVec(vfsinhSC[i]); !alike(sinhSC[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vfsinhSC[i], f, sinhSC[i]) + } + } +} + +// Check that math functions of high angle values +// return accurate results. [Since (vf[i] + large) - large != vf[i], +// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is +// a multiple of 2*Pi, is misleading.] +func TestLargeCosNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := cosLarge[i] + f2 := CosNoVec(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeSinNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := sinLarge[i] + f2 := SinNoVec(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeTanNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := tanLarge[i] + f2 := TanNovec(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Tan(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestTanNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := TanNovec(vf[i]); !veryclose(tan[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vf[i], f, tan[i]) + } + } + // same special cases as Sin + for i := 0; i < len(vfsinSC); i++ { + if f := TanNovec(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestTanhNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := TanhNoVec(vf[i]); !veryclose(tanh[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vf[i], f, tanh[i]) + } + } + for i := 0; i < len(vftanhSC); i++ { + if f := 
TanhNoVec(vftanhSC[i]); !alike(tanhSC[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vftanhSC[i], f, tanhSC[i]) + } + } + +} + +func TestLog10Novec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log10NoVec(a); !veryclose(log10[i], f) { + t.Errorf("Log10(%g) = %g, want %g", a, f, log10[i]) + } + } + if f := Log10NoVec(E); f != Log10E { + t.Errorf("Log10(%g) = %g, want %g", E, f, Log10E) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log10NoVec(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} + +func TestLog1pNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 100 + if f := Log1pNovec(a); !veryclose(log1p[i], f) { + t.Errorf("Log1p(%g) = %g, want %g", a, f, log1p[i]) + } + } + a := 9.0 + if f := Log1pNovec(a); f != Ln10 { + t.Errorf("Log1p(%g) = %g, want %g", a, f, Ln10) + } + for i := 0; i < len(vflog1pSC); i++ { // bound by the table being indexed, not the Log table + if f := Log1pNovec(vflog1pSC[i]); !alike(log1pSC[i], f) { + t.Errorf("Log1p(%g) = %g, want %g", vflog1pSC[i], f, log1pSC[i]) + } + } +} + +func TestAtanhNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := AtanhNovec(a); !veryclose(atanh[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", a, f, atanh[i]) + } + } + for i := 0; i < len(vfatanhSC); i++ { + if f := AtanhNovec(vfatanhSC[i]); !alike(atanhSC[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", vfatanhSC[i], f, atanhSC[i]) + } + } +} + +func TestAcosNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := AcosNovec(a); !close(acos[i], f) { + t.Errorf("Acos(%g) = %g, want %g", a, f, acos[i]) + } + } + for i := 0; i < len(vfacosSC); i++ { + if f := AcosNovec(vfacosSC[i]); !alike(acosSC[i], f) { + t.Errorf("Acos(%g) = %g, want %g", vfacosSC[i], f, 
acosSC[i]) + } + } +} + +func TestAsinNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := AsinNovec(a); !veryclose(asin[i], f) { + t.Errorf("Asin(%g) = %g, want %g", a, f, asin[i]) + } + } + for i := 0; i < len(vfasinSC); i++ { + if f := AsinNovec(vfasinSC[i]); !alike(asinSC[i], f) { + t.Errorf("Asin(%g) = %g, want %g", vfasinSC[i], f, asinSC[i]) + } + } +} + +func TestAcoshNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := 1 + Abs(vf[i]) + if f := AcoshNovec(a); !veryclose(acosh[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", a, f, acosh[i]) + } + } + for i := 0; i < len(vfacoshSC); i++ { + if f := AcoshNovec(vfacoshSC[i]); !alike(acoshSC[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", vfacoshSC[i], f, acoshSC[i]) + } + } +} + +func TestAsinhNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := AsinhNovec(vf[i]); !veryclose(asinh[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vf[i], f, asinh[i]) + } + } + for i := 0; i < len(vfasinhSC); i++ { + if f := AsinhNovec(vfasinhSC[i]); !alike(asinhSC[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vfasinhSC[i], f, asinhSC[i]) + } + } +} + +func TestErfNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := ErfNovec(a); !veryclose(erf[i], f) { + t.Errorf("Erf(%g) = %g, want %g", a, f, erf[i]) + } + } + for i := 0; i < len(vferfSC); i++ { + if f := ErfNovec(vferfSC[i]); !alike(erfSC[i], f) { + t.Errorf("Erf(%g) = %g, want %g", vferfSC[i], f, erfSC[i]) + } + } +} + +func TestErfcNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := ErfcNovec(a); !veryclose(erfc[i], f) { + t.Errorf("Erfc(%g) = %g, want %g", a, f, erfc[i]) + } + } + for i := 0; i < len(vferfcSC); i++ { + if f := 
ErfcNovec(vferfcSC[i]); !alike(erfcSC[i], f) { + t.Errorf("Erfc(%g) = %g, want %g", vferfcSC[i], f, erfcSC[i]) + } + } +} + +func TestAtanNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := AtanNovec(vf[i]); !veryclose(atan[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vf[i], f, atan[i]) + } + } + for i := 0; i < len(vfatanSC); i++ { + if f := AtanNovec(vfatanSC[i]); !alike(atanSC[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vfatanSC[i], f, atanSC[i]) + } + } +} + +func TestAtan2Novec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := Atan2Novec(10, vf[i]); !veryclose(atan2[i], f) { + t.Errorf("Atan2(10, %g) = %g, want %g", vf[i], f, atan2[i]) + } + } + for i := 0; i < len(vfatan2SC); i++ { + if f := Atan2Novec(vfatan2SC[i][0], vfatan2SC[i][1]); !alike(atan2SC[i], f) { + t.Errorf("Atan2(%g, %g) = %g, want %g", vfatan2SC[i][0], vfatan2SC[i][1], f, atan2SC[i]) + } + } +} + +func TestCbrtNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := CbrtNovec(vf[i]); !veryclose(cbrt[i], f) { + t.Errorf("Cbrt(%g) = %g, want %g", vf[i], f, cbrt[i]) + } + } + for i := 0; i < len(vfcbrtSC); i++ { + if f := CbrtNovec(vfcbrtSC[i]); !alike(cbrtSC[i], f) { + t.Errorf("Cbrt(%g) = %g, want %g", vfcbrtSC[i], f, cbrtSC[i]) + } + } +} + +func TestLogNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := LogNovec(a); log[i] != f { + t.Errorf("Log(%g) = %g, want %g", a, f, log[i]) + } + } + if f := LogNovec(10); f != Ln10 { + t.Errorf("Log(%g) = %g, want %g", 10.0, f, Ln10) + } + for i := 0; i < len(vflogSC); i++ { + if f := LogNovec(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} + +func TestExpNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + 
testExpNovec(t, Exp, "Exp") + testExpNovec(t, ExpGo, "ExpGo") +} + +func testExpNovec(t *testing.T, Exp func(float64) float64, name string) { + for i := 0; i < len(vf); i++ { + if f := ExpNovec(vf[i]); !veryclose(exp[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i]) + } + } + for i := 0; i < len(vfexpSC); i++ { + if f := ExpNovec(vfexpSC[i]); !alike(expSC[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vfexpSC[i], f, expSC[i]) + } + } +} + +func TestExpm1Novec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := vf[i] / 100 + if f := Expm1Novec(a); !veryclose(expm1[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1[i]) + } + } + for i := 0; i < len(vf); i++ { + a := vf[i] * 10 + if f := Expm1Novec(a); !close(expm1Large[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1Large[i]) + } + } + for i := 0; i < len(vfexpm1SC); i++ { + if f := Expm1Novec(vfexpm1SC[i]); !alike(expm1SC[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", vfexpm1SC[i], f, expm1SC[i]) + } + } +} + +func TestPowNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := PowNovec(10, vf[i]); !close(pow[i], f) { + t.Errorf("Pow(10, %g) = %g, want %g", vf[i], f, pow[i]) + } + } + for i := 0; i < len(vfpowSC); i++ { + if f := PowNovec(vfpowSC[i][0], vfpowSC[i][1]); !alike(powSC[i], f) { + t.Errorf("Pow(%g, %g) = %g, want %g", vfpowSC[i][0], vfpowSC[i][1], f, powSC[i]) + } + } +} diff --git a/src/math/asin.go b/src/math/asin.go new file mode 100644 index 0000000..989a741 --- /dev/null +++ b/src/math/asin.go @@ -0,0 +1,65 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point arcsine and arccosine. + + They are implemented by computing the arctangent + after appropriate range reduction. 
+*/ + +// Asin returns the arcsine, in radians, of x. +// +// Special cases are: +// Asin(±0) = ±0 +// Asin(x) = NaN if x < -1 or x > 1 +func Asin(x float64) float64 { + if haveArchAsin { + return archAsin(x) + } + return asin(x) +} + +func asin(x float64) float64 { + if x == 0 { + return x // special case + } + sign := false + if x < 0 { + x = -x + sign = true + } + if x > 1 { + return NaN() // special case + } + + temp := Sqrt(1 - x*x) + if x > 0.7 { + temp = Pi/2 - satan(temp/x) + } else { + temp = satan(x / temp) + } + + if sign { + temp = -temp + } + return temp +} + +// Acos returns the arccosine, in radians, of x. +// +// Special case is: +// Acos(x) = NaN if x < -1 or x > 1 +func Acos(x float64) float64 { + if haveArchAcos { + return archAcos(x) + } + return acos(x) +} + +func acos(x float64) float64 { + return Pi/2 - Asin(x) +} diff --git a/src/math/asin_s390x.s b/src/math/asin_s390x.s new file mode 100644 index 0000000..dc54d05 --- /dev/null +++ b/src/math/asin_s390x.s @@ -0,0 +1,162 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·asinrodataL15<> + 0(SB)/8, $-1.309611320495605469 +DATA ·asinrodataL15<> + 8(SB)/8, $0x3ff921fb54442d18 //pi/2 +DATA ·asinrodataL15<> + 16(SB)/8, $0xbff921fb54442d18 //-pi/2 +DATA ·asinrodataL15<> + 24(SB)/8, $1.309611320495605469 +DATA ·asinrodataL15<> + 32(SB)/8, $-0.0 +DATA ·asinrodataL15<> + 40(SB)/8, $1.199437040755305217 +DATA ·asinrodataL15<> + 48(SB)/8, $0.166666666666651626E+00 +DATA ·asinrodataL15<> + 56(SB)/8, $0.750000000042621169E-01 +DATA ·asinrodataL15<> + 64(SB)/8, $0.446428567178116477E-01 +DATA ·asinrodataL15<> + 72(SB)/8, $0.303819660378071894E-01 +DATA ·asinrodataL15<> + 80(SB)/8, $0.223715011892010405E-01 +DATA ·asinrodataL15<> + 88(SB)/8, $0.173659424522364952E-01 +DATA ·asinrodataL15<> + 96(SB)/8, $0.137810186504372266E-01 +DATA ·asinrodataL15<> + 104(SB)/8, $0.134066870961173521E-01 +DATA ·asinrodataL15<> + 112(SB)/8, $-.412335502831898721E-02 +DATA ·asinrodataL15<> + 120(SB)/8, $0.867383739532082719E-01 +DATA ·asinrodataL15<> + 128(SB)/8, $-.328765950607171649E+00 +DATA ·asinrodataL15<> + 136(SB)/8, $0.110401073869414626E+01 +DATA ·asinrodataL15<> + 144(SB)/8, $-.270694366992537307E+01 +DATA ·asinrodataL15<> + 152(SB)/8, $0.500196500770928669E+01 +DATA ·asinrodataL15<> + 160(SB)/8, $-.665866959108585165E+01 +DATA ·asinrodataL15<> + 168(SB)/8, $-.344895269334086578E+01 +DATA ·asinrodataL15<> + 176(SB)/8, $0.927437952918301659E+00 +DATA ·asinrodataL15<> + 184(SB)/8, $0.610487478874645653E+01 +DATA ·asinrodataL15<> + 192(SB)/8, $0x7ff8000000000000 //NaN +DATA ·asinrodataL15<> + 200(SB)/8, $-1.0 +DATA ·asinrodataL15<> + 208(SB)/8, $1.0 +DATA ·asinrodataL15<> + 216(SB)/8, $1.00000000000000000e-20 +GLOBL ·asinrodataL15<> + 0(SB), RODATA, $224 + +// Asin returns the arcsine, in radians, of the argument. 
+// +// Special cases are: +// Asin(±0) = ±0 +// Asin(x) = NaN if x < -1 or x > 1 +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·asinAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·asinrodataL15<>+0(SB), R9 + LGDR F0, R7 + FMOVD F0, F8 + SRAD $32, R7 + WORD $0xC0193FE6 //iilf %r1,1072079005 + BYTE $0xA0 + BYTE $0x9D + WORD $0xB91700C7 //llgtr %r12,%r7 + MOVW R12, R8 + MOVW R1, R6 + CMPBGT R8, R6, L2 + WORD $0xC0193BFF //iilf %r1,1006632959 + BYTE $0xFF + BYTE $0xFF + MOVW R1, R6 + CMPBGT R8, R6, L13 +L3: + FMOVD 216(R9), F0 + FMADD F0, F8, F8 +L1: + FMOVD F8, ret+8(FP) + RET +L2: + WORD $0xC0193FEF //iilf %r1,1072693247 + BYTE $0xFF + BYTE $0xFF + CMPW R12, R1 + BLE L14 +L5: + WORD $0xED0090D0 //cdb %f0,.L17-.L15(%r9) + BYTE $0x00 + BYTE $0x19 + BEQ L9 + WORD $0xED0090C8 //cdb %f0,.L18-.L15(%r9) + BYTE $0x00 + BYTE $0x19 + BEQ L10 + WFCEDBS V8, V8, V0 + BVS L1 + FMOVD 192(R9), F8 + BR L1 +L13: + WFMDB V0, V0, V10 +L4: + WFMDB V10, V10, V0 + FMOVD 184(R9), F6 + FMOVD 176(R9), F2 + FMOVD 168(R9), F4 + WFMADB V0, V2, V6, V2 + FMOVD 160(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 152(R9), F6 + WFMADB V0, V2, V6, V2 + FMOVD 144(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 136(R9), F6 + WFMADB V0, V2, V6, V2 + WORD $0xC0193FE6 //iilf %r1,1072079005 + BYTE $0xA0 + BYTE $0x9D + FMOVD 128(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 120(R9), F6 + WFMADB V0, V2, V6, V2 + FMOVD 112(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 104(R9), F6 + WFMADB V0, V2, V6, V2 + FMOVD 96(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 88(R9), F6 + WFMADB V0, V2, V6, V2 + FMOVD 80(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 72(R9), F6 + WFMADB V0, V2, V6, V2 + FMOVD 64(R9), F6 + WFMADB V0, V4, V6, V4 + FMOVD 56(R9), F6 + WFMADB V0, V2, V6, V2 + FMOVD 48(R9), F6 + WFMADB V0, V4, V6, V0 + WFMDB V8, V10, V4 + FMADD F2, F10, F0 + FMADD F0, F4, F8 + CMPW R12, R1 + BLE L1 + FMOVD 40(R9), F0 + FMADD F0, F1, F8 + FMOVD F8, ret+8(FP) + RET 
+L14: + FMOVD 200(R9), F0 + FMADD F8, F8, F0 + WORD $0xB31300A0 //lcdbr %f10,%f0 + WORD $0xED009020 //cdb %f0,.L39-.L15(%r9) + BYTE $0x00 + BYTE $0x19 + FSQRT F10, F8 +L6: + MOVW R7, R6 + CMPBLE R6, $0, L8 + WORD $0xB3130088 //lcdbr %f8,%f8 + FMOVD 24(R9), F1 + BR L4 +L10: + FMOVD 16(R9), F8 + BR L1 +L9: + FMOVD 8(R9), F8 + FMOVD F8, ret+8(FP) + RET +L8: + FMOVD 0(R9), F1 + BR L4 diff --git a/src/math/asinh.go b/src/math/asinh.go new file mode 100644 index 0000000..6dcb241 --- /dev/null +++ b/src/math/asinh.go @@ -0,0 +1,76 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// asinh(x) +// Method : +// Based on +// asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] +// we have +// asinh(x) := x if 1+x*x=1, +// := sign(x)*(log(x)+ln2)) for large |x|, else +// := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else +// := sign(x)*log1p(|x| + x**2/(1 + sqrt(1+x**2))) +// + +// Asinh returns the inverse hyperbolic sine of x. 
+// +// Special cases are: +// Asinh(±0) = ±0 +// Asinh(±Inf) = ±Inf +// Asinh(NaN) = NaN +func Asinh(x float64) float64 { + if haveArchAsinh { + return archAsinh(x) + } + return asinh(x) +} + +func asinh(x float64) float64 { + const ( + Ln2 = 6.93147180559945286227e-01 // 0x3FE62E42FEFA39EF + NearZero = 1.0 / (1 << 28) // 2**-28 + Large = 1 << 28 // 2**28 + ) + // special cases + if IsNaN(x) || IsInf(x, 0) { + return x + } + sign := false + if x < 0 { + x = -x + sign = true + } + var temp float64 + switch { + case x > Large: + temp = Log(x) + Ln2 // |x| > 2**28 + case x > 2: + temp = Log(2*x + 1/(Sqrt(x*x+1)+x)) // 2**28 > |x| > 2.0 + case x < NearZero: + temp = x // |x| < 2**-28 + default: + temp = Log1p(x + x*x/(1+Sqrt(1+x*x))) // 2.0 > |x| > 2**-28 + } + if sign { + temp = -temp + } + return temp +} diff --git a/src/math/asinh_s390x.s b/src/math/asinh_s390x.s new file mode 100644 index 0000000..1bcf295 --- /dev/null +++ b/src/math/asinh_s390x.s @@ -0,0 +1,213 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·asinhrodataL18<> + 0(SB)/8, $0.749999999977387502E-01 +DATA ·asinhrodataL18<> + 8(SB)/8, $-.166666666666657082E+00 +DATA ·asinhrodataL18<> + 16(SB)/8, $0.303819368237360639E-01 +DATA ·asinhrodataL18<> + 24(SB)/8, $-.446428569571752982E-01 +DATA ·asinhrodataL18<> + 32(SB)/8, $0.173500047922695924E-01 +DATA ·asinhrodataL18<> + 40(SB)/8, $-.223719767210027185E-01 +DATA ·asinhrodataL18<> + 48(SB)/8, $0.113655037946822130E-01 +DATA ·asinhrodataL18<> + 56(SB)/8, $0.579747490622448943E-02 +DATA ·asinhrodataL18<> + 64(SB)/8, $-.139372433914359122E-01 +DATA ·asinhrodataL18<> + 72(SB)/8, $-.218674325255800840E-02 +DATA ·asinhrodataL18<> + 80(SB)/8, $-.891074277756961157E-02 +DATA ·asinhrodataL18<> + 88(SB)/8, $.41375273347623353626 +DATA ·asinhrodataL18<> + 96(SB)/8, $.51487302528619766235E+04 +DATA ·asinhrodataL18<> + 104(SB)/8, $-1.67526912689208984375 +DATA ·asinhrodataL18<> + 112(SB)/8, $0.181818181818181826E+00 +DATA ·asinhrodataL18<> + 120(SB)/8, $-.165289256198351540E-01 +DATA ·asinhrodataL18<> + 128(SB)/8, $0.200350613573012186E-02 +DATA ·asinhrodataL18<> + 136(SB)/8, $-.273205381970859341E-03 +DATA ·asinhrodataL18<> + 144(SB)/8, $0.397389654305194527E-04 +DATA ·asinhrodataL18<> + 152(SB)/8, $0.938370938292558173E-06 +DATA ·asinhrodataL18<> + 160(SB)/8, $0.212881813645679599E-07 +DATA ·asinhrodataL18<> + 168(SB)/8, $-.602107458843052029E-05 +DATA ·asinhrodataL18<> + 176(SB)/8, $-.148682720127920854E-06 +DATA ·asinhrodataL18<> + 184(SB)/8, $-5.5 +DATA ·asinhrodataL18<> + 192(SB)/8, $1.0 +DATA ·asinhrodataL18<> + 200(SB)/8, $1.0E-20 +GLOBL ·asinhrodataL18<> + 0(SB), RODATA, $208 + +// Table of log correction terms +DATA ·asinhtab2080<> + 0(SB)/8, $0.585235384085551248E-01 +DATA ·asinhtab2080<> + 8(SB)/8, $0.412206153771168640E-01 +DATA ·asinhtab2080<> + 16(SB)/8, $0.273839003221648339E-01 +DATA ·asinhtab2080<> + 24(SB)/8, $0.166383778368856480E-01 +DATA ·asinhtab2080<> + 32(SB)/8, 
$0.866678223433169637E-02 +DATA ·asinhtab2080<> + 40(SB)/8, $0.319831684989627514E-02 +DATA ·asinhtab2080<> + 48(SB)/8, $0.0 +DATA ·asinhtab2080<> + 56(SB)/8, $-.113006378583725549E-02 +DATA ·asinhtab2080<> + 64(SB)/8, $-.367979419636602491E-03 +DATA ·asinhtab2080<> + 72(SB)/8, $0.213172484510484979E-02 +DATA ·asinhtab2080<> + 80(SB)/8, $0.623271047682013536E-02 +DATA ·asinhtab2080<> + 88(SB)/8, $0.118140812789696885E-01 +DATA ·asinhtab2080<> + 96(SB)/8, $0.187681358930914206E-01 +DATA ·asinhtab2080<> + 104(SB)/8, $0.269985148668178992E-01 +DATA ·asinhtab2080<> + 112(SB)/8, $0.364186619761331328E-01 +DATA ·asinhtab2080<> + 120(SB)/8, $0.469505379381388441E-01 +GLOBL ·asinhtab2080<> + 0(SB), RODATA, $128 + +// Asinh returns the inverse hyperbolic sine of the argument. +// +// Special cases are: +// Asinh(±0) = ±0 +// Asinh(±Inf) = ±Inf +// Asinh(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·asinhAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·asinhrodataL18<>+0(SB), R9 + LGDR F0, R12 + WORD $0xC0293FDF //iilf %r2,1071644671 + BYTE $0xFF + BYTE $0xFF + SRAD $32, R12 + WORD $0xB917001C //llgtr %r1,%r12 + MOVW R1, R6 + MOVW R2, R7 + CMPBLE R6, R7, L2 + WORD $0xC0295FEF //iilf %r2,1609564159 + BYTE $0xFF + BYTE $0xFF + MOVW R2, R7 + CMPBLE R6, R7, L14 +L3: + WORD $0xC0297FEF //iilf %r2,2146435071 + BYTE $0xFF + BYTE $0xFF + CMPW R1, R2 + BGT L1 + LTDBR F0, F0 + FMOVD F0, F10 + BLTU L15 +L9: + FMOVD $0, F0 + WFADB V0, V10, V0 + WORD $0xC0398006 //iilf %r3,2147909631 + BYTE $0x7F + BYTE $0xFF + LGDR F0, R5 + SRAD $32, R5 + MOVH $0x0, R2 + SUBW R5, R3 + FMOVD $0, F8 + RISBGZ $32, $47, $0, R3, R4 + BYTE $0x18 //lr %r1,%r4 + BYTE $0x14 + RISBGN $0, $31, $32, R4, R2 + SUBW $0x100000, R1 + SRAW $8, R1, R1 + ORW $0x45000000, R1 + BR L6 +L2: + MOVD $0x30000000, R2 + CMPW R1, R2 + BGT L16 + FMOVD 200(R9), F2 + FMADD F2, F0, F0 +L1: + FMOVD F0, ret+8(FP) + RET +L14: + LTDBR F0, 
F0 + BLTU L17 + FMOVD F0, F10 +L4: + FMOVD 192(R9), F2 + WFMADB V0, V0, V2, V0 + LTDBR F0, F0 + FSQRT F0, F8 +L5: + WFADB V8, V10, V0 + WORD $0xC0398006 //iilf %r3,2147909631 + BYTE $0x7F + BYTE $0xFF + LGDR F0, R5 + SRAD $32, R5 + MOVH $0x0, R2 + SUBW R5, R3 + RISBGZ $32, $47, $0, R3, R4 + SRAW $8, R4, R1 + RISBGN $0, $31, $32, R4, R2 + ORW $0x45000000, R1 +L6: + LDGR R2, F2 + FMOVD 184(R9), F0 + WFMADB V8, V2, V0, V8 + FMOVD 176(R9), F4 + WFMADB V10, V2, V8, V2 + FMOVD 168(R9), F0 + FMOVD 160(R9), F6 + FMOVD 152(R9), F1 + WFMADB V2, V6, V4, V6 + WFMADB V2, V1, V0, V1 + WFMDB V2, V2, V4 + FMOVD 144(R9), F0 + WFMADB V6, V4, V1, V6 + FMOVD 136(R9), F1 + RISBGZ $57, $60, $51, R3, R3 + WFMADB V2, V0, V1, V0 + FMOVD 128(R9), F1 + WFMADB V4, V6, V0, V6 + FMOVD 120(R9), F0 + WFMADB V2, V1, V0, V1 + VLVGF $0, R1, V0 + WFMADB V4, V6, V1, V4 + LDEBR F0, F0 + FMOVD 112(R9), F6 + WFMADB V2, V4, V6, V4 + MOVD $·asinhtab2080<>+0(SB), R1 + FMOVD 104(R9), F1 + WORD $0x68331000 //ld %f3,0(%r3,%r1) + FMOVD 96(R9), F6 + WFMADB V2, V4, V3, V2 + WFMADB V0, V1, V6, V0 + FMOVD 88(R9), F4 + WFMADB V0, V4, V2, V0 + MOVD R12, R6 + CMPBGT R6, $0, L1 + + WORD $0xB3130000 //lcdbr %f0,%f0 + FMOVD F0, ret+8(FP) + RET +L16: + WFMDB V0, V0, V1 + FMOVD 80(R9), F6 + WFMDB V1, V1, V4 + FMOVD 72(R9), F2 + WFMADB V4, V2, V6, V2 + FMOVD 64(R9), F3 + FMOVD 56(R9), F6 + WFMADB V4, V2, V3, V2 + FMOVD 48(R9), F3 + WFMADB V4, V6, V3, V6 + FMOVD 40(R9), F5 + FMOVD 32(R9), F3 + WFMADB V4, V2, V5, V2 + WFMADB V4, V6, V3, V6 + FMOVD 24(R9), F5 + FMOVD 16(R9), F3 + WFMADB V4, V2, V5, V2 + WFMADB V4, V6, V3, V6 + FMOVD 8(R9), F5 + FMOVD 0(R9), F3 + WFMADB V4, V2, V5, V2 + WFMADB V4, V6, V3, V4 + WFMDB V0, V1, V6 + WFMADB V1, V4, V2, V4 + FMADD F4, F6, F0 + FMOVD F0, ret+8(FP) + RET +L17: + WORD $0xB31300A0 //lcdbr %f10,%f0 + BR L4 +L15: + WORD $0xB31300A0 //lcdbr %f10,%f0 + BR L9 diff --git a/src/math/atan.go b/src/math/atan.go new file mode 100644 index 0000000..69af860 --- /dev/null +++ b/src/math/atan.go @@ 
-0,0 +1,110 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point arctangent. +*/ + +// The original C code, the long comment, and the constants below were +// from http://netlib.sandia.gov/cephes/cmath/atan.c, available from +// http://www.netlib.org/cephes/cmath.tgz. +// The go code is a version of the original C. +// +// atan.c +// Inverse circular tangent (arctangent) +// +// SYNOPSIS: +// double x, y, atan(); +// y = atan( x ); +// +// DESCRIPTION: +// Returns radian angle between -pi/2 and +pi/2 whose tangent is x. +// +// Range reduction is from three intervals into the interval from zero to 0.66. +// The approximant uses a rational function of degree 4/5 of the form +// x + x**3 P(x)/Q(x). +// +// ACCURACY: +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10, 10 50000 2.4e-17 8.3e-18 +// IEEE -10, 10 10^6 1.8e-16 5.0e-17 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// xatan evaluates a series valid in the range [0, 0.66]. 
+func xatan(x float64) float64 { + const ( + P0 = -8.750608600031904122785e-01 + P1 = -1.615753718733365076637e+01 + P2 = -7.500855792314704667340e+01 + P3 = -1.228866684490136173410e+02 + P4 = -6.485021904942025371773e+01 + Q0 = +2.485846490142306297962e+01 + Q1 = +1.650270098316988542046e+02 + Q2 = +4.328810604912902668951e+02 + Q3 = +4.853903996359136964868e+02 + Q4 = +1.945506571482613964425e+02 + ) + z := x * x + z = z * ((((P0*z+P1)*z+P2)*z+P3)*z + P4) / (((((z+Q0)*z+Q1)*z+Q2)*z+Q3)*z + Q4) + z = x*z + x + return z +} + +// satan reduces its argument (known to be positive) +// to the range [0, 0.66] and calls xatan. +func satan(x float64) float64 { + const ( + Morebits = 6.123233995736765886130e-17 // pi/2 = PIO2 + Morebits + Tan3pio8 = 2.41421356237309504880 // tan(3*pi/8) + ) + if x <= 0.66 { + return xatan(x) + } + if x > Tan3pio8 { + return Pi/2 - xatan(1/x) + Morebits + } + return Pi/4 + xatan((x-1)/(x+1)) + 0.5*Morebits +} + +// Atan returns the arctangent, in radians, of x. +// +// Special cases are: +// Atan(±0) = ±0 +// Atan(±Inf) = ±Pi/2 +func Atan(x float64) float64 { + if haveArchAtan { + return archAtan(x) + } + return atan(x) +} + +func atan(x float64) float64 { + if x == 0 { + return x + } + if x > 0 { + return satan(x) + } + return -satan(-x) +} diff --git a/src/math/atan2.go b/src/math/atan2.go new file mode 100644 index 0000000..11d7e81 --- /dev/null +++ b/src/math/atan2.go @@ -0,0 +1,76 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Atan2 returns the arc tangent of y/x, using +// the signs of the two to determine the quadrant +// of the return value. 
+// +// Special cases are (in order): +// Atan2(y, NaN) = NaN +// Atan2(NaN, x) = NaN +// Atan2(+0, x>=0) = +0 +// Atan2(-0, x>=0) = -0 +// Atan2(+0, x<=-0) = +Pi +// Atan2(-0, x<=-0) = -Pi +// Atan2(y>0, 0) = +Pi/2 +// Atan2(y<0, 0) = -Pi/2 +// Atan2(+Inf, +Inf) = +Pi/4 +// Atan2(-Inf, +Inf) = -Pi/4 +// Atan2(+Inf, -Inf) = 3Pi/4 +// Atan2(-Inf, -Inf) = -3Pi/4 +// Atan2(y, +Inf) = 0 +// Atan2(y>0, -Inf) = +Pi +// Atan2(y<0, -Inf) = -Pi +// Atan2(+Inf, x) = +Pi/2 +// Atan2(-Inf, x) = -Pi/2 +func Atan2(y, x float64) float64 { + if haveArchAtan2 { + return archAtan2(y, x) + } + return atan2(y, x) +} + +func atan2(y, x float64) float64 { + // special cases + switch { + case IsNaN(y) || IsNaN(x): + return NaN() + case y == 0: + if x >= 0 && !Signbit(x) { + return Copysign(0, y) + } + return Copysign(Pi, y) + case x == 0: + return Copysign(Pi/2, y) + case IsInf(x, 0): + if IsInf(x, 1) { + switch { + case IsInf(y, 0): + return Copysign(Pi/4, y) + default: + return Copysign(0, y) + } + } + switch { + case IsInf(y, 0): + return Copysign(3*Pi/4, y) + default: + return Copysign(Pi, y) + } + case IsInf(y, 0): + return Copysign(Pi/2, y) + } + + // Call atan and determine the quadrant. + q := Atan(y / x) + if x < 0 { + if q <= 0 { + return q + Pi + } + return q - Pi + } + return q +} diff --git a/src/math/atan2_s390x.s b/src/math/atan2_s390x.s new file mode 100644 index 0000000..587b89e --- /dev/null +++ b/src/math/atan2_s390x.s @@ -0,0 +1,297 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NegInf 0xFFF0000000000000 +#define NegZero 0x8000000000000000 +#define Pi 0x400921FB54442D18 +#define NegPi 0xC00921FB54442D18 +#define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4 +#define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4 +#define PiDiv4 0x3FE921FB54442D18 // Pi/4 +#define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4 + +// Minimax polynomial coefficients and other constants +DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00 +DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00 +DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00 +DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00 +DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01 +DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01 +DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01 +DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01 +DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01 +DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01 +DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01 +DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01 +DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01 +DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01 +DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02 +DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04 +DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02 +DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03 +DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03 +DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01 +GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160 + +DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b +DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b +DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b +DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b +GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32 +DATA 
·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000 +GLOBL ·atan2xpim<> + 0(SB), RODATA, $8 + +// Atan2 returns the arc tangent of y/x, using +// the signs of the two to determine the quadrant +// of the return value. +// +// Special cases are (in order): +// Atan2(y, NaN) = NaN +// Atan2(NaN, x) = NaN +// Atan2(+0, x>=0) = +0 +// Atan2(-0, x>=0) = -0 +// Atan2(+0, x<=-0) = +Pi +// Atan2(-0, x<=-0) = -Pi +// Atan2(y>0, 0) = +Pi/2 +// Atan2(y<0, 0) = -Pi/2 +// Atan2(+Inf, +Inf) = +Pi/4 +// Atan2(-Inf, +Inf) = -Pi/4 +// Atan2(+Inf, -Inf) = 3Pi/4 +// Atan2(-Inf, -Inf) = -3Pi/4 +// Atan2(y, +Inf) = 0 +// Atan2(y>0, -Inf) = +Pi +// Atan2(y<0, -Inf) = -Pi +// Atan2(+Inf, x) = +Pi/2 +// Atan2(-Inf, x) = -Pi/2 +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·atan2Asm(SB), NOSPLIT, $0-24 + // special case + MOVD x+0(FP), R1 + MOVD y+8(FP), R2 + + // special case Atan2(NaN, y) = NaN + MOVD $~(1<<63), R5 + AND R1, R5 // x = |x| + MOVD $PosInf, R3 + CMPUBLT R3, R5, returnX + + // special case Atan2(x, NaN) = NaN + MOVD $~(1<<63), R5 + AND R2, R5 + CMPUBLT R3, R5, returnY + + MOVD $NegZero, R3 + CMPUBEQ R3, R1, xIsNegZero + + MOVD $0, R3 + CMPUBEQ R3, R1, xIsPosZero + + MOVD $PosInf, R4 + CMPUBEQ R4, R2, yIsPosInf + + MOVD $NegInf, R4 + CMPUBEQ R4, R2, yIsNegInf + BR Normal +xIsNegZero: + // special case Atan(-0, y>=0) = -0 + MOVD $0, R4 + CMPBLE R4, R2, returnX + + //special case Atan2(-0, y<=-0) = -Pi + MOVD $NegZero, R4 + CMPBGE R4, R2, returnNegPi + BR Normal +xIsPosZero: + //special case Atan2(0, 0) = 0 + MOVD $0, R4 + CMPUBEQ R4, R2, returnX + + //special case Atan2(0, y<=-0) = Pi + MOVD $NegZero, R4 + CMPBGE R4, R2, returnPi + BR Normal +yIsNegInf: + //special case Atan2(+Inf, -Inf) = 3Pi/4 + MOVD $PosInf, R3 + CMPUBEQ R3, R1, posInfNegInf + + //special case Atan2(-Inf, -Inf) = -3Pi/4 + MOVD $NegInf, R3 + CMPUBEQ R3, R1, negInfNegInf + BR Normal +yIsPosInf: + //special case Atan2(+Inf, +Inf) = 
Pi/4 + MOVD $PosInf, R3 + CMPUBEQ R3, R1, posInfPosInf + + //special case Atan2(-Inf, +Inf) = -Pi/4 + MOVD $NegInf, R3 + CMPUBEQ R3, R1, negInfPosInf + + //special case Atan2(x, +Inf) = Copysign(0, x) + CMPBLT R1, $0, returnNegZero + BR returnPosZero + +Normal: + FMOVD x+0(FP), F0 + FMOVD y+8(FP), F2 + MOVD $·atan2rodataL25<>+0(SB), R9 + LGDR F0, R2 + LGDR F2, R1 + RISBGNZ $32, $63, $32, R2, R2 + RISBGNZ $32, $63, $32, R1, R1 + WORD $0xB9170032 //llgtr %r3,%r2 + RISBGZ $63, $63, $33, R2, R5 + WORD $0xB9170041 //llgtr %r4,%r1 + WFLCDB V0, V20 + MOVW R4, R6 + MOVW R3, R7 + CMPUBLT R6, R7, L17 + WFDDB V2, V0, V3 + ADDW $2, R5, R2 + MOVW R4, R6 + MOVW R3, R7 + CMPUBLE R6, R7, L20 +L3: + WFMDB V3, V3, V4 + VLEG $0, 152(R9), V18 + VLEG $0, 144(R9), V16 + FMOVD 136(R9), F1 + FMOVD 128(R9), F5 + FMOVD 120(R9), F6 + WFMADB V4, V16, V5, V16 + WFMADB V4, V6, V1, V6 + FMOVD 112(R9), F7 + WFMDB V4, V4, V1 + WFMADB V4, V7, V18, V7 + VLEG $0, 104(R9), V18 + WFMADB V1, V6, V16, V6 + CMPWU R4, R3 + FMOVD 96(R9), F5 + VLEG $0, 88(R9), V16 + WFMADB V4, V5, V18, V5 + VLEG $0, 80(R9), V18 + VLEG $0, 72(R9), V22 + WFMADB V4, V16, V18, V16 + VLEG $0, 64(R9), V18 + WFMADB V1, V7, V5, V7 + WFMADB V4, V18, V22, V18 + WFMDB V1, V1, V5 + WFMADB V1, V16, V18, V16 + VLEG $0, 56(R9), V18 + WFMADB V5, V6, V7, V6 + VLEG $0, 48(R9), V22 + FMOVD 40(R9), F7 + WFMADB V4, V7, V18, V7 + VLEG $0, 32(R9), V18 + WFMADB V5, V6, V16, V6 + WFMADB V4, V18, V22, V18 + VLEG $0, 24(R9), V16 + WFMADB V1, V7, V18, V7 + VLEG $0, 16(R9), V18 + VLEG $0, 8(R9), V22 + WFMADB V4, V18, V16, V18 + VLEG $0, 0(R9), V16 + WFMADB V5, V6, V7, V6 + WFMADB V4, V16, V22, V16 + FMUL F3, F4 + WFMADB V1, V18, V16, V1 + FMADD F6, F5, F1 + WFMADB V4, V1, V3, V4 + BLT L18 + BGT L7 + LTDBR F2, F2 + BLTU L21 +L8: + LTDBR F0, F0 + BLTU L22 +L9: + WFCHDBS V2, V0, V0 + BNE L18 +L7: + MOVW R1, R6 + CMPBGE R6, $0, L1 +L18: + RISBGZ $58, $60, $3, R2, R2 + MOVD $·atan2xpi2h<>+0(SB), R1 + MOVD ·atan2xpim<>+0(SB), R3 + LDGR R3, F0 + WORD 
$0xED021000 //madb %f4,%f0,0(%r2,%r1) + BYTE $0x40 + BYTE $0x1E +L1: + FMOVD F4, ret+16(FP) + RET + +L20: + LTDBR F2, F2 + BLTU L23 + FMOVD F2, F6 +L4: + LTDBR F0, F0 + BLTU L24 + FMOVD F0, F4 +L5: + WFCHDBS V6, V4, V4 + BEQ L3 +L17: + WFDDB V0, V2, V4 + BYTE $0x18 //lr %r2,%r5 + BYTE $0x25 + WORD $0xB3130034 //lcdbr %f3,%f4 + BR L3 +L23: + WORD $0xB3130062 //lcdbr %f6,%f2 + BR L4 +L22: + VLR V20, V0 + BR L9 +L21: + WORD $0xB3130022 //lcdbr %f2,%f2 + BR L8 +L24: + VLR V20, V4 + BR L5 +returnX: //the result is same as the first argument + MOVD R1, ret+16(FP) + RET +returnY: //the result is same as the second argument + MOVD R2, ret+16(FP) + RET +returnPi: + MOVD $Pi, R1 + MOVD R1, ret+16(FP) + RET +returnNegPi: + MOVD $NegPi, R1 + MOVD R1, ret+16(FP) + RET +posInfNegInf: + MOVD $Pi3Div4, R1 + MOVD R1, ret+16(FP) + RET +negInfNegInf: + MOVD $NegPi3Div4, R1 + MOVD R1, ret+16(FP) + RET +posInfPosInf: + MOVD $PiDiv4, R1 + MOVD R1, ret+16(FP) + RET +negInfPosInf: + MOVD $NegPiDiv4, R1 + MOVD R1, ret+16(FP) + RET +returnNegZero: + MOVD $NegZero, R1 + MOVD R1, ret+16(FP) + RET +returnPosZero: + MOVD $0, ret+16(FP) + RET diff --git a/src/math/atan_s390x.s b/src/math/atan_s390x.s new file mode 100644 index 0000000..3a7e59b --- /dev/null +++ b/src/math/atan_s390x.s @@ -0,0 +1,128 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·atanrodataL8<> + 0(SB)/8, $0.199999999999554423E+00 +DATA ·atanrodataL8<> + 8(SB)/8, $0.111111110136634272E+00 +DATA ·atanrodataL8<> + 16(SB)/8, $-.142857142828026806E+00 +DATA ·atanrodataL8<> + 24(SB)/8, $-.333333333333330928E+00 +DATA ·atanrodataL8<> + 32(SB)/8, $0.769228118888682505E-01 +DATA ·atanrodataL8<> + 40(SB)/8, $0.588059263575587687E-01 +DATA ·atanrodataL8<> + 48(SB)/8, $-.666641501287528609E-01 +DATA ·atanrodataL8<> + 56(SB)/8, $-.909090711945939878E-01 +DATA ·atanrodataL8<> + 64(SB)/8, $0.472329433805024762E-01 +DATA ·atanrodataL8<> + 72(SB)/8, $0.366935664549587481E-01 +DATA ·atanrodataL8<> + 80(SB)/8, $-.422172007412067035E-01 +DATA ·atanrodataL8<> + 88(SB)/8, $-.299856214685512712E-01 +DATA ·atanrodataL8<> + 96(SB)/8, $0.220852012160300086E-01 +DATA ·atanrodataL8<> + 104(SB)/8, $0.726338160757602439E-02 +DATA ·atanrodataL8<> + 112(SB)/8, $0.843488472994227321E-03 +DATA ·atanrodataL8<> + 120(SB)/8, $0.134893651284712515E-04 +DATA ·atanrodataL8<> + 128(SB)/8, $-.525380587584426406E-01 +DATA ·atanrodataL8<> + 136(SB)/8, $-.139950258898989925E-01 +DATA ·atanrodataL8<> + 144(SB)/8, $-.291935324869629616E-02 +DATA ·atanrodataL8<> + 152(SB)/8, $-.154797890856877418E-03 +GLOBL ·atanrodataL8<> + 0(SB), RODATA, $160 + +DATA ·atanxpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b +DATA ·atanxpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b +DATA ·atanxpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b +DATA ·atanxpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b // full 8-byte entry, same width as the three above +GLOBL ·atanxpi2h<> + 0(SB), RODATA, $32 +DATA ·atanxpim<> + 0(SB)/8, $0x3ff4f42b00000000 +GLOBL ·atanxpim<> + 0(SB), RODATA, $8 +DATA ·atanxmone<> + 0(SB)/8, $-1.0 +GLOBL ·atanxmone<> + 0(SB), RODATA, $8 + +// Atan returns the arctangent, in radians, of the argument. 
+// +// Special cases are: +// Atan(±0) = ±0 +// Atan(±Inf) = ±Pi/2 +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·atanAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + //special case Atan(±0) = ±0 + FMOVD $(0.0), F1 + FCMPU F0, F1 + BEQ atanIsZero + + MOVD $·atanrodataL8<>+0(SB), R5 + MOVH $0x3FE0, R3 + LGDR F0, R1 // R1 = bit pattern of x + RISBGNZ $32, $63, $32, R1, R1 + RLL $16, R1, R2 + ANDW $0x7FF0, R2 + MOVW R2, R6 + MOVW R3, R7 + CMPUBLE R6, R7, L6 // masked high bits of x <= 0x3FE0: skip the reduction, go to the polynomial + MOVD $·atanxmone<>+0(SB), R3 + FMOVD 0(R3), F2 + WFDDB V0, V2, V0 + RISBGZ $63, $63, $33, R1, R1 + MOVD $·atanxpi2h<>+0(SB), R3 + MOVWZ R1, R1 + SLD $3, R1, R1 + WORD $0x68813000 //ld %f8,0(%r1,%r3) +L6: + WFMDB V0, V0, V2 + FMOVD 152(R5), F6 + FMOVD 144(R5), F1 + FMOVD 136(R5), F7 + VLEG $0, 128(R5), V16 + FMOVD 120(R5), F4 + FMOVD 112(R5), F5 + WFMADB V2, V4, V6, V4 + WFMADB V2, V5, V1, V5 + WFMDB V2, V2, V6 + FMOVD 104(R5), F3 + FMOVD 96(R5), F1 + WFMADB V2, V3, V7, V3 + MOVH $0x3FE0, R1 + FMOVD 88(R5), F7 + WFMADB V2, V1, V7, V1 + FMOVD 80(R5), F7 + WFMADB V6, V3, V1, V3 + WFMADB V6, V4, V5, V4 + WFMDB V6, V6, V1 + FMOVD 72(R5), F5 + WFMADB V2, V5, V7, V5 + FMOVD 64(R5), F7 + WFMADB V2, V7, V16, V7 + VLEG $0, 56(R5), V16 + WFMADB V6, V5, V7, V5 + WFMADB V1, V4, V3, V4 + FMOVD 48(R5), F7 + FMOVD 40(R5), F3 + WFMADB V2, V3, V7, V3 + FMOVD 32(R5), F7 + WFMADB V2, V7, V16, V7 + VLEG $0, 24(R5), V16 + WFMADB V1, V4, V5, V4 + FMOVD 16(R5), F5 + WFMADB V6, V3, V7, V3 + FMOVD 8(R5), F7 + WFMADB V2, V7, V5, V7 + FMOVD 0(R5), F5 + WFMADB V2, V5, V16, V5 + WFMADB V1, V4, V3, V4 + WFMADB V6, V7, V5, V6 + FMUL F0, F2 + FMADD F4, F1, F6 + FMADD F6, F2, F0 + MOVW R2, R6 + MOVW R1, R7 + CMPUBLE R6, R7, L1 // same test as before L6: reduction was skipped, no correction term to add + MOVD $·atanxpim<>+0(SB), R1 + WORD $0xED801000 //madb %f0,%f8,0(%r1) + BYTE $0x00 + BYTE $0x1E +L1: +atanIsZero: + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/atanh.go b/src/math/atanh.go new file mode 100644 index 0000000..fe8bd6d --- /dev/null +++ 
b/src/math/atanh.go @@ -0,0 +1,84 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_atanh.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// __ieee754_atanh(x) +// Method : +// 1. Reduce x to positive by atanh(-x) = -atanh(x) +// 2. For x>=0.5 +// atanh(x) = 0.5 * log((1+x)/(1-x)) +// = 0.5 * log(1 + 2x/(1-x)) +// = 0.5 * log1p(2 * x/(1-x)) +// +// For x<0.5 +// atanh(x) = 0.5*log1p(2x+2x*x/(1-x)) +// +// Special cases: +// atanh(x) is NaN if |x| > 1 with signal; +// atanh(NaN) is that NaN with no signal; +// atanh(+-1) is +-INF with signal. +// + +// Atanh returns the inverse hyperbolic tangent of x. 
+// +// Special cases are: +// Atanh(1) = +Inf +// Atanh(±0) = ±0 +// Atanh(-1) = -Inf +// Atanh(x) = NaN if x < -1 or x > 1 +// Atanh(NaN) = NaN +func Atanh(x float64) float64 { + if haveArchAtanh { + return archAtanh(x) + } + return atanh(x) +} + +func atanh(x float64) float64 { + const NearZero = 1.0 / (1 << 28) // 2**-28 + // special cases + switch { + case x < -1 || x > 1 || IsNaN(x): + return NaN() + case x == 1: + return Inf(1) + case x == -1: + return Inf(-1) + } + sign := false + if x < 0 { + x = -x + sign = true + } + var temp float64 + switch { + case x < NearZero: + temp = x + case x < 0.5: + temp = x + x + temp = 0.5 * Log1p(temp+temp*x/(1-x)) + default: + temp = 0.5 * Log1p((x+x)/(1-x)) + } + if sign { + temp = -temp + } + return temp +} diff --git a/src/math/atanh_s390x.s b/src/math/atanh_s390x.s new file mode 100644 index 0000000..c4ec2b2 --- /dev/null +++ b/src/math/atanh_s390x.s @@ -0,0 +1,174 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·atanhrodataL10<> + 0(SB)/8, $.41375273347623353626 +DATA ·atanhrodataL10<> + 8(SB)/8, $.51487302528619766235E+04 +DATA ·atanhrodataL10<> + 16(SB)/8, $-1.67526912689208984375 +DATA ·atanhrodataL10<> + 24(SB)/8, $0.181818181818181826E+00 +DATA ·atanhrodataL10<> + 32(SB)/8, $-.165289256198351540E-01 +DATA ·atanhrodataL10<> + 40(SB)/8, $0.200350613573012186E-02 +DATA ·atanhrodataL10<> + 48(SB)/8, $0.397389654305194527E-04 +DATA ·atanhrodataL10<> + 56(SB)/8, $-.273205381970859341E-03 +DATA ·atanhrodataL10<> + 64(SB)/8, $0.938370938292558173E-06 +DATA ·atanhrodataL10<> + 72(SB)/8, $-.148682720127920854E-06 +DATA ·atanhrodataL10<> + 80(SB)/8, $ 0.212881813645679599E-07 +DATA ·atanhrodataL10<> + 88(SB)/8, $-.602107458843052029E-05 +DATA ·atanhrodataL10<> + 96(SB)/8, $-5.5 +DATA ·atanhrodataL10<> + 104(SB)/8, $-0.5 +DATA ·atanhrodataL10<> + 112(SB)/8, $0.0 +DATA ·atanhrodataL10<> + 120(SB)/8, $0x7ff8000000000000 //Nan +DATA ·atanhrodataL10<> + 128(SB)/8, $-1.0 +DATA ·atanhrodataL10<> + 136(SB)/8, $1.0 +DATA ·atanhrodataL10<> + 144(SB)/8, $1.0E-20 +GLOBL ·atanhrodataL10<> + 0(SB), RODATA, $152 + +// Table of log correction terms +DATA ·atanhtab2076<> + 0(SB)/8, $0.585235384085551248E-01 +DATA ·atanhtab2076<> + 8(SB)/8, $0.412206153771168640E-01 +DATA ·atanhtab2076<> + 16(SB)/8, $0.273839003221648339E-01 +DATA ·atanhtab2076<> + 24(SB)/8, $0.166383778368856480E-01 +DATA ·atanhtab2076<> + 32(SB)/8, $0.866678223433169637E-02 +DATA ·atanhtab2076<> + 40(SB)/8, $0.319831684989627514E-02 +DATA ·atanhtab2076<> + 48(SB)/8, $0.000000000000000000E+00 +DATA ·atanhtab2076<> + 56(SB)/8, $-.113006378583725549E-02 +DATA ·atanhtab2076<> + 64(SB)/8, $-.367979419636602491E-03 +DATA ·atanhtab2076<> + 72(SB)/8, $0.213172484510484979E-02 +DATA ·atanhtab2076<> + 80(SB)/8, $0.623271047682013536E-02 +DATA ·atanhtab2076<> + 88(SB)/8, $0.118140812789696885E-01 +DATA ·atanhtab2076<> + 96(SB)/8, 
$0.187681358930914206E-01 +DATA ·atanhtab2076<> + 104(SB)/8, $0.269985148668178992E-01 +DATA ·atanhtab2076<> + 112(SB)/8, $0.364186619761331328E-01 +DATA ·atanhtab2076<> + 120(SB)/8, $0.469505379381388441E-01 +GLOBL ·atanhtab2076<> + 0(SB), RODATA, $128 + +// Table of +/- .5 +DATA ·atanhtabh2075<> + 0(SB)/8, $0.5 +DATA ·atanhtabh2075<> + 8(SB)/8, $-.5 +GLOBL ·atanhtabh2075<> + 0(SB), RODATA, $16 + +// Atanh returns the inverse hyperbolic tangent of the argument. +// +// Special cases are: +// Atanh(1) = +Inf +// Atanh(±0) = ±0 +// Atanh(-1) = -Inf +// Atanh(x) = NaN if x < -1 or x > 1 +// Atanh(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·atanhAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·atanhrodataL10<>+0(SB), R5 + LGDR F0, R1 + WORD $0xC0393FEF //iilf %r3,1072693247 + BYTE $0xFF + BYTE $0xFF + SRAD $32, R1 + WORD $0xB9170021 //llgtr %r2,%r1 + MOVW R2, R6 + MOVW R3, R7 + CMPBGT R6, R7, L2 + WORD $0xC0392FFF //iilf %r3,805306367 + BYTE $0xFF + BYTE $0xFF + MOVW R2, R6 + MOVW R3, R7 + CMPBGT R6, R7, L9 +L3: + FMOVD 144(R5), F2 + FMADD F2, F0, F0 +L1: + FMOVD F0, ret+8(FP) + RET + +L2: + WORD $0xED005088 //cdb %f0,.L12-.L10(%r5) + BYTE $0x00 + BYTE $0x19 + BEQ L5 + WORD $0xED005080 //cdb %f0,.L13-.L10(%r5) + BYTE $0x00 + BYTE $0x19 + BEQ L5 + WFCEDBS V0, V0, V2 + BVS L1 + FMOVD 120(R5), F0 + BR L1 +L5: + WORD $0xED005070 //ddb %f0,.L15-.L10(%r5) + BYTE $0x00 + BYTE $0x1D + FMOVD F0, ret+8(FP) + RET + +L9: + FMOVD F0, F2 + MOVD $·atanhtabh2075<>+0(SB), R2 + SRW $31, R1, R1 + FMOVD 104(R5), F4 + MOVW R1, R1 + SLD $3, R1, R1 + WORD $0x68012000 //ld %f0,0(%r1,%r2) + WFMADB V2, V4, V0, V4 + VLEG $0, 96(R5), V16 + FDIV F4, F2 + WORD $0xC0298006 //iilf %r2,2147909631 + BYTE $0x7F + BYTE $0xFF + FMOVD 88(R5), F6 + FMOVD 80(R5), F1 + FMOVD 72(R5), F7 + FMOVD 64(R5), F5 + FMOVD F2, F4 + WORD $0xED405088 //adb %f4,.L12-.L10(%r5) + BYTE $0x00 + BYTE $0x1A + LGDR F4, R4 + 
SRAD $32, R4 + FMOVD F4, F3 + WORD $0xED305088 //sdb %f3,.L12-.L10(%r5) + BYTE $0x00 + BYTE $0x1B + SUBW R4, R2 + WFSDB V3, V2, V3 + RISBGZ $32, $47, $0, R2, R1 + SLD $32, R1, R1 + LDGR R1, F2 + WFMADB V4, V2, V16, V4 + SRAW $8, R2, R1 + WFMADB V4, V5, V6, V5 + WFMDB V4, V4, V6 + WFMADB V4, V1, V7, V1 + WFMADB V2, V3, V4, V2 + WFMADB V1, V6, V5, V1 + FMOVD 56(R5), F3 + FMOVD 48(R5), F5 + WFMADB V4, V5, V3, V4 + FMOVD 40(R5), F3 + FMADD F1, F6, F4 + FMOVD 32(R5), F1 + FMADD F3, F2, F1 + ANDW $0xFFFFFF00, R1 + WFMADB V6, V4, V1, V6 + FMOVD 24(R5), F3 + ORW $0x45000000, R1 + WFMADB V2, V6, V3, V6 + VLVGF $0, R1, V4 + LDEBR F4, F4 + RISBGZ $57, $60, $51, R2, R2 + MOVD $·atanhtab2076<>+0(SB), R1 + FMOVD 16(R5), F3 + WORD $0x68521000 //ld %f5,0(%r2,%r1) + FMOVD 8(R5), F1 + WFMADB V2, V6, V5, V2 + WFMADB V4, V3, V1, V4 + FMOVD 0(R5), F6 + FMADD F6, F4, F2 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/big/accuracy_string.go b/src/math/big/accuracy_string.go new file mode 100644 index 0000000..1501ace --- /dev/null +++ b/src/math/big/accuracy_string.go @@ -0,0 +1,17 @@ +// Code generated by "stringer -type=Accuracy"; DO NOT EDIT. + +package big + +import "strconv" + +const _Accuracy_name = "BelowExactAbove" + +var _Accuracy_index = [...]uint8{0, 5, 10, 15} + +func (i Accuracy) String() string { + i -= -1 + if i < 0 || i >= Accuracy(len(_Accuracy_index)-1) { + return "Accuracy(" + strconv.FormatInt(int64(i+-1), 10) + ")" + } + return _Accuracy_name[_Accuracy_index[i]:_Accuracy_index[i+1]] +} diff --git a/src/math/big/arith.go b/src/math/big/arith.go new file mode 100644 index 0000000..8f55c19 --- /dev/null +++ b/src/math/big/arith.go @@ -0,0 +1,277 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file provides Go implementations of elementary multi-precision +// arithmetic operations on word vectors. These have the suffix _g. 
+// These are needed for platforms without assembly implementations of these routines. +// This file also contains elementary operations that can be implemented +// sufficiently efficiently in Go. + +package big + +import "math/bits" + +// A Word represents a single digit of a multi-precision unsigned integer. +type Word uint + +const ( + _S = _W / 8 // word size in bytes + + _W = bits.UintSize // word size in bits + _B = 1 << _W // digit base + _M = _B - 1 // digit mask +) + +// Many of the loops in this file are of the form +// for i := 0; i < len(z) && i < len(x) && i < len(y); i++ +// i < len(z) is the real condition. +// However, checking i < len(x) && i < len(y) as well is faster than +// having the compiler do a bounds check in the body of the loop; +// remarkably it is even faster than hoisting the bounds check +// out of the loop, by doing something like +// _, _ = x[len(z)-1], y[len(z)-1] +// There are other ways to hoist the bounds check out of the loop, +// but the compiler's BCE isn't powerful enough for them (yet?). +// See the discussion in CL 164966. + +// ---------------------------------------------------------------------------- +// Elementary operations on words +// +// These operations are used by the vector operations below. + +// z1<<_W + z0 = x*y +func mulWW_g(x, y Word) (z1, z0 Word) { + hi, lo := bits.Mul(uint(x), uint(y)) + return Word(hi), Word(lo) +} + +// z1<<_W + z0 = x*y + c +func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { + hi, lo := bits.Mul(uint(x), uint(y)) + var cc uint + lo, cc = bits.Add(lo, uint(c), 0) + return Word(hi + cc), Word(lo) +} + +// nlz returns the number of leading zeros in x. +// Wraps bits.LeadingZeros call for convenience. +func nlz(x Word) uint { + return uint(bits.LeadingZeros(uint(x))) +} + +// The resulting carry c is either 0 or 1. +func addVV_g(z, x, y []Word) (c Word) { + // The comment near the top of this file discusses this for loop condition. 
+ for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { + zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c)) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// The resulting carry c is either 0 or 1. +func subVV_g(z, x, y []Word) (c Word) { + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { + zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// The resulting carry c is either 0 or 1. +func addVW_g(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + zi, cc := bits.Add(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// addVWlarge is addVW, but intended for large z. +// The only difference is that we check on every iteration +// whether we are done with carries, +// and if so, switch to a much faster copy instead. +// This is only a good idea for large z, +// because the overhead of the check and the function call +// outweigh the benefits when z is small. +func addVWlarge(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + if c == 0 { + copy(z[i:], x[i:]) + return + } + zi, cc := bits.Add(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +func subVW_g(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + zi, cc := bits.Sub(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// subVWlarge is to subVW as addVWlarge is to addVW. +func subVWlarge(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. 
+ for i := 0; i < len(z) && i < len(x); i++ { + if c == 0 { + copy(z[i:], x[i:]) + return + } + zi, cc := bits.Sub(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +func shlVU_g(z, x []Word, s uint) (c Word) { + if s == 0 { + copy(z, x) + return + } + if len(z) == 0 { + return + } + s &= _W - 1 // hint to the compiler that shifts by s don't need guard code + ŝ := _W - s + ŝ &= _W - 1 // ditto + c = x[len(z)-1] >> ŝ + for i := len(z) - 1; i > 0; i-- { + z[i] = x[i]<<s | x[i-1]>>ŝ + } + z[0] = x[0] << s + return +} + +func shrVU_g(z, x []Word, s uint) (c Word) { + if s == 0 { + copy(z, x) + return + } + if len(z) == 0 { + return + } + if len(x) != len(z) { + // This is an invariant guaranteed by the caller. + panic("len(x) != len(z)") + } + s &= _W - 1 // hint to the compiler that shifts by s don't need guard code + ŝ := _W - s + ŝ &= _W - 1 // ditto + c = x[0] << ŝ + for i := 1; i < len(z); i++ { + z[i-1] = x[i-1]>>s | x[i]<<ŝ + } + z[len(z)-1] = x[len(z)-1] >> s + return +} + +func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { + c = r + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + c, z[i] = mulAddWWW_g(x[i], y, c) + } + return +} + +func addMulVVW_g(z, x []Word, y Word) (c Word) { + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + z1, z0 := mulAddWWW_g(x[i], y, z[i]) + lo, cc := bits.Add(uint(z0), uint(c), 0) + c, z[i] = Word(cc), Word(lo) + c += z1 + } + return +} + +// q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y. +// An approximate reciprocal with a reference to "Improved Division by Invariant Integers +// (IEEE Transactions on Computers, 11 Jun. 
2010)" +func divWW(x1, x0, y, m Word) (q, r Word) { + s := nlz(y) + if s != 0 { + x1 = x1<<s | x0>>(_W-s) + x0 <<= s + y <<= s + } + d := uint(y) + // We know that + // m = ⎣(B^2-1)/d⎦-B + // ⎣(B^2-1)/d⎦ = m+B + // (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d + // B^2/d = m+B+delta2 0 <= delta2 <= 1 + // The quotient we're trying to compute is + // quotient = ⎣(x1*B+x0)/d⎦ + // = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦ + // = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦ + // = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦ + // The latter two terms of this three-term sum are between 0 and 1. + // So we can compute just the first term, and we will be low by at most 2. + t1, t0 := bits.Mul(uint(m), uint(x1)) + _, c := bits.Add(t0, uint(x0), 0) + t1, _ = bits.Add(t1, uint(x1), c) + // The quotient is either t1, t1+1, or t1+2. + // We'll try t1 and adjust if needed. + qq := t1 + // compute remainder r=x-d*q. + dq1, dq0 := bits.Mul(d, qq) + r0, b := bits.Sub(uint(x0), dq0, 0) + r1, _ := bits.Sub(uint(x1), dq1, b) + // The remainder we just computed is bounded above by B+d: + // r = x1*B + x0 - d*q. + // = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦ + // = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1 + // = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha + // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha + // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha + // = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1 + // = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha + // = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha + // = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha + // < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1 + // = B - d + d + d + // = B+d + // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small. + // Add 1 to q and subtract d from r. That guarantees that r is <B, so + // we no longer need to keep track of r1. 
+ if r1 != 0 { + qq++ + r0 -= d + } + // If the remainder is still too large, increment q one more time. + if r0 >= d { + qq++ + r0 -= d + } + return Word(qq), Word(r0 >> s) +} + +// reciprocalWord returns the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1). +func reciprocalWord(d1 Word) Word { + u := uint(d1 << nlz(d1)) + x1 := ^u + x0 := uint(_M) + rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U + return Word(rec) +} diff --git a/src/math/big/arith_386.s b/src/math/big/arith_386.s new file mode 100644 index 0000000..acf2b06 --- /dev/null +++ b/src/math/big/arith_386.s @@ -0,0 +1,245 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. 
+ +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVL x+0(FP), AX + MULL y+4(FP) + MOVL DX, z1+8(FP) + MOVL AX, z0+12(FP) + RET + + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), CX + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 + JMP E1 + +L1: MOVL (SI)(BX*4), AX + ADDL DX, DX // restore CF + ADCL (CX)(BX*4), AX + SBBL DX, DX // save CF + MOVL AX, (DI)(BX*4) + ADDL $1, BX // i++ + +E1: CMPL BX, BP // i < n + JL L1 + + NEGL DX + MOVL DX, c+36(FP) + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBL instead of ADCL and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), CX + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 + JMP E2 + +L2: MOVL (SI)(BX*4), AX + ADDL DX, DX // restore CF + SBBL (CX)(BX*4), AX + SBBL DX, DX // save CF + MOVL AX, (DI)(BX*4) + ADDL $1, BX // i++ + +E2: CMPL BX, BP // i < n + JL L2 + + NEGL DX + MOVL DX, c+36(FP) + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), AX // c = y + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + JMP E3 + +L3: ADDL (SI)(BX*4), AX + MOVL AX, (DI)(BX*4) + SBBL AX, AX // save CF + NEGL AX + ADDL $1, BX // i++ + +E3: CMPL BX, BP // i < n + JL L3 + + MOVL AX, c+28(FP) + RET + + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), AX // c = y + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + JMP E4 + +L4: MOVL (SI)(BX*4), DX + SUBL AX, DX + MOVL DX, (DI)(BX*4) + SBBL AX, AX // save CF + NEGL AX + ADDL $1, BX // i++ + +E4: CMPL BX, BP // i < n + JL L4 + + MOVL AX, c+28(FP) + RET + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVL z_len+4(FP), BX // i = z + SUBL $1, BX // i-- + JL X8b // i < 0 (n <= 0) + + // n > 0 + 
MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL s+24(FP), CX + MOVL (SI)(BX*4), AX // w1 = x[n-1] + MOVL $0, DX + SHLL CX, AX, DX // w1>>ŝ + MOVL DX, c+28(FP) + + CMPL BX, $0 + JLE X8a // i <= 0 + + // i > 0 +L8: MOVL AX, DX // w = w1 + MOVL -4(SI)(BX*4), AX // w1 = x[i-1] + SHLL CX, AX, DX // w<<s | w1>>ŝ + MOVL DX, (DI)(BX*4) // z[i] = w<<s | w1>>ŝ + SUBL $1, BX // i-- + JG L8 // i > 0 + + // i <= 0 +X8a: SHLL CX, AX // w1<<s + MOVL AX, (DI) // z[0] = w1<<s + RET + +X8b: MOVL $0, c+28(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVL z_len+4(FP), BP + SUBL $1, BP // n-- + JL X9b // n < 0 (n <= 0) + + // n > 0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL s+24(FP), CX + MOVL (SI), AX // w1 = x[0] + MOVL $0, DX + SHRL CX, AX, DX // w1<<ŝ + MOVL DX, c+28(FP) + + MOVL $0, BX // i = 0 + JMP E9 + + // i < n-1 +L9: MOVL AX, DX // w = w1 + MOVL 4(SI)(BX*4), AX // w1 = x[i+1] + SHRL CX, AX, DX // w>>s | w1<<ŝ + MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ + ADDL $1, BX // i++ + +E9: CMPL BX, BP + JL L9 // i < n-1 + + // i >= n-1 +X9a: SHRL CX, AX // w1>>s + MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s + RET + +X9b: MOVL $0, c+28(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), BP + MOVL r+28(FP), CX // c = r + MOVL z_len+4(FP), BX + LEAL (DI)(BX*4), DI + LEAL (SI)(BX*4), SI + NEGL BX // i = -n + JMP E5 + +L5: MOVL (SI)(BX*4), AX + MULL BP + ADDL CX, AX + ADCL $0, DX + MOVL AX, (DI)(BX*4) + MOVL DX, CX + ADDL $1, BX // i++ + +E5: CMPL BX, $0 // i < 0 + JL L5 + + MOVL CX, c+32(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), BP + MOVL z_len+4(FP), BX + LEAL (DI)(BX*4), DI + LEAL (SI)(BX*4), SI + NEGL BX // i = -n + MOVL $0, CX // c = 0 + JMP E6 + +L6: MOVL (SI)(BX*4), AX + MULL BP + ADDL CX, AX + ADCL $0, DX + ADDL AX, (DI)(BX*4) + ADCL $0, 
DX + MOVL DX, CX + ADDL $1, BX // i++ + +E6: CMPL BX, $0 // i < 0 + JL L6 + + MOVL CX, c+28(FP) + RET + + + diff --git a/src/math/big/arith_amd64.go b/src/math/big/arith_amd64.go new file mode 100644 index 0000000..89108fe --- /dev/null +++ b/src/math/big/arith_amd64.go @@ -0,0 +1,12 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +package big + +import "internal/cpu" + +var support_adx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/src/math/big/arith_amd64.s b/src/math/big/arith_amd64.s new file mode 100644 index 0000000..5c72a27 --- /dev/null +++ b/src/math/big/arith_amd64.s @@ -0,0 +1,526 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MULQ y+8(FP) + MOVQ DX, z1+16(FP) + MOVQ AX, z0+24(FP) + RET + + + +// The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0. +// It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared. +// This is faster than using rotate instructions. 
+ +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z+0(FP), R10 + + MOVQ $0, CX // c = 0 + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V1 // if n < 0 goto V1 + +U1: // n >= 0 + // regular loop body unrolled 4x + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + ADCQ 0(R9)(SI*8), R11 + ADCQ 8(R9)(SI*8), R12 + ADCQ 16(R9)(SI*8), R13 + ADCQ 24(R9)(SI*8), R14 + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U1 // if n >= 0 goto U1 + +V1: ADDQ $4, DI // n += 4 + JLE E1 // if n <= 0 goto E1 + +L1: // n > 0 + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + ADCQ 0(R9)(SI*8), R11 + MOVQ R11, 0(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L1 // if n > 0 goto L1 + +E1: NEGQ CX + MOVQ CX, c+72(FP) // return c + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBQ instead of ADCQ and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z+0(FP), R10 + + MOVQ $0, CX // c = 0 + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V2 // if n < 0 goto V2 + +U2: // n >= 0 + // regular loop body unrolled 4x + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + SBBQ 0(R9)(SI*8), R11 + SBBQ 8(R9)(SI*8), R12 + SBBQ 16(R9)(SI*8), R13 + SBBQ 24(R9)(SI*8), R14 + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U2 // if n >= 0 goto U2 + +V2: ADDQ $4, DI // n += 
4 + JLE E2 // if n <= 0 goto E2 + +L2: // n > 0 + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + SBBQ 0(R9)(SI*8), R11 + MOVQ R11, 0(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L2 // if n > 0 goto L2 + +E2: NEGQ CX + MOVQ CX, c+72(FP) // return c + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + CMPQ DI, $32 + JG large + MOVQ x+24(FP), R8 + MOVQ y+48(FP), CX // c = y + MOVQ z+0(FP), R10 + + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V3 // if n < 4 goto V3 + +U3: // n >= 0 + // regular loop body unrolled 4x + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + ADDQ CX, R11 + ADCQ $0, R12 + ADCQ $0, R13 + ADCQ $0, R14 + SBBQ CX, CX // save CF + NEGQ CX + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U3 // if n >= 0 goto U3 + +V3: ADDQ $4, DI // n += 4 + JLE E3 // if n <= 0 goto E3 + +L3: // n > 0 + ADDQ 0(R8)(SI*8), CX + MOVQ CX, 0(R10)(SI*8) + SBBQ CX, CX // save CF + NEGQ CX + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L3 // if n > 0 goto L3 + +E3: MOVQ CX, c+56(FP) // return c + RET +large: + JMP ·addVWlarge(SB) + + +// func subVW(z, x []Word, y Word) (c Word) +// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + CMPQ DI, $32 + JG large + MOVQ x+24(FP), R8 + MOVQ y+48(FP), CX // c = y + MOVQ z+0(FP), R10 + + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V4 // if n < 4 goto V4 + +U4: // n >= 0 + // regular loop body unrolled 4x + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + SUBQ CX, R11 + SBBQ $0, R12 + SBBQ $0, R13 + SBBQ $0, R14 + SBBQ CX, CX // save CF + NEGQ CX 
+ MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U4 // if n >= 0 goto U4 + +V4: ADDQ $4, DI // n += 4 + JLE E4 // if n <= 0 goto E4 + +L4: // n > 0 + MOVQ 0(R8)(SI*8), R11 + SUBQ CX, R11 + MOVQ R11, 0(R10)(SI*8) + SBBQ CX, CX // save CF + NEGQ CX + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L4 // if n > 0 goto L4 + +E4: MOVQ CX, c+56(FP) // return c + RET +large: + JMP ·subVWlarge(SB) + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), BX // i = z + SUBQ $1, BX // i-- + JL X8b // i < 0 (n <= 0) + + // n > 0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ s+48(FP), CX + MOVQ (R8)(BX*8), AX // w1 = x[n-1] + MOVQ $0, DX + SHLQ CX, AX, DX // w1>>ŝ + MOVQ DX, c+56(FP) + + CMPQ BX, $0 + JLE X8a // i <= 0 + + // i > 0 +L8: MOVQ AX, DX // w = w1 + MOVQ -8(R8)(BX*8), AX // w1 = x[i-1] + SHLQ CX, AX, DX // w<<s | w1>>ŝ + MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ + SUBQ $1, BX // i-- + JG L8 // i > 0 + + // i <= 0 +X8a: SHLQ CX, AX // w1<<s + MOVQ AX, (R10) // z[0] = w1<<s + RET + +X8b: MOVQ $0, c+56(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), R11 + SUBQ $1, R11 // n-- + JL X9b // n < 0 (n <= 0) + + // n > 0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ s+48(FP), CX + MOVQ (R8), AX // w1 = x[0] + MOVQ $0, DX + SHRQ CX, AX, DX // w1<<ŝ + MOVQ DX, c+56(FP) + + MOVQ $0, BX // i = 0 + JMP E9 + + // i < n-1 +L9: MOVQ AX, DX // w = w1 + MOVQ 8(R8)(BX*8), AX // w1 = x[i+1] + SHRQ CX, AX, DX // w>>s | w1<<ŝ + MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ + ADDQ $1, BX // i++ + +E9: CMPQ BX, R11 + JL L9 // i < n-1 + + // i >= n-1 +X9a: SHRQ CX, AX // w1>>s + MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s + RET + +X9b: MOVQ $0, c+56(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ 
y+48(FP), R9 + MOVQ r+56(FP), CX // c = r + MOVQ z_len+8(FP), R11 + MOVQ $0, BX // i = 0 + + CMPQ R11, $4 + JL E5 + +U5: // i+4 <= n + // regular loop body unrolled 4x + MOVQ (0*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (0*8)(R10)(BX*8) + MOVQ DX, CX + MOVQ (1*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (1*8)(R10)(BX*8) + MOVQ DX, CX + MOVQ (2*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (2*8)(R10)(BX*8) + MOVQ DX, CX + MOVQ (3*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (3*8)(R10)(BX*8) + MOVQ DX, CX + ADDQ $4, BX // i += 4 + + LEAQ 4(BX), DX + CMPQ DX, R11 + JLE U5 + JMP E5 + +L5: MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (R10)(BX*8) + MOVQ DX, CX + ADDQ $1, BX // i++ + +E5: CMPQ BX, R11 // i < n + JL L5 + + MOVQ CX, c+64(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + CMPB ·support_adx(SB), $1 + JEQ adx + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z_len+8(FP), R11 + MOVQ $0, BX // i = 0 + MOVQ $0, CX // c = 0 + MOVQ R11, R12 + ANDQ $-2, R12 + CMPQ R11, $2 + JAE A6 + JMP E6 + +A6: + MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ (R10)(BX*8), AX + ADCQ $0, DX + ADDQ CX, AX + ADCQ $0, DX + MOVQ DX, CX + MOVQ AX, (R10)(BX*8) + + MOVQ (8)(R8)(BX*8), AX + MULQ R9 + ADDQ (8)(R10)(BX*8), AX + ADCQ $0, DX + ADDQ CX, AX + ADCQ $0, DX + MOVQ DX, CX + MOVQ AX, (8)(R10)(BX*8) + + ADDQ $2, BX + CMPQ BX, R12 + JL A6 + JMP E6 + +L6: MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + ADDQ AX, (R10)(BX*8) + ADCQ $0, DX + MOVQ DX, CX + ADDQ $1, BX // i++ + +E6: CMPQ BX, R11 // i < n + JL L6 + + MOVQ CX, c+56(FP) + RET + +adx: + MOVQ z_len+8(FP), R11 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), DX + MOVQ $0, BX // i = 0 + MOVQ $0, CX // carry + CMPQ R11, $8 + JAE adx_loop_header + CMPQ BX, R11 + JL adx_short + MOVQ CX, c+56(FP) + RET + +adx_loop_header: + MOVQ R11, R13 + ANDQ $-8, R13 
+adx_loop: + XORQ R9, R9 // unset flags + MULXQ (R8), SI, DI + ADCXQ CX,SI + ADOXQ (R10), SI + MOVQ SI,(R10) + + MULXQ 8(R8), AX, CX + ADCXQ DI, AX + ADOXQ 8(R10), AX + MOVQ AX, 8(R10) + + MULXQ 16(R8), SI, DI + ADCXQ CX, SI + ADOXQ 16(R10), SI + MOVQ SI, 16(R10) + + MULXQ 24(R8), AX, CX + ADCXQ DI, AX + ADOXQ 24(R10), AX + MOVQ AX, 24(R10) + + MULXQ 32(R8), SI, DI + ADCXQ CX, SI + ADOXQ 32(R10), SI + MOVQ SI, 32(R10) + + MULXQ 40(R8), AX, CX + ADCXQ DI, AX + ADOXQ 40(R10), AX + MOVQ AX, 40(R10) + + MULXQ 48(R8), SI, DI + ADCXQ CX, SI + ADOXQ 48(R10), SI + MOVQ SI, 48(R10) + + MULXQ 56(R8), AX, CX + ADCXQ DI, AX + ADOXQ 56(R10), AX + MOVQ AX, 56(R10) + + ADCXQ R9, CX + ADOXQ R9, CX + + ADDQ $64, R8 + ADDQ $64, R10 + ADDQ $8, BX + + CMPQ BX, R13 + JL adx_loop + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + CMPQ BX, R11 + JL adx_short + MOVQ CX, c+56(FP) + RET + +adx_short: + MULXQ (R8)(BX*8), SI, DI + ADDQ CX, SI + ADCQ $0, DI + ADDQ SI, (R10)(BX*8) + ADCQ $0, DI + MOVQ DI, CX + ADDQ $1, BX // i++ + + CMPQ BX, R11 + JL adx_short + + MOVQ CX, c+56(FP) + RET + + + diff --git a/src/math/big/arith_arm.s b/src/math/big/arith_arm.s new file mode 100644 index 0000000..f2872d8 --- /dev/null +++ b/src/math/big/arith_arm.s @@ -0,0 +1,284 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. 
+ +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + ADD.S $0, R0 // clear carry flag + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + B E1 +L1: + MOVW.P 4(R2), R5 + MOVW.P 4(R3), R6 + ADC.S R6, R5 + MOVW.P R5, 4(R1) +E1: + TEQ R1, R4 + BNE L1 + + MOVW $0, R0 + MOVW.CS $1, R0 + MOVW R0, c+36(FP) + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBC instead of ADC and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + SUB.S $0, R0 // clear borrow flag + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + B E2 +L2: + MOVW.P 4(R2), R5 + MOVW.P 4(R3), R6 + SBC.S R6, R5 + MOVW.P R5, 4(R1) +E2: + TEQ R1, R4 + BNE L2 + + MOVW $0, R0 + MOVW.CC $1, R0 + MOVW R0, c+36(FP) + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + TEQ R1, R4 + BNE L3a + MOVW R3, c+28(FP) + RET +L3a: + MOVW.P 4(R2), R5 + ADD.S R3, R5 + MOVW.P R5, 4(R1) + B E3 +L3: + MOVW.P 4(R2), R5 + ADC.S $0, R5 + MOVW.P R5, 4(R1) +E3: + TEQ R1, R4 + BNE L3 + + MOVW $0, R0 + MOVW.CS $1, R0 + MOVW R0, c+28(FP) + RET + + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + TEQ R1, R4 + BNE L4a + MOVW R3, c+28(FP) + RET +L4a: + MOVW.P 4(R2), R5 + SUB.S R3, R5 + MOVW.P R5, 4(R1) + B E4 +L4: + MOVW.P 4(R2), R5 + SBC.S $0, R5 + MOVW.P R5, 4(R1) +E4: + TEQ R1, R4 + BNE L4 + + MOVW $0, R0 + MOVW.CC $1, R0 + MOVW R0, c+28(FP) + RET + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVW z_len+4(FP), R5 + TEQ $0, R5 + BEQ X7 + + MOVW z+0(FP), R1 + MOVW x+12(FP), R2 + ADD R5<<2, R2, R2 + ADD R5<<2, R1, R5 + MOVW s+24(FP), R3 + TEQ $0, R3 // shift 0 is special + BEQ Y7 + ADD $4, R1 // stop one word 
early + MOVW $32, R4 + SUB R3, R4 + MOVW $0, R7 + + MOVW.W -4(R2), R6 + MOVW R6<<R3, R7 + MOVW R6>>R4, R6 + MOVW R6, c+28(FP) + B E7 + +L7: + MOVW.W -4(R2), R6 + ORR R6>>R4, R7 + MOVW.W R7, -4(R5) + MOVW R6<<R3, R7 +E7: + TEQ R1, R5 + BNE L7 + + MOVW R7, -4(R5) + RET + +Y7: // copy loop, because shift 0 == shift 32 + MOVW.W -4(R2), R6 + MOVW.W R6, -4(R5) + TEQ R1, R5 + BNE Y7 + +X7: + MOVW $0, R1 + MOVW R1, c+28(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVW z_len+4(FP), R5 + TEQ $0, R5 + BEQ X6 + + MOVW z+0(FP), R1 + MOVW x+12(FP), R2 + ADD R5<<2, R1, R5 + MOVW s+24(FP), R3 + TEQ $0, R3 // shift 0 is special + BEQ Y6 + SUB $4, R5 // stop one word early + MOVW $32, R4 + SUB R3, R4 + MOVW $0, R7 + + // first word + MOVW.P 4(R2), R6 + MOVW R6>>R3, R7 + MOVW R6<<R4, R6 + MOVW R6, c+28(FP) + B E6 + + // word loop +L6: + MOVW.P 4(R2), R6 + ORR R6<<R4, R7 + MOVW.P R7, 4(R1) + MOVW R6>>R3, R7 +E6: + TEQ R1, R5 + BNE L6 + + MOVW R7, 0(R1) + RET + +Y6: // copy loop, because shift 0 == shift 32 + MOVW.P 4(R2), R6 + MOVW.P R6, 4(R1) + TEQ R1, R5 + BNE Y6 + +X6: + MOVW $0, R1 + MOVW R1, c+28(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVW $0, R0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R5 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + MOVW r+28(FP), R4 + ADD R5<<2, R1, R5 + B E8 + + // word loop +L8: + MOVW.P 4(R2), R6 + MULLU R6, R3, (R7, R6) + ADD.S R4, R6 + ADC R0, R7 + MOVW.P R6, 4(R1) + MOVW R7, R4 +E8: + TEQ R1, R5 + BNE L8 + + MOVW R4, c+32(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + MOVW $0, R0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R5 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R5<<2, R1, R5 + MOVW $0, R4 + B E9 + + // word loop +L9: + MOVW.P 4(R2), R6 + MULLU R6, R3, (R7, R6) + ADD.S R4, R6 + ADC R0, R7 + MOVW 0(R1), R4 + ADD.S R4, R6 + ADC R0, R7 + MOVW.P R6, 4(R1) + MOVW R7, R4 +E9: + TEQ R1, R5 + BNE L9 + + MOVW 
R4, c+28(FP) + RET + + + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVW x+0(FP), R1 + MOVW y+4(FP), R2 + MULLU R1, R2, (R4, R3) + MOVW R4, z1+8(FP) + MOVW R3, z0+12(FP) + RET diff --git a/src/math/big/arith_arm64.s b/src/math/big/arith_arm64.s new file mode 100644 index 0000000..7bfe08e --- /dev/null +++ b/src/math/big/arith_arm64.s @@ -0,0 +1,584 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// TODO: Consider re-implementing using Advanced SIMD +// once the assembler supports those instructions. + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVD x+0(FP), R0 + MOVD y+8(FP), R1 + MUL R0, R1, R2 + UMULH R0, R1, R3 + MOVD R3, z1+16(FP) + MOVD R2, z0+24(FP) + RET + + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + MOVD z_len+8(FP), R0 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD z+0(FP), R10 + ADDS $0, R0 // clear carry flag + TBZ $0, R0, two + MOVD.P 8(R8), R11 + MOVD.P 8(R9), R15 + ADCS R15, R11 + MOVD.P R11, 8(R10) + SUB $1, R0 +two: + TBZ $1, R0, loop + LDP.P 16(R8), (R11, R12) + LDP.P 16(R9), (R15, R16) + ADCS R15, R11 + ADCS R16, R12 + STP.P (R11, R12), 16(R10) + SUB $2, R0 +loop: + CBZ R0, done // careful not to touch the carry flag + LDP.P 32(R8), (R11, R12) + LDP -16(R8), (R13, R14) + LDP.P 32(R9), (R15, R16) + LDP -16(R9), (R17, R19) + ADCS R15, R11 + ADCS R16, R12 + ADCS R17, R13 + ADCS R19, R14 + STP.P (R11, R12), 32(R10) + STP (R13, R14), -16(R10) + SUB $4, R0 + B loop +done: + CSET HS, R0 // extract carry flag + MOVD R0, c+72(FP) + RET + + +// func subVV(z, x, y []Word) (c Word) +TEXT ·subVV(SB),NOSPLIT,$0 + MOVD z_len+8(FP), R0 + MOVD x+24(FP), R8 
+ MOVD y+48(FP), R9 + MOVD z+0(FP), R10 + CMP R0, R0 // set carry flag + TBZ $0, R0, two + MOVD.P 8(R8), R11 + MOVD.P 8(R9), R15 + SBCS R15, R11 + MOVD.P R11, 8(R10) + SUB $1, R0 +two: + TBZ $1, R0, loop + LDP.P 16(R8), (R11, R12) + LDP.P 16(R9), (R15, R16) + SBCS R15, R11 + SBCS R16, R12 + STP.P (R11, R12), 16(R10) + SUB $2, R0 +loop: + CBZ R0, done // careful not to touch the carry flag + LDP.P 32(R8), (R11, R12) + LDP -16(R8), (R13, R14) + LDP.P 32(R9), (R15, R16) + LDP -16(R9), (R17, R19) + SBCS R15, R11 + SBCS R16, R12 + SBCS R17, R13 + SBCS R19, R14 + STP.P (R11, R12), 32(R10) + STP (R13, R14), -16(R10) + SUB $4, R0 + B loop +done: + CSET LO, R0 // extract carry flag + MOVD R0, c+72(FP) + RET + +#define vwOneOp(instr, op1) \ + MOVD.P 8(R1), R4; \ + instr op1, R4; \ + MOVD.P R4, 8(R3); + +// handle the first 1~4 elements before starting iteration in addVW/subVW +#define vwPreIter(instr1, instr2, counter, target) \ + vwOneOp(instr1, R2); \ + SUB $1, counter; \ + CBZ counter, target; \ + vwOneOp(instr2, $0); \ + SUB $1, counter; \ + CBZ counter, target; \ + vwOneOp(instr2, $0); \ + SUB $1, counter; \ + CBZ counter, target; \ + vwOneOp(instr2, $0); + +// do one iteration of add or sub in addVW/subVW +#define vwOneIter(instr, counter, exit) \ + CBZ counter, exit; \ // careful not to touch the carry flag + LDP.P 32(R1), (R4, R5); \ + LDP -16(R1), (R6, R7); \ + instr $0, R4, R8; \ + instr $0, R5, R9; \ + instr $0, R6, R10; \ + instr $0, R7, R11; \ + STP.P (R8, R9), 32(R3); \ + STP (R10, R11), -16(R3); \ + SUB $4, counter; + +// do one iteration of copy in addVW/subVW +#define vwOneIterCopy(counter, exit) \ + CBZ counter, exit; \ + LDP.P 32(R1), (R4, R5); \ + LDP -16(R1), (R6, R7); \ + STP.P (R4, R5), 32(R3); \ + STP (R6, R7), -16(R3); \ + SUB $4, counter; + +// func addVW(z, x []Word, y Word) (c Word) +// The 'large' branch handles large 'z'. It checks the carry flag on every iteration +// and switches to copy if we are done with carries. 
The copying is skipped as well +// if 'x' and 'z' happen to share the same underlying storage. +// The overhead of the checking and branching is visible when 'z' is small (~5%), +// so set a threshold of 32, and leave the small-sized part entirely untouched. +TEXT ·addVW(SB),NOSPLIT,$0 + MOVD z+0(FP), R3 + MOVD z_len+8(FP), R0 + MOVD x+24(FP), R1 + MOVD y+48(FP), R2 + CMP $32, R0 + BGE large // large-sized 'z' and 'x' + CBZ R0, len0 // the length of z is 0 + MOVD.P 8(R1), R4 + ADDS R2, R4 // z[0] = x[0] + y, set carry + MOVD.P R4, 8(R3) + SUB $1, R0 + CBZ R0, len1 // the length of z is 1 + TBZ $0, R0, two + MOVD.P 8(R1), R4 // do it once + ADCS $0, R4 + MOVD.P R4, 8(R3) + SUB $1, R0 +two: // do it twice + TBZ $1, R0, loop + LDP.P 16(R1), (R4, R5) + ADCS $0, R4, R8 // c, z[i] = x[i] + c + ADCS $0, R5, R9 + STP.P (R8, R9), 16(R3) + SUB $2, R0 +loop: // do four times per round + vwOneIter(ADCS, R0, len1) + B loop +len1: + CSET HS, R2 // extract carry flag +len0: + MOVD R2, c+56(FP) +done: + RET +large: + AND $0x3, R0, R10 + AND $~0x3, R0 + // unrolling for the first 1~4 elements to avoid saving the carry + // flag in each step, adjust $R0 if we unrolled 4 elements + vwPreIter(ADDS, ADCS, R10, add4) + SUB $4, R0 +add4: + BCC copy + vwOneIter(ADCS, R0, len1) + B add4 +copy: + MOVD ZR, c+56(FP) + CMP R1, R3 + BEQ done +copy_4: // no carry flag, copy the rest + vwOneIterCopy(R0, done) + B copy_4 + +// func subVW(z, x []Word, y Word) (c Word) +// The 'large' branch handles large 'z'. It checks the carry flag on every iteration +// and switches to copy if we are done with carries. The copying is skipped as well +// if 'x' and 'z' happen to share the same underlying storage. +// The overhead of the checking and branching is visible when 'z' is small (~5%), +// so set a threshold of 32, and leave the small-sized part entirely untouched. 
+TEXT ·subVW(SB),NOSPLIT,$0 + MOVD z+0(FP), R3 + MOVD z_len+8(FP), R0 + MOVD x+24(FP), R1 + MOVD y+48(FP), R2 + CMP $32, R0 + BGE large // large-sized 'z' and 'x' + CBZ R0, len0 // the length of z is 0 + MOVD.P 8(R1), R4 + SUBS R2, R4 // z[0] = x[0] - y, set carry + MOVD.P R4, 8(R3) + SUB $1, R0 + CBZ R0, len1 // the length of z is 1 + TBZ $0, R0, two // do it once + MOVD.P 8(R1), R4 + SBCS $0, R4 + MOVD.P R4, 8(R3) + SUB $1, R0 +two: // do it twice + TBZ $1, R0, loop + LDP.P 16(R1), (R4, R5) + SBCS $0, R4, R8 // c, z[i] = x[i] - c + SBCS $0, R5, R9 + STP.P (R8, R9), 16(R3) + SUB $2, R0 +loop: // do four times per round + vwOneIter(SBCS, R0, len1) + B loop +len1: + CSET LO, R2 // extract carry flag +len0: + MOVD R2, c+56(FP) +done: + RET +large: + AND $0x3, R0, R10 + AND $~0x3, R0 + // unrolling for the first 1~4 elements to avoid saving the carry + // flag in each step, adjust $R0 if we unrolled 4 elements + vwPreIter(SUBS, SBCS, R10, sub4) + SUB $4, R0 +sub4: + BCS copy + vwOneIter(SBCS, R0, len1) + B sub4 +copy: + MOVD ZR, c+56(FP) + CMP R1, R3 + BEQ done +copy_4: // no carry flag, copy the rest + vwOneIterCopy(R0, done) + B copy_4 + +// func shlVU(z, x []Word, s uint) (c Word) +// This implementation handles the shift operation from the high word to the low word, +// which may be an error for the case where the low word of x overlaps with the high +// word of z. When calling this function directly, you need to pay attention to this +// situation. 
+TEXT ·shlVU(SB),NOSPLIT,$0 + LDP z+0(FP), (R0, R1) // R0 = z.ptr, R1 = len(z) + MOVD x+24(FP), R2 + MOVD s+48(FP), R3 + ADD R1<<3, R0 // R0 = &z[n] + ADD R1<<3, R2 // R2 = &x[n] + CBZ R1, len0 + CBZ R3, copy // if the number of shift is 0, just copy x to z + MOVD $64, R4 + SUB R3, R4 + // handling the most significant element x[n-1] + MOVD.W -8(R2), R6 + LSR R4, R6, R5 // return value + LSL R3, R6, R8 // x[i] << s + SUB $1, R1 +one: TBZ $0, R1, two + MOVD.W -8(R2), R6 + LSR R4, R6, R7 + ORR R8, R7 + LSL R3, R6, R8 + SUB $1, R1 + MOVD.W R7, -8(R0) +two: + TBZ $1, R1, loop + LDP.W -16(R2), (R6, R7) + LSR R4, R7, R10 + ORR R8, R10 + LSL R3, R7 + LSR R4, R6, R9 + ORR R7, R9 + LSL R3, R6, R8 + SUB $2, R1 + STP.W (R9, R10), -16(R0) +loop: + CBZ R1, done + LDP.W -32(R2), (R10, R11) + LDP 16(R2), (R12, R13) + LSR R4, R13, R23 + ORR R8, R23 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s)) + LSL R3, R13 + LSR R4, R12, R22 + ORR R13, R22 + LSL R3, R12 + LSR R4, R11, R21 + ORR R12, R21 + LSL R3, R11 + LSR R4, R10, R20 + ORR R11, R20 + LSL R3, R10, R8 + STP.W (R20, R21), -32(R0) + STP (R22, R23), 16(R0) + SUB $4, R1 + B loop +done: + MOVD.W R8, -8(R0) // the first element x[0] + MOVD R5, c+56(FP) // the part moved out from x[n-1] + RET +copy: + CMP R0, R2 + BEQ len0 + TBZ $0, R1, ctwo + MOVD.W -8(R2), R4 + MOVD.W R4, -8(R0) + SUB $1, R1 +ctwo: + TBZ $1, R1, cloop + LDP.W -16(R2), (R4, R5) + STP.W (R4, R5), -16(R0) + SUB $2, R1 +cloop: + CBZ R1, len0 + LDP.W -32(R2), (R4, R5) + LDP 16(R2), (R6, R7) + STP.W (R4, R5), -32(R0) + STP (R6, R7), 16(R0) + SUB $4, R1 + B cloop +len0: + MOVD $0, c+56(FP) + RET + +// func shrVU(z, x []Word, s uint) (c Word) +// This implementation handles the shift operation from the low word to the high word, +// which may be an error for the case where the high word of x overlaps with the low +// word of z. When calling this function directly, you need to pay attention to this +// situation. 
+TEXT ·shrVU(SB),NOSPLIT,$0 + MOVD z+0(FP), R0 + MOVD z_len+8(FP), R1 + MOVD x+24(FP), R2 + MOVD s+48(FP), R3 + MOVD $0, R8 + MOVD $64, R4 + SUB R3, R4 + CBZ R1, len0 + CBZ R3, copy // if the number of shift is 0, just copy x to z + + MOVD.P 8(R2), R20 + LSR R3, R20, R8 + LSL R4, R20 + MOVD R20, c+56(FP) // deal with the first element + SUB $1, R1 + + TBZ $0, R1, two + MOVD.P 8(R2), R6 + LSL R4, R6, R20 + ORR R8, R20 + LSR R3, R6, R8 + MOVD.P R20, 8(R0) + SUB $1, R1 +two: + TBZ $1, R1, loop + LDP.P 16(R2), (R6, R7) + LSL R4, R6, R20 + LSR R3, R6 + ORR R8, R20 + LSL R4, R7, R21 + LSR R3, R7, R8 + ORR R6, R21 + STP.P (R20, R21), 16(R0) + SUB $2, R1 +loop: + CBZ R1, done + LDP.P 32(R2), (R10, R11) + LDP -16(R2), (R12, R13) + LSL R4, R10, R20 + LSR R3, R10 + ORR R8, R20 // z[i] = (x[i] >> s) | (x[i+1] << (64 - s)) + LSL R4, R11, R21 + LSR R3, R11 + ORR R10, R21 + LSL R4, R12, R22 + LSR R3, R12 + ORR R11, R22 + LSL R4, R13, R23 + LSR R3, R13, R8 + ORR R12, R23 + STP.P (R20, R21), 32(R0) + STP (R22, R23), -16(R0) + SUB $4, R1 + B loop +done: + MOVD R8, (R0) // deal with the last element + RET +copy: + CMP R0, R2 + BEQ len0 + TBZ $0, R1, ctwo + MOVD.P 8(R2), R3 + MOVD.P R3, 8(R0) + SUB $1, R1 +ctwo: + TBZ $1, R1, cloop + LDP.P 16(R2), (R4, R5) + STP.P (R4, R5), 16(R0) + SUB $2, R1 +cloop: + CBZ R1, len0 + LDP.P 32(R2), (R4, R5) + LDP -16(R2), (R6, R7) + STP.P (R4, R5), 32(R0) + STP (R6, R7), -16(R0) + SUB $4, R1 + B cloop +len0: + MOVD $0, c+56(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVD z+0(FP), R1 + MOVD z_len+8(FP), R0 + MOVD x+24(FP), R2 + MOVD y+48(FP), R3 + MOVD r+56(FP), R4 + // c, z = x * y + r + TBZ $0, R0, two + MOVD.P 8(R2), R5 + MUL R3, R5, R7 + UMULH R3, R5, R8 + ADDS R4, R7 + ADC $0, R8, R4 // c, z[i] = x[i] * y + r + MOVD.P R7, 8(R1) + SUB $1, R0 +two: + TBZ $1, R0, loop + LDP.P 16(R2), (R5, R6) + MUL R3, R5, R10 + UMULH R3, R5, R11 + ADDS R4, R10 + MUL R3, R6, R12 + UMULH R3, R6, R13 + ADCS R12, 
R11 + ADC $0, R13, R4 + + STP.P (R10, R11), 16(R1) + SUB $2, R0 +loop: + CBZ R0, done + LDP.P 32(R2), (R5, R6) + LDP -16(R2), (R7, R8) + + MUL R3, R5, R10 + UMULH R3, R5, R11 + ADDS R4, R10 + MUL R3, R6, R12 + UMULH R3, R6, R13 + ADCS R11, R12 + + MUL R3, R7, R14 + UMULH R3, R7, R15 + ADCS R13, R14 + MUL R3, R8, R16 + UMULH R3, R8, R17 + ADCS R15, R16 + ADC $0, R17, R4 + + STP.P (R10, R12), 32(R1) + STP (R14, R16), -16(R1) + SUB $4, R0 + B loop +done: + MOVD R4, c+64(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + MOVD z+0(FP), R1 + MOVD z_len+8(FP), R0 + MOVD x+24(FP), R2 + MOVD y+48(FP), R3 + MOVD $0, R4 + + TBZ $0, R0, two + + MOVD.P 8(R2), R5 + MOVD (R1), R6 + + MUL R5, R3, R7 + UMULH R5, R3, R8 + + ADDS R7, R6 + ADC $0, R8, R4 + + MOVD.P R6, 8(R1) + SUB $1, R0 + +two: + TBZ $1, R0, loop + + LDP.P 16(R2), (R5, R10) + LDP (R1), (R6, R11) + + MUL R10, R3, R13 + UMULH R10, R3, R12 + + MUL R5, R3, R7 + UMULH R5, R3, R8 + + ADDS R4, R6 + ADCS R13, R11 + ADC $0, R12 + + ADDS R7, R6 + ADCS R8, R11 + ADC $0, R12, R4 + + STP.P (R6, R11), 16(R1) + SUB $2, R0 + +// The main loop of this code operates on a block of 4 words every iteration +// performing [R4:R12:R11:R10:R9] = R4 + R3 * [R8:R7:R6:R5] + [R12:R11:R10:R9] +// where R4 is carried from the previous iteration, R8:R7:R6:R5 hold the next +// 4 words of x, R3 is y and R12:R11:R10:R9 are part of the result z. 
+loop: + CBZ R0, done + + LDP.P 16(R2), (R5, R6) + LDP.P 16(R2), (R7, R8) + + LDP (R1), (R9, R10) + ADDS R4, R9 + MUL R6, R3, R14 + ADCS R14, R10 + MUL R7, R3, R15 + LDP 16(R1), (R11, R12) + ADCS R15, R11 + MUL R8, R3, R16 + ADCS R16, R12 + UMULH R8, R3, R20 + ADC $0, R20 + + MUL R5, R3, R13 + ADDS R13, R9 + UMULH R5, R3, R17 + ADCS R17, R10 + UMULH R6, R3, R21 + STP.P (R9, R10), 16(R1) + ADCS R21, R11 + UMULH R7, R3, R19 + ADCS R19, R12 + STP.P (R11, R12), 16(R1) + ADC $0, R20, R4 + + SUB $4, R0 + B loop + +done: + MOVD R4, c+56(FP) + RET + + diff --git a/src/math/big/arith_decl.go b/src/math/big/arith_decl.go new file mode 100644 index 0000000..eea3d6b --- /dev/null +++ b/src/math/big/arith_decl.go @@ -0,0 +1,19 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +package big + +// implemented in arith_$GOARCH.s +func mulWW(x, y Word) (z1, z0 Word) +func addVV(z, x, y []Word) (c Word) +func subVV(z, x, y []Word) (c Word) +func addVW(z, x []Word, y Word) (c Word) +func subVW(z, x []Word, y Word) (c Word) +func shlVU(z, x []Word, s uint) (c Word) +func shrVU(z, x []Word, s uint) (c Word) +func mulAddVWW(z, x []Word, y, r Word) (c Word) +func addMulVVW(z, x []Word, y Word) (c Word) diff --git a/src/math/big/arith_decl_pure.go b/src/math/big/arith_decl_pure.go new file mode 100644 index 0000000..059f6f1 --- /dev/null +++ b/src/math/big/arith_decl_pure.go @@ -0,0 +1,54 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build math_big_pure_go +// +build math_big_pure_go + +package big + +func mulWW(x, y Word) (z1, z0 Word) { + return mulWW_g(x, y) +} + +func addVV(z, x, y []Word) (c Word) { + return addVV_g(z, x, y) +} + +func subVV(z, x, y []Word) (c Word) { + return subVV_g(z, x, y) +} + +func addVW(z, x []Word, y Word) (c Word) { + // TODO: remove indirect function call when golang.org/issue/30548 is fixed + fn := addVW_g + if len(z) > 32 { + fn = addVWlarge + } + return fn(z, x, y) +} + +func subVW(z, x []Word, y Word) (c Word) { + // TODO: remove indirect function call when golang.org/issue/30548 is fixed + fn := subVW_g + if len(z) > 32 { + fn = subVWlarge + } + return fn(z, x, y) +} + +func shlVU(z, x []Word, s uint) (c Word) { + return shlVU_g(z, x, s) +} + +func shrVU(z, x []Word, s uint) (c Word) { + return shrVU_g(z, x, s) +} + +func mulAddVWW(z, x []Word, y, r Word) (c Word) { + return mulAddVWW_g(z, x, y, r) +} + +func addMulVVW(z, x []Word, y Word) (c Word) { + return addMulVVW_g(z, x, y) +} diff --git a/src/math/big/arith_decl_s390x.go b/src/math/big/arith_decl_s390x.go new file mode 100644 index 0000000..4193f32 --- /dev/null +++ b/src/math/big/arith_decl_s390x.go @@ -0,0 +1,19 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +package big + +import "internal/cpu" + +func addVV_check(z, x, y []Word) (c Word) +func addVV_vec(z, x, y []Word) (c Word) +func addVV_novec(z, x, y []Word) (c Word) +func subVV_check(z, x, y []Word) (c Word) +func subVV_vec(z, x, y []Word) (c Word) +func subVV_novec(z, x, y []Word) (c Word) + +var hasVX = cpu.S390X.HasVX diff --git a/src/math/big/arith_mips64x.s b/src/math/big/arith_mips64x.s new file mode 100644 index 0000000..4b5c502 --- /dev/null +++ b/src/math/big/arith_mips64x.s @@ -0,0 +1,40 @@ +// Copyright 2013 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go && (mips64 || mips64le) +// +build !math_big_pure_go +// +build mips64 mips64le + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +TEXT ·mulWW(SB),NOSPLIT,$0 + JMP ·mulWW_g(SB) + +TEXT ·addVV(SB),NOSPLIT,$0 + JMP ·addVV_g(SB) + +TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) + +TEXT ·addVW(SB),NOSPLIT,$0 + JMP ·addVW_g(SB) + +TEXT ·subVW(SB),NOSPLIT,$0 + JMP ·subVW_g(SB) + +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP ·shlVU_g(SB) + +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP ·shrVU_g(SB) + +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP ·mulAddVWW_g(SB) + +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP ·addMulVVW_g(SB) + diff --git a/src/math/big/arith_mipsx.s b/src/math/big/arith_mipsx.s new file mode 100644 index 0000000..e72e6d6 --- /dev/null +++ b/src/math/big/arith_mipsx.s @@ -0,0 +1,40 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go && (mips || mipsle) +// +build !math_big_pure_go +// +build mips mipsle + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. 
+ +TEXT ·mulWW(SB),NOSPLIT,$0 + JMP ·mulWW_g(SB) + +TEXT ·addVV(SB),NOSPLIT,$0 + JMP ·addVV_g(SB) + +TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) + +TEXT ·addVW(SB),NOSPLIT,$0 + JMP ·addVW_g(SB) + +TEXT ·subVW(SB),NOSPLIT,$0 + JMP ·subVW_g(SB) + +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP ·shlVU_g(SB) + +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP ·shrVU_g(SB) + +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP ·mulAddVWW_g(SB) + +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP ·addMulVVW_g(SB) + diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s new file mode 100644 index 0000000..68c6286 --- /dev/null +++ b/src/math/big/arith_ppc64x.s @@ -0,0 +1,483 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go && (ppc64 || ppc64le) +// +build !math_big_pure_go +// +build ppc64 ppc64le + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB), NOSPLIT, $0 + MOVD x+0(FP), R4 + MOVD y+8(FP), R5 + MULHDU R4, R5, R6 + MULLD R4, R5, R7 + MOVD R6, z1+16(FP) + MOVD R7, z0+24(FP) + RET + +// func addVV(z, x, y []Word) (c Word) +// z[i] = x[i] + y[i] for all i, carrying +TEXT ·addVV(SB), NOSPLIT, $0 + MOVD z_len+8(FP), R7 // R7 = z_len + MOVD x+24(FP), R8 // R8 = x[] + MOVD y+48(FP), R9 // R9 = y[] + MOVD z+0(FP), R10 // R10 = z[] + + // If z_len = 0, we are done + CMP R0, R7 + MOVD R0, R4 + BEQ done + + // Process the first iteration out of the loop so we can + // use MOVDU and avoid 3 index registers updates. 
+ MOVD 0(R8), R11 // R11 = x[i] + MOVD 0(R9), R12 // R12 = y[i] + ADD $-1, R7 // R7 = z_len - 1 + ADDC R12, R11, R15 // R15 = x[i] + y[i], set CA + CMP R0, R7 + MOVD R15, 0(R10) // z[i] + BEQ final // If z_len was 1, we are done + + SRD $2, R7, R5 // R5 = z_len/4 + CMP R0, R5 + MOVD R5, CTR // Set up loop counter + BEQ tail // If R5 = 0, we can't use the loop + + // Process 4 elements per iteration. Unrolling this loop + // means a performance trade-off: we will lose performance + // for small values of z_len (0.90x in the worst case), but + // gain significant performance as z_len increases (up to + // 1.45x). +loop: + MOVD 8(R8), R11 // R11 = x[i] + MOVD 16(R8), R12 // R12 = x[i+1] + MOVD 24(R8), R14 // R14 = x[i+2] + MOVDU 32(R8), R15 // R15 = x[i+3] + MOVD 8(R9), R16 // R16 = y[i] + MOVD 16(R9), R17 // R17 = y[i+1] + MOVD 24(R9), R18 // R18 = y[i+2] + MOVDU 32(R9), R19 // R19 = y[i+3] + ADDE R11, R16, R20 // R20 = x[i] + y[i] + CA + ADDE R12, R17, R21 // R21 = x[i+1] + y[i+1] + CA + ADDE R14, R18, R22 // R22 = x[i+2] + y[i+2] + CA + ADDE R15, R19, R23 // R23 = x[i+3] + y[i+3] + CA + MOVD R20, 8(R10) // z[i] + MOVD R21, 16(R10) // z[i+1] + MOVD R22, 24(R10) // z[i+2] + MOVDU R23, 32(R10) // z[i+3] + ADD $-4, R7 // R7 = z_len - 4 + BC 16, 0, loop // bdnz + + // We may have more elements to read + CMP R0, R7 + BEQ final + + // Process the remaining elements, one at a time +tail: + MOVDU 8(R8), R11 // R11 = x[i] + MOVDU 8(R9), R16 // R16 = y[i] + ADD $-1, R7 // R7 = z_len - 1 + ADDE R11, R16, R20 // R20 = x[i] + y[i] + CA + CMP R0, R7 + MOVDU R20, 8(R10) // z[i] + BEQ final // If R7 = 0, we are done + + MOVDU 8(R8), R11 + MOVDU 8(R9), R16 + ADD $-1, R7 + ADDE R11, R16, R20 + CMP R0, R7 + MOVDU R20, 8(R10) + BEQ final + + MOVD 8(R8), R11 + MOVD 8(R9), R16 + ADDE R11, R16, R20 + MOVD R20, 8(R10) + +final: + ADDZE R4 // Capture CA + +done: + MOVD R4, c+72(FP) + RET + +// func subVV(z, x, y []Word) (c Word) +// z[i] = x[i] - y[i] for all i, carrying +TEXT ·subVV(SB), 
NOSPLIT, $0 + MOVD z_len+8(FP), R7 // R7 = z_len + MOVD x+24(FP), R8 // R8 = x[] + MOVD y+48(FP), R9 // R9 = y[] + MOVD z+0(FP), R10 // R10 = z[] + + // If z_len = 0, we are done + CMP R0, R7 + MOVD R0, R4 + BEQ done + + // Process the first iteration out of the loop so we can + // use MOVDU and avoid 3 index registers updates. + MOVD 0(R8), R11 // R11 = x[i] + MOVD 0(R9), R12 // R12 = y[i] + ADD $-1, R7 // R7 = z_len - 1 + SUBC R12, R11, R15 // R15 = x[i] - y[i], set CA + CMP R0, R7 + MOVD R15, 0(R10) // z[i] + BEQ final // If z_len was 1, we are done + + SRD $2, R7, R5 // R5 = z_len/4 + CMP R0, R5 + MOVD R5, CTR // Set up loop counter + BEQ tail // If R5 = 0, we can't use the loop + + // Process 4 elements per iteration. Unrolling this loop + // means a performance trade-off: we will lose performance + // for small values of z_len (0.92x in the worst case), but + // gain significant performance as z_len increases (up to + // 1.45x). +loop: + MOVD 8(R8), R11 // R11 = x[i] + MOVD 16(R8), R12 // R12 = x[i+1] + MOVD 24(R8), R14 // R14 = x[i+2] + MOVDU 32(R8), R15 // R15 = x[i+3] + MOVD 8(R9), R16 // R16 = y[i] + MOVD 16(R9), R17 // R17 = y[i+1] + MOVD 24(R9), R18 // R18 = y[i+2] + MOVDU 32(R9), R19 // R19 = y[i+3] + SUBE R16, R11, R20 // R20 = x[i] - y[i] + CA + SUBE R17, R12, R21 // R21 = x[i+1] - y[i+1] + CA + SUBE R18, R14, R22 // R22 = x[i+2] - y[i+2] + CA + SUBE R19, R15, R23 // R23 = x[i+3] - y[i+3] + CA + MOVD R20, 8(R10) // z[i] + MOVD R21, 16(R10) // z[i+1] + MOVD R22, 24(R10) // z[i+2] + MOVDU R23, 32(R10) // z[i+3] + ADD $-4, R7 // R7 = z_len - 4 + BC 16, 0, loop // bdnz + + // We may have more elements to read + CMP R0, R7 + BEQ final + + // Process the remaining elements, one at a time +tail: + MOVDU 8(R8), R11 // R11 = x[i] + MOVDU 8(R9), R16 // R16 = y[i] + ADD $-1, R7 // R7 = z_len - 1 + SUBE R16, R11, R20 // R20 = x[i] - y[i] + CA + CMP R0, R7 + MOVDU R20, 8(R10) // z[i] + BEQ final // If R7 = 0, we are done + + MOVDU 8(R8), R11 + MOVDU 8(R9), R16 + 
ADD $-1, R7 + SUBE R16, R11, R20 + CMP R0, R7 + MOVDU R20, 8(R10) + BEQ final + + MOVD 8(R8), R11 + MOVD 8(R9), R16 + SUBE R16, R11, R20 + MOVD R20, 8(R10) + +final: + ADDZE R4 + XOR $1, R4 + +done: + MOVD R4, c+72(FP) + RET + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB), NOSPLIT, $0 + MOVD z+0(FP), R10 // R10 = z[] + MOVD x+24(FP), R8 // R8 = x[] + MOVD y+48(FP), R4 // R4 = y = c + MOVD z_len+8(FP), R11 // R11 = z_len + + CMP R0, R11 // If z_len is zero, return + BEQ done + + // We will process the first iteration out of the loop so we capture + // the value of c. In the subsequent iterations, we will rely on the + // value of CA set here. + MOVD 0(R8), R20 // R20 = x[i] + ADD $-1, R11 // R11 = z_len - 1 + ADDC R20, R4, R6 // R6 = x[i] + c + CMP R0, R11 // If z_len was 1, we are done + MOVD R6, 0(R10) // z[i] + BEQ final + + // We will read 4 elements per iteration + SRD $2, R11, R9 // R9 = z_len/4 + DCBT (R8) + CMP R0, R9 + MOVD R9, CTR // Set up the loop counter + BEQ tail // If R9 = 0, we can't use the loop + +loop: + MOVD 8(R8), R20 // R20 = x[i] + MOVD 16(R8), R21 // R21 = x[i+1] + MOVD 24(R8), R22 // R22 = x[i+2] + MOVDU 32(R8), R23 // R23 = x[i+3] + ADDZE R20, R24 // R24 = x[i] + CA + ADDZE R21, R25 // R25 = x[i+1] + CA + ADDZE R22, R26 // R26 = x[i+2] + CA + ADDZE R23, R27 // R27 = x[i+3] + CA + MOVD R24, 8(R10) // z[i] + MOVD R25, 16(R10) // z[i+1] + MOVD R26, 24(R10) // z[i+2] + MOVDU R27, 32(R10) // z[i+3] + ADD $-4, R11 // R11 = z_len - 4 + BC 16, 0, loop // bdnz + + // We may have some elements to read + CMP R0, R11 + BEQ final + +tail: + MOVDU 8(R8), R20 + ADDZE R20, R24 + ADD $-1, R11 + MOVDU R24, 8(R10) + CMP R0, R11 + BEQ final + + MOVDU 8(R8), R20 + ADDZE R20, R24 + ADD $-1, R11 + MOVDU R24, 8(R10) + CMP R0, R11 + BEQ final + + MOVD 8(R8), R20 + ADDZE R20, R24 + MOVD R24, 8(R10) + +final: + ADDZE R0, R4 // c = CA +done: + MOVD R4, c+56(FP) + RET + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB), NOSPLIT, $0 + MOVD 
z+0(FP), R10 // R10 = z[] + MOVD x+24(FP), R8 // R8 = x[] + MOVD y+48(FP), R4 // R4 = y = c + MOVD z_len+8(FP), R11 // R11 = z_len + + CMP R0, R11 // If z_len is zero, return + BEQ done + + // We will process the first iteration out of the loop so we capture + // the value of c. In the subsequent iterations, we will rely on the + // value of CA set here. + MOVD 0(R8), R20 // R20 = x[i] + ADD $-1, R11 // R11 = z_len - 1 + SUBC R4, R20, R6 // R6 = x[i] - c + CMP R0, R11 // If z_len was 1, we are done + MOVD R6, 0(R10) // z[i] + BEQ final + + // We will read 4 elements per iteration + SRD $2, R11, R9 // R9 = z_len/4 + DCBT (R8) + CMP R0, R9 + MOVD R9, CTR // Set up the loop counter + BEQ tail // If R9 = 0, we can't use the loop + + // The loop here is almost the same as the one used in s390x, but + // we don't need to capture CA every iteration because we've already + // done that above. +loop: + MOVD 8(R8), R20 + MOVD 16(R8), R21 + MOVD 24(R8), R22 + MOVDU 32(R8), R23 + SUBE R0, R20 + SUBE R0, R21 + SUBE R0, R22 + SUBE R0, R23 + MOVD R20, 8(R10) + MOVD R21, 16(R10) + MOVD R22, 24(R10) + MOVDU R23, 32(R10) + ADD $-4, R11 + BC 16, 0, loop // bdnz + + // We may have some elements to read + CMP R0, R11 + BEQ final + +tail: + MOVDU 8(R8), R20 + SUBE R0, R20 + ADD $-1, R11 + MOVDU R20, 8(R10) + CMP R0, R11 + BEQ final + + MOVDU 8(R8), R20 + SUBE R0, R20 + ADD $-1, R11 + MOVDU R20, 8(R10) + CMP R0, R11 + BEQ final + + MOVD 8(R8), R20 + SUBE R0, R20 + MOVD R20, 8(R10) + +final: + // Capture CA + SUBE R4, R4 + NEG R4, R4 + +done: + MOVD R4, c+56(FP) + RET + +TEXT ·shlVU(SB), NOSPLIT, $0 + BR ·shlVU_g(SB) + +TEXT ·shrVU(SB), NOSPLIT, $0 + BR ·shrVU_g(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB), NOSPLIT, $0 + MOVD z+0(FP), R10 // R10 = z[] + MOVD x+24(FP), R8 // R8 = x[] + MOVD y+48(FP), R9 // R9 = y + MOVD r+56(FP), R4 // R4 = r = c + MOVD z_len+8(FP), R11 // R11 = z_len + + CMP R0, R11 + BEQ done + + MOVD 0(R8), R20 + ADD $-1, R11 + MULLD 
R9, R20, R6 // R6 = z0 = Low-order(x[i]*y) + MULHDU R9, R20, R7 // R7 = z1 = High-order(x[i]*y) + ADDC R4, R6 // R6 = z0 + r + ADDZE R7 // R7 = z1 + CA + CMP R0, R11 + MOVD R7, R4 // R4 = c + MOVD R6, 0(R10) // z[i] + BEQ done + + // We will read 4 elements per iteration + SRD $2, R11, R14 // R14 = z_len/4 + DCBT (R8) + CMP R0, R14 + MOVD R14, CTR // Set up the loop counter + BEQ tail // If R14 = 0, we can't use the loop + +loop: + MOVD 8(R8), R20 // R20 = x[i] + MOVD 16(R8), R21 // R21 = x[i+1] + MOVD 24(R8), R22 // R22 = x[i+2] + MOVDU 32(R8), R23 // R23 = x[i+3] + MULLD R9, R20, R24 // R24 = z0[i] + MULHDU R9, R20, R20 // R20 = z1[i] + ADDC R4, R24 // R24 = z0[i] + c + ADDZE R20 // R20 = z1[i] + CA + MULLD R9, R21, R25 + MULHDU R9, R21, R21 + ADDC R20, R25 + ADDZE R21 + MULLD R9, R22, R26 + MULHDU R9, R22, R22 + MULLD R9, R23, R27 + MULHDU R9, R23, R23 + ADDC R21, R26 + ADDZE R22 + MOVD R24, 8(R10) // z[i] + MOVD R25, 16(R10) // z[i+1] + ADDC R22, R27 + ADDZE R23,R4 // update carry + MOVD R26, 24(R10) // z[i+2] + MOVDU R27, 32(R10) // z[i+3] + ADD $-4, R11 // R11 = z_len - 4 + BC 16, 0, loop // bdnz + + // We may have some elements to read + CMP R0, R11 + BEQ done + + // Process the remaining elements, one at a time +tail: + MOVDU 8(R8), R20 // R20 = x[i] + MULLD R9, R20, R24 // R24 = z0[i] + MULHDU R9, R20, R25 // R25 = z1[i] + ADD $-1, R11 // R11 = z_len - 1 + ADDC R4, R24 + ADDZE R25 + MOVDU R24, 8(R10) // z[i] + CMP R0, R11 + MOVD R25, R4 // R4 = c + BEQ done // If R11 = 0, we are done + + MOVDU 8(R8), R20 + MULLD R9, R20, R24 + MULHDU R9, R20, R25 + ADD $-1, R11 + ADDC R4, R24 + ADDZE R25 + MOVDU R24, 8(R10) + CMP R0, R11 + MOVD R25, R4 + BEQ done + + MOVD 8(R8), R20 + MULLD R9, R20, R24 + MULHDU R9, R20, R25 + ADD $-1, R11 + ADDC R4, R24 + ADDZE R25 + MOVD R24, 8(R10) + MOVD R25, R4 + +done: + MOVD R4, c+64(FP) + RET + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB), NOSPLIT, $0 + MOVD z+0(FP), R10 // R10 = z[] + MOVD x+24(FP), R8 // R8 
= x[] + MOVD y+48(FP), R9 // R9 = y + MOVD z_len+8(FP), R22 // R22 = z_len + + MOVD R0, R3 // R3 will be the index register + CMP R0, R22 + MOVD R0, R4 // R4 = c = 0 + MOVD R22, CTR // Initialize loop counter + BEQ done + +loop: + MOVD (R8)(R3), R20 // Load x[i] + MOVD (R10)(R3), R21 // Load z[i] + MULLD R9, R20, R6 // R6 = Low-order(x[i]*y) + MULHDU R9, R20, R7 // R7 = High-order(x[i]*y) + ADDC R21, R6 // R6 = z0 + ADDZE R7 // R7 = z1 + ADDC R4, R6 // R6 = z0 + c + 0 + ADDZE R7, R4 // c = z1 + CA + MOVD R6, (R10)(R3) // Store z[i] + ADD $8, R3 + BC 16, 0, loop // bdnz + +done: + MOVD R4, c+56(FP) + RET + + diff --git a/src/math/big/arith_riscv64.s b/src/math/big/arith_riscv64.s new file mode 100644 index 0000000..2e950dd --- /dev/null +++ b/src/math/big/arith_riscv64.s @@ -0,0 +1,47 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go && riscv64 +// +build !math_big_pure_go,riscv64 + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. 
+ +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOV x+0(FP), X5 + MOV y+8(FP), X6 + MULHU X5, X6, X7 + MUL X5, X6, X8 + MOV X7, z1+16(FP) + MOV X8, z0+24(FP) + RET + + +TEXT ·addVV(SB),NOSPLIT,$0 + JMP ·addVV_g(SB) + +TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) + +TEXT ·addVW(SB),NOSPLIT,$0 + JMP ·addVW_g(SB) + +TEXT ·subVW(SB),NOSPLIT,$0 + JMP ·subVW_g(SB) + +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP ·shlVU_g(SB) + +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP ·shrVU_g(SB) + +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP ·mulAddVWW_g(SB) + +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP ·addMulVVW_g(SB) + diff --git a/src/math/big/arith_s390x.s b/src/math/big/arith_s390x.s new file mode 100644 index 0000000..ad822f7 --- /dev/null +++ b/src/math/big/arith_s390x.s @@ -0,0 +1,796 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. 
+ +TEXT ·mulWW(SB), NOSPLIT, $0 + MOVD x+0(FP), R3 + MOVD y+8(FP), R4 + MULHDU R3, R4 + MOVD R10, z1+16(FP) + MOVD R11, z0+24(FP) + RET + + +// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11 +// func addVV(z, x, y []Word) (c Word) + +TEXT ·addVV(SB), NOSPLIT, $0 + MOVD addvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·addVV_check(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $addvectorfacility+0x00(SB), R1 + MOVD $·addVV_novec(SB), R2 + MOVD R2, 0(R1) + + // MOVD $·addVV_novec(SB), 0(R1) + BR ·addVV_novec(SB) + +vectorimpl: + MOVD $addvectorfacility+0x00(SB), R1 + MOVD $·addVV_vec(SB), R2 + MOVD R2, 0(R1) + + // MOVD $·addVV_vec(SB), 0(R1) + BR ·addVV_vec(SB) + +GLOBL addvectorfacility+0x00(SB), NOPTR, $8 +DATA addvectorfacility+0x00(SB)/8, $·addVV_check(SB) + +TEXT ·addVV_vec(SB), NOSPLIT, $0 + MOVD z_len+8(FP), R3 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD z+0(FP), R2 + + MOVD $0, R4 // c = 0 + MOVD $0, R0 // make sure it's zero + MOVD $0, R10 // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUB $4, R3 + BLT v1 + SUB $12, R3 // n -= 16 + BLT A1 // if n < 0 goto A1 + + MOVD R8, R5 + MOVD R9, R6 + MOVD R2, R7 + + // n >= 0 + // regular loop body unrolled 16x + VZERO V0 // c = 0 + +UU1: + VLM 0(R5), V1, V4 // 64-bytes into V1..V8 + ADD $64, R5 + VPDI $0x4, V1, V1, V1 // flip the doublewords to big-endian order + VPDI $0x4, V2, V2, V2 // flip the doublewords to big-endian order + + VLM 0(R6), V9, V12 // 64-bytes into V9..V16 + ADD $64, R6 + VPDI $0x4, V9, V9, V9 // flip the doublewords to big-endian order + VPDI $0x4, V10, V10, V10 // flip the doublewords to big-endian order + + VACCCQ V1, V9, V0, V25 + VACQ V1, V9, V0, V17 + VACCCQ V2, V10, V25, V26 + VACQ V2, V10, V25, V18 + + VLM 0(R5), V5, V6 // 32-bytes into V1..V8 + VLM 0(R6), V13, V14 // 32-bytes into V9..V16 + ADD $32, R5 + ADD $32, R6 + + VPDI $0x4, V3, 
V3, V3 // flip the doublewords to big-endian order + VPDI $0x4, V4, V4, V4 // flip the doublewords to big-endian order + VPDI $0x4, V11, V11, V11 // flip the doublewords to big-endian order + VPDI $0x4, V12, V12, V12 // flip the doublewords to big-endian order + + VACCCQ V3, V11, V26, V27 + VACQ V3, V11, V26, V19 + VACCCQ V4, V12, V27, V28 + VACQ V4, V12, V27, V20 + + VLM 0(R5), V7, V8 // 32-bytes into V1..V8 + VLM 0(R6), V15, V16 // 32-bytes into V9..V16 + ADD $32, R5 + ADD $32, R6 + + VPDI $0x4, V5, V5, V5 // flip the doublewords to big-endian order + VPDI $0x4, V6, V6, V6 // flip the doublewords to big-endian order + VPDI $0x4, V13, V13, V13 // flip the doublewords to big-endian order + VPDI $0x4, V14, V14, V14 // flip the doublewords to big-endian order + + VACCCQ V5, V13, V28, V29 + VACQ V5, V13, V28, V21 + VACCCQ V6, V14, V29, V30 + VACQ V6, V14, V29, V22 + + VPDI $0x4, V7, V7, V7 // flip the doublewords to big-endian order + VPDI $0x4, V8, V8, V8 // flip the doublewords to big-endian order + VPDI $0x4, V15, V15, V15 // flip the doublewords to big-endian order + VPDI $0x4, V16, V16, V16 // flip the doublewords to big-endian order + + VACCCQ V7, V15, V30, V31 + VACQ V7, V15, V30, V23 + VACCCQ V8, V16, V31, V0 // V0 has carry-over + VACQ V8, V16, V31, V24 + + VPDI $0x4, V17, V17, V17 // flip the doublewords to big-endian order + VPDI $0x4, V18, V18, V18 // flip the doublewords to big-endian order + VPDI $0x4, V19, V19, V19 // flip the doublewords to big-endian order + VPDI $0x4, V20, V20, V20 // flip the doublewords to big-endian order + VPDI $0x4, V21, V21, V21 // flip the doublewords to big-endian order + VPDI $0x4, V22, V22, V22 // flip the doublewords to big-endian order + VPDI $0x4, V23, V23, V23 // flip the doublewords to big-endian order + VPDI $0x4, V24, V24, V24 // flip the doublewords to big-endian order + VSTM V17, V24, 0(R7) // 128-bytes into z + ADD $128, R7 + ADD $128, R10 // i += 16 + SUB $16, R3 // n -= 16 + BGE UU1 // if n >= 0 goto U1 + VLGVG 
$1, V0, R4 // put cf into R4 + NEG R4, R4 // save cf + +A1: + ADD $12, R3 // n += 16 + + // s/JL/JMP/ below to disable the unrolled loop + BLT v1 // if n < 0 goto v1 + +U1: // n >= 0 + // regular loop body unrolled 4x + MOVD 0(R8)(R10*1), R5 + MOVD 8(R8)(R10*1), R6 + MOVD 16(R8)(R10*1), R7 + MOVD 24(R8)(R10*1), R1 + ADDC R4, R4 // restore CF + MOVD 0(R9)(R10*1), R11 + ADDE R11, R5 + MOVD 8(R9)(R10*1), R11 + ADDE R11, R6 + MOVD 16(R9)(R10*1), R11 + ADDE R11, R7 + MOVD 24(R9)(R10*1), R11 + ADDE R11, R1 + MOVD R0, R4 + ADDE R4, R4 // save CF + NEG R4, R4 + MOVD R5, 0(R2)(R10*1) + MOVD R6, 8(R2)(R10*1) + MOVD R7, 16(R2)(R10*1) + MOVD R1, 24(R2)(R10*1) + + ADD $32, R10 // i += 4 + SUB $4, R3 // n -= 4 + BGE U1 // if n >= 0 goto U1 + +v1: + ADD $4, R3 // n += 4 + BLE E1 // if n <= 0 goto E1 + +L1: // n > 0 + ADDC R4, R4 // restore CF + MOVD 0(R8)(R10*1), R5 + MOVD 0(R9)(R10*1), R11 + ADDE R11, R5 + MOVD R5, 0(R2)(R10*1) + MOVD R0, R4 + ADDE R4, R4 // save CF + NEG R4, R4 + + ADD $8, R10 // i++ + SUB $1, R3 // n-- + BGT L1 // if n > 0 goto L1 + +E1: + NEG R4, R4 + MOVD R4, c+72(FP) // return c + RET + +TEXT ·addVV_novec(SB), NOSPLIT, $0 +novec: + MOVD z_len+8(FP), R3 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD z+0(FP), R2 + + MOVD $0, R4 // c = 0 + MOVD $0, R0 // make sure it's zero + MOVD $0, R10 // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUB $4, R3 // n -= 4 + BLT v1n // if n < 0 goto v1n + +U1n: // n >= 0 + // regular loop body unrolled 4x + MOVD 0(R8)(R10*1), R5 + MOVD 8(R8)(R10*1), R6 + MOVD 16(R8)(R10*1), R7 + MOVD 24(R8)(R10*1), R1 + ADDC R4, R4 // restore CF + MOVD 0(R9)(R10*1), R11 + ADDE R11, R5 + MOVD 8(R9)(R10*1), R11 + ADDE R11, R6 + MOVD 16(R9)(R10*1), R11 + ADDE R11, R7 + MOVD 24(R9)(R10*1), R11 + ADDE R11, R1 + MOVD R0, R4 + ADDE R4, R4 // save CF + NEG R4, R4 + MOVD R5, 0(R2)(R10*1) + MOVD R6, 8(R2)(R10*1) + MOVD R7, 16(R2)(R10*1) + MOVD R1, 24(R2)(R10*1) + + ADD $32, R10 // i += 4 + SUB $4, R3 // n -= 4 + BGE U1n // if n >= 0 goto 
U1n + +v1n: + ADD $4, R3 // n += 4 + BLE E1n // if n <= 0 goto E1n + +L1n: // n > 0 + ADDC R4, R4 // restore CF + MOVD 0(R8)(R10*1), R5 + MOVD 0(R9)(R10*1), R11 + ADDE R11, R5 + MOVD R5, 0(R2)(R10*1) + MOVD R0, R4 + ADDE R4, R4 // save CF + NEG R4, R4 + + ADD $8, R10 // i++ + SUB $1, R3 // n-- + BGT L1n // if n > 0 goto L1n + +E1n: + NEG R4, R4 + MOVD R4, c+72(FP) // return c + RET + +TEXT ·subVV(SB), NOSPLIT, $0 + MOVD subvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·subVV_check(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $subvectorfacility+0x00(SB), R1 + MOVD $·subVV_novec(SB), R2 + MOVD R2, 0(R1) + + // MOVD $·subVV_novec(SB), 0(R1) + BR ·subVV_novec(SB) + +vectorimpl: + MOVD $subvectorfacility+0x00(SB), R1 + MOVD $·subVV_vec(SB), R2 + MOVD R2, 0(R1) + + // MOVD $·subVV_vec(SB), 0(R1) + BR ·subVV_vec(SB) + +GLOBL subvectorfacility+0x00(SB), NOPTR, $8 +DATA subvectorfacility+0x00(SB)/8, $·subVV_check(SB) + +// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11 +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names) +TEXT ·subVV_vec(SB), NOSPLIT, $0 + MOVD z_len+8(FP), R3 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD z+0(FP), R2 + MOVD $0, R4 // c = 0 + MOVD $0, R0 // make sure it's zero + MOVD $0, R10 // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUB $4, R3 // n -= 4 + BLT v1 // if n < 0 goto v1 + SUB $12, R3 // n -= 16 + BLT A1 // if n < 0 goto A1 + + MOVD R8, R5 + MOVD R9, R6 + MOVD R2, R7 + + // n >= 0 + // regular loop body unrolled 16x + VZERO V0 // cf = 0 + MOVD $1, R4 // for 390 subtraction cf starts as 1 (no borrow) + VLVGG $1, R4, V0 // put carry into V0 + +UU1: + VLM 0(R5), V1, V4 // 64-bytes into V1..V8 + ADD $64, R5 + VPDI $0x4, V1, V1, V1 // flip the doublewords to big-endian order + VPDI $0x4, V2, V2, V2 // flip the doublewords to 
big-endian order + + VLM 0(R6), V9, V12 // 64-bytes into V9..V16 + ADD $64, R6 + VPDI $0x4, V9, V9, V9 // flip the doublewords to big-endian order + VPDI $0x4, V10, V10, V10 // flip the doublewords to big-endian order + + VSBCBIQ V1, V9, V0, V25 + VSBIQ V1, V9, V0, V17 + VSBCBIQ V2, V10, V25, V26 + VSBIQ V2, V10, V25, V18 + + VLM 0(R5), V5, V6 // 32-bytes into V1..V8 + VLM 0(R6), V13, V14 // 32-bytes into V9..V16 + ADD $32, R5 + ADD $32, R6 + + VPDI $0x4, V3, V3, V3 // flip the doublewords to big-endian order + VPDI $0x4, V4, V4, V4 // flip the doublewords to big-endian order + VPDI $0x4, V11, V11, V11 // flip the doublewords to big-endian order + VPDI $0x4, V12, V12, V12 // flip the doublewords to big-endian order + + VSBCBIQ V3, V11, V26, V27 + VSBIQ V3, V11, V26, V19 + VSBCBIQ V4, V12, V27, V28 + VSBIQ V4, V12, V27, V20 + + VLM 0(R5), V7, V8 // 32-bytes into V1..V8 + VLM 0(R6), V15, V16 // 32-bytes into V9..V16 + ADD $32, R5 + ADD $32, R6 + + VPDI $0x4, V5, V5, V5 // flip the doublewords to big-endian order + VPDI $0x4, V6, V6, V6 // flip the doublewords to big-endian order + VPDI $0x4, V13, V13, V13 // flip the doublewords to big-endian order + VPDI $0x4, V14, V14, V14 // flip the doublewords to big-endian order + + VSBCBIQ V5, V13, V28, V29 + VSBIQ V5, V13, V28, V21 + VSBCBIQ V6, V14, V29, V30 + VSBIQ V6, V14, V29, V22 + + VPDI $0x4, V7, V7, V7 // flip the doublewords to big-endian order + VPDI $0x4, V8, V8, V8 // flip the doublewords to big-endian order + VPDI $0x4, V15, V15, V15 // flip the doublewords to big-endian order + VPDI $0x4, V16, V16, V16 // flip the doublewords to big-endian order + + VSBCBIQ V7, V15, V30, V31 + VSBIQ V7, V15, V30, V23 + VSBCBIQ V8, V16, V31, V0 // V0 has carry-over + VSBIQ V8, V16, V31, V24 + + VPDI $0x4, V17, V17, V17 // flip the doublewords to big-endian order + VPDI $0x4, V18, V18, V18 // flip the doublewords to big-endian order + VPDI $0x4, V19, V19, V19 // flip the doublewords to big-endian order + VPDI $0x4, V20, V20, V20 
// flip the doublewords to big-endian order + VPDI $0x4, V21, V21, V21 // flip the doublewords to big-endian order + VPDI $0x4, V22, V22, V22 // flip the doublewords to big-endian order + VPDI $0x4, V23, V23, V23 // flip the doublewords to big-endian order + VPDI $0x4, V24, V24, V24 // flip the doublewords to big-endian order + VSTM V17, V24, 0(R7) // 128-bytes into z + ADD $128, R7 + ADD $128, R10 // i += 16 + SUB $16, R3 // n -= 16 + BGE UU1 // if n >= 0 goto UU1 + VLGVG $1, V0, R4 // put cf into R4 + SUB $1, R4 // save cf + +A1: + ADD $12, R3 // n += 12 + BLT v1 // if n < 0 goto v1 + +U1: // n >= 0 + // regular loop body unrolled 4x + MOVD 0(R8)(R10*1), R5 + MOVD 8(R8)(R10*1), R6 + MOVD 16(R8)(R10*1), R7 + MOVD 24(R8)(R10*1), R1 + MOVD R0, R11 + SUBC R4, R11 // restore CF + MOVD 0(R9)(R10*1), R11 + SUBE R11, R5 + MOVD 8(R9)(R10*1), R11 + SUBE R11, R6 + MOVD 16(R9)(R10*1), R11 + SUBE R11, R7 + MOVD 24(R9)(R10*1), R11 + SUBE R11, R1 + MOVD R0, R4 + SUBE R4, R4 // save CF + MOVD R5, 0(R2)(R10*1) + MOVD R6, 8(R2)(R10*1) + MOVD R7, 16(R2)(R10*1) + MOVD R1, 24(R2)(R10*1) + + ADD $32, R10 // i += 4 + SUB $4, R3 // n -= 4 + BGE U1 // if n >= 0 goto U1 + +v1: + ADD $4, R3 // n += 4 + BLE E1 // if n <= 0 goto E1 + +L1: // n > 0 + MOVD R0, R11 + SUBC R4, R11 // restore CF + MOVD 0(R8)(R10*1), R5 + MOVD 0(R9)(R10*1), R11 + SUBE R11, R5 + MOVD R5, 0(R2)(R10*1) + MOVD R0, R4 + SUBE R4, R4 // save CF + + ADD $8, R10 // i++ + SUB $1, R3 // n-- + BGT L1 // if n > 0 goto L1 + +E1: + NEG R4, R4 + MOVD R4, c+72(FP) // return c + RET + +// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11 +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names) +TEXT ·subVV_novec(SB), NOSPLIT, $0 + MOVD z_len+8(FP), R3 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD z+0(FP), R2 + + MOVD $0, R4 // c = 0 + MOVD $0, R0 // make sure it's zero + MOVD $0, R10 // i = 0 + + // 
s/JL/JMP/ below to disable the unrolled loop + SUB $4, R3 // n -= 4 + BLT v1 // if n < 0 goto v1 + +U1: // n >= 0 + // regular loop body unrolled 4x + MOVD 0(R8)(R10*1), R5 + MOVD 8(R8)(R10*1), R6 + MOVD 16(R8)(R10*1), R7 + MOVD 24(R8)(R10*1), R1 + MOVD R0, R11 + SUBC R4, R11 // restore CF + MOVD 0(R9)(R10*1), R11 + SUBE R11, R5 + MOVD 8(R9)(R10*1), R11 + SUBE R11, R6 + MOVD 16(R9)(R10*1), R11 + SUBE R11, R7 + MOVD 24(R9)(R10*1), R11 + SUBE R11, R1 + MOVD R0, R4 + SUBE R4, R4 // save CF + MOVD R5, 0(R2)(R10*1) + MOVD R6, 8(R2)(R10*1) + MOVD R7, 16(R2)(R10*1) + MOVD R1, 24(R2)(R10*1) + + ADD $32, R10 // i += 4 + SUB $4, R3 // n -= 4 + BGE U1 // if n >= 0 goto U1 + +v1: + ADD $4, R3 // n += 4 + BLE E1 // if n <= 0 goto E1 + +L1: // n > 0 + MOVD R0, R11 + SUBC R4, R11 // restore CF + MOVD 0(R8)(R10*1), R5 + MOVD 0(R9)(R10*1), R11 + SUBE R11, R5 + MOVD R5, 0(R2)(R10*1) + MOVD R0, R4 + SUBE R4, R4 // save CF + + ADD $8, R10 // i++ + SUB $1, R3 // n-- + BGT L1 // if n > 0 goto L1 + +E1: + NEG R4, R4 + MOVD R4, c+72(FP) // return c + RET + +TEXT ·addVW(SB), NOSPLIT, $0 + MOVD z_len+8(FP), R5 // length of z + MOVD x+24(FP), R6 + MOVD y+48(FP), R7 // c = y + MOVD z+0(FP), R8 + + CMPBEQ R5, $0, returnC // if len(z) == 0, we can have an early return + + // Add the first two words, and determine which path (copy path or loop path) to take based on the carry flag. + ADDC 0(R6), R7 + MOVD R7, 0(R8) + CMPBEQ R5, $1, returnResult // len(z) == 1 + MOVD $0, R9 + ADDE 8(R6), R9 + MOVD R9, 8(R8) + CMPBEQ R5, $2, returnResult // len(z) == 2 + + // Update the counters + MOVD $16, R12 // i = 2 + MOVD $-2(R5), R5 // n = n - 2 + +loopOverEachWord: + BRC $12, copySetup // carry = 0, copy the rest + MOVD $1, R9 + + // Originally we used the carry flag generated in the previous iteration + // (i.e: ADDE could be used here to do the addition). 
However, since we + // already know carry is 1 (otherwise we will go to copy section), we can use + // ADDC here so the current iteration does not depend on the carry flag + // generated in the previous iteration. This could be useful when branch prediction happens. + ADDC 0(R6)(R12*1), R9 + MOVD R9, 0(R8)(R12*1) // z[i] = x[i] + c + + MOVD $8(R12), R12 // i++ + BRCTG R5, loopOverEachWord // n-- + +// Return the current carry value +returnResult: + MOVD $0, R0 + ADDE R0, R0 + MOVD R0, c+56(FP) + RET + +// Update position of x(R6) and z(R8) based on the current counter value and perform copying. +// With the assumption that x and z will not overlap with each other or x and z will +// point to same memory region, we can use a faster version of copy using only MVC here. +// In the following implementation, we have three copy loops, each copying a word, 4 words, and +// 32 words at a time. Via benchmarking, this implementation is faster than calling runtime·memmove. +copySetup: + ADD R12, R6 + ADD R12, R8 + + CMPBGE R5, $4, mediumLoop + +smallLoop: // does a loop unrolling to copy word when n < 4 + CMPBEQ R5, $0, returnZero + MVC $8, 0(R6), 0(R8) + CMPBEQ R5, $1, returnZero + MVC $8, 8(R6), 8(R8) + CMPBEQ R5, $2, returnZero + MVC $8, 16(R6), 16(R8) + +returnZero: + MOVD $0, c+56(FP) // return 0 as carry + RET + +mediumLoop: + CMPBLT R5, $4, smallLoop + CMPBLT R5, $32, mediumLoopBody + +largeLoop: // Copying 256 bytes at a time. 
+ MVC $256, 0(R6), 0(R8) + MOVD $256(R6), R6 + MOVD $256(R8), R8 + MOVD $-32(R5), R5 + CMPBGE R5, $32, largeLoop + BR mediumLoop + +mediumLoopBody: // Copying 32 bytes at a time + MVC $32, 0(R6), 0(R8) + MOVD $32(R6), R6 + MOVD $32(R8), R8 + MOVD $-4(R5), R5 + CMPBGE R5, $4, mediumLoopBody + BR smallLoop + +returnC: + MOVD R7, c+56(FP) + RET + +TEXT ·subVW(SB), NOSPLIT, $0 + MOVD z_len+8(FP), R5 + MOVD x+24(FP), R6 + MOVD y+48(FP), R7 // The borrow bit passed in + MOVD z+0(FP), R8 + MOVD $0, R0 // R0 is a temporary variable used during computation. Ensure it has zero in it. + + CMPBEQ R5, $0, returnC // len(z) == 0, have an early return + + // Subtract the first two words, and determine which path (copy path or loop path) to take based on the borrow flag + MOVD 0(R6), R9 + SUBC R7, R9 + MOVD R9, 0(R8) + CMPBEQ R5, $1, returnResult + MOVD 8(R6), R9 + SUBE R0, R9 + MOVD R9, 8(R8) + CMPBEQ R5, $2, returnResult + + // Update the counters + MOVD $16, R12 // i = 2 + MOVD $-2(R5), R5 // n = n - 2 + +loopOverEachWord: + BRC $3, copySetup // no borrow, copy the rest + MOVD 0(R6)(R12*1), R9 + + // Originally we used the borrow flag generated in the previous iteration + // (i.e: SUBE could be used here to do the subtraction). However, since we + // already know borrow is 1 (otherwise we will go to copy section), we can + // use SUBC here so the current iteration does not depend on the borrow flag + // generated in the previous iteration. This could be useful when branch prediction happens. + SUBC $1, R9 + MOVD R9, 0(R8)(R12*1) // z[i] = x[i] - 1 + + MOVD $8(R12), R12 // i++ + BRCTG R5, loopOverEachWord // n-- + +// return the current borrow value +returnResult: + SUBE R0, R0 + NEG R0, R0 + MOVD R0, c+56(FP) + RET + +// Update position of x(R6) and z(R8) based on the current counter value and perform copying. 
+// With the assumption that x and z will not overlap with each other or x and z will +// point to same memory region, we can use a faster version of copy using only MVC here. +// In the following implementation, we have three copy loops, each copying a word, 4 words, and +// 32 words at a time. Via benchmarking, this implementation is faster than calling runtime·memmove. +copySetup: + ADD R12, R6 + ADD R12, R8 + + CMPBGE R5, $4, mediumLoop + +smallLoop: // does a loop unrolling to copy word when n < 4 + CMPBEQ R5, $0, returnZero + MVC $8, 0(R6), 0(R8) + CMPBEQ R5, $1, returnZero + MVC $8, 8(R6), 8(R8) + CMPBEQ R5, $2, returnZero + MVC $8, 16(R6), 16(R8) + +returnZero: + MOVD $0, c+56(FP) // return 0 as borrow + RET + +mediumLoop: + CMPBLT R5, $4, smallLoop + CMPBLT R5, $32, mediumLoopBody + +largeLoop: // Copying 256 bytes at a time + MVC $256, 0(R6), 0(R8) + MOVD $256(R6), R6 + MOVD $256(R8), R8 + MOVD $-32(R5), R5 + CMPBGE R5, $32, largeLoop + BR mediumLoop + +mediumLoopBody: // Copying 32 bytes at a time + MVC $32, 0(R6), 0(R8) + MOVD $32(R6), R6 + MOVD $32(R8), R8 + MOVD $-4(R5), R5 + CMPBGE R5, $4, mediumLoopBody + BR smallLoop + +returnC: + MOVD R7, c+56(FP) + RET + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB), NOSPLIT, $0 + BR ·shlVU_g(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB), NOSPLIT, $0 + BR ·shrVU_g(SB) + +// CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, DX = r3, AX = r6, BX = R1, (R0 set to 0) + use R11 + use R7 for i +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB), NOSPLIT, $0 + MOVD z+0(FP), R2 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD r+56(FP), R4 // c = r + MOVD z_len+8(FP), R5 + MOVD $0, R1 // i*8 = 0 + MOVD $0, R7 // i = 0 + MOVD $0, R0 // make sure it's zero + BR E5 + +L5: + MOVD (R8)(R1*1), R6 + MULHDU R9, R6 + ADDC R4, R11 // add to low order bits + ADDE R0, R6 + MOVD R11, (R2)(R1*1) + MOVD R6, R4 + ADD $8, R1 // i*8 + 8 + ADD $1, R7 // i++ + +E5: + CMPBLT R7, R5, L5 // i 
< n + + MOVD R4, c+64(FP) + RET + +// func addMulVVW(z, x []Word, y Word) (c Word) +// CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1, (R0 set to 0) + use R11 + use R7 for i +TEXT ·addMulVVW(SB), NOSPLIT, $0 + MOVD z+0(FP), R2 + MOVD x+24(FP), R8 + MOVD y+48(FP), R9 + MOVD z_len+8(FP), R5 + + MOVD $0, R1 // i*8 = 0 + MOVD $0, R7 // i = 0 + MOVD $0, R0 // make sure it's zero + MOVD $0, R4 // c = 0 + + MOVD R5, R12 + AND $-2, R12 + CMPBGE R5, $2, A6 + BR E6 + +A6: + MOVD (R8)(R1*1), R6 + MULHDU R9, R6 + MOVD (R2)(R1*1), R10 + ADDC R10, R11 // add to low order bits + ADDE R0, R6 + ADDC R4, R11 + ADDE R0, R6 + MOVD R6, R4 + MOVD R11, (R2)(R1*1) + + MOVD (8)(R8)(R1*1), R6 + MULHDU R9, R6 + MOVD (8)(R2)(R1*1), R10 + ADDC R10, R11 // add to low order bits + ADDE R0, R6 + ADDC R4, R11 + ADDE R0, R6 + MOVD R6, R4 + MOVD R11, (8)(R2)(R1*1) + + ADD $16, R1 // i*8 + 16 + ADD $2, R7 // i += 2 + + CMPBLT R7, R12, A6 + BR E6 + +L6: + MOVD (R8)(R1*1), R6 + MULHDU R9, R6 + MOVD (R2)(R1*1), R10 + ADDC R10, R11 // add to low order bits + ADDE R0, R6 + ADDC R4, R11 + ADDE R0, R6 + MOVD R6, R4 + MOVD R11, (R2)(R1*1) + + ADD $8, R1 // i*8 + 8 + ADD $1, R7 // i++ + +E6: + CMPBLT R7, R5, L6 // i < n + + MOVD R4, c+56(FP) + RET + diff --git a/src/math/big/arith_s390x_test.go b/src/math/big/arith_s390x_test.go new file mode 100644 index 0000000..8375ddb --- /dev/null +++ b/src/math/big/arith_s390x_test.go @@ -0,0 +1,33 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build s390x && !math_big_pure_go +// +build s390x,!math_big_pure_go + +package big + +import ( + "testing" +) + +// Tests whether the non vector routines are working, even when the tests are run on a +// vector-capable machine + +func TestFunVVnovec(t *testing.T) { + if hasVX == true { + for _, a := range sumVV { + arg := a + testFunVV(t, "addVV_novec", addVV_novec, arg) + + arg = argVV{a.z, a.y, a.x, a.c} + testFunVV(t, "addVV_novec symmetric", addVV_novec, arg) + + arg = argVV{a.x, a.z, a.y, a.c} + testFunVV(t, "subVV_novec", subVV_novec, arg) + + arg = argVV{a.y, a.z, a.x, a.c} + testFunVV(t, "subVV_novec symmetric", subVV_novec, arg) + } + } +} diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go new file mode 100644 index 0000000..7b3427f --- /dev/null +++ b/src/math/big/arith_test.go @@ -0,0 +1,697 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "fmt" + "internal/testenv" + "math/bits" + "math/rand" + "strings" + "testing" +) + +var isRaceBuilder = strings.HasSuffix(testenv.Builder(), "-race") + +type funVV func(z, x, y []Word) (c Word) +type argVV struct { + z, x, y nat + c Word +} + +var sumVV = []argVV{ + {}, + {nat{0}, nat{0}, nat{0}, 0}, + {nat{1}, nat{1}, nat{0}, 0}, + {nat{0}, nat{_M}, nat{1}, 1}, + {nat{80235}, nat{12345}, nat{67890}, 0}, + {nat{_M - 1}, nat{_M}, nat{_M}, 1}, + {nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, nat{1, 0, 0, 0}, 1}, + {nat{0, 0, 0, _M}, nat{_M, _M, _M, _M - 1}, nat{1, 0, 0, 0}, 0}, + {nat{0, 0, 0, 0}, nat{_M, 0, _M, 0}, nat{1, _M, 0, _M}, 1}, +} + +func testFunVV(t *testing.T, msg string, f funVV, a argVV) { + z := make(nat, len(a.z)) + c := f(z, a.x, a.y) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if c != a.c { + t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, c, a.c) + } +} + +func TestFunVV(t *testing.T) { + for _, a := range sumVV { + arg := a + testFunVV(t, "addVV_g", addVV_g, arg) + testFunVV(t, "addVV", addVV, arg) + + arg = argVV{a.z, a.y, a.x, a.c} + testFunVV(t, "addVV_g symmetric", addVV_g, arg) + testFunVV(t, "addVV symmetric", addVV, arg) + + arg = argVV{a.x, a.z, a.y, a.c} + testFunVV(t, "subVV_g", subVV_g, arg) + testFunVV(t, "subVV", subVV, arg) + + arg = argVV{a.y, a.z, a.x, a.c} + testFunVV(t, "subVV_g symmetric", subVV_g, arg) + testFunVV(t, "subVV symmetric", subVV, arg) + } +} + +// Always the same seed for reproducible results. 
+var rnd = rand.New(rand.NewSource(0)) + +func rndW() Word { + return Word(rnd.Int63()<<1 | rnd.Int63n(2)) +} + +func rndV(n int) []Word { + v := make([]Word, n) + for i := range v { + v[i] = rndW() + } + return v +} + +var benchSizes = []int{1, 2, 3, 4, 5, 1e1, 1e2, 1e3, 1e4, 1e5} + +func BenchmarkAddVV(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + y := rndV(n) + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + for i := 0; i < b.N; i++ { + addVV(z, x, y) + } + }) + } +} + +func BenchmarkSubVV(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + y := rndV(n) + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + for i := 0; i < b.N; i++ { + subVV(z, x, y) + } + }) + } +} + +type funVW func(z, x []Word, y Word) (c Word) +type argVW struct { + z, x nat + y Word + c Word +} + +var sumVW = []argVW{ + {}, + {nil, nil, 2, 2}, + {nat{0}, nat{0}, 0, 0}, + {nat{1}, nat{0}, 1, 0}, + {nat{1}, nat{1}, 0, 0}, + {nat{0}, nat{_M}, 1, 1}, + {nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, 1, 1}, + {nat{585}, nat{314}, 271, 0}, +} + +var lshVW = []argVW{ + {}, + {nat{0}, nat{0}, 0, 0}, + {nat{0}, nat{0}, 1, 0}, + {nat{0}, nat{0}, 20, 0}, + + {nat{_M}, nat{_M}, 0, 0}, + {nat{_M << 1 & _M}, nat{_M}, 1, 1}, + {nat{_M << 20 & _M}, nat{_M}, 20, _M >> (_W - 20)}, + + {nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0}, + {nat{_M << 1 & _M, _M, _M}, nat{_M, _M, _M}, 1, 1}, + {nat{_M << 20 & _M, _M, _M}, nat{_M, _M, _M}, 20, _M >> (_W - 20)}, +} + +var rshVW = []argVW{ + {}, + {nat{0}, nat{0}, 0, 0}, + {nat{0}, nat{0}, 1, 0}, + {nat{0}, nat{0}, 20, 0}, + + {nat{_M}, nat{_M}, 0, 0}, + {nat{_M >> 1}, nat{_M}, 1, _M << (_W - 1) & _M}, + {nat{_M >> 20}, nat{_M}, 20, _M << (_W - 20) & _M}, + + {nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0}, + {nat{_M, _M, _M >> 1}, nat{_M, _M, _M}, 1, _M << (_W - 1) & _M}, + 
{nat{_M, _M, _M >> 20}, nat{_M, _M, _M}, 20, _M << (_W - 20) & _M}, +} + +func testFunVW(t *testing.T, msg string, f funVW, a argVW) { + z := make(nat, len(a.z)) + c := f(z, a.x, a.y) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if c != a.c { + t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, c, a.c) + } +} + +func testFunVWext(t *testing.T, msg string, f funVW, f_g funVW, a argVW) { + // using the result of addVW_g/subVW_g as golden + z_g := make(nat, len(a.z)) + c_g := f_g(z_g, a.x, a.y) + c := f(a.z, a.x, a.y) + + for i, zi := range a.z { + if zi != z_g[i] { + t.Errorf("%s\n\tgot z[%d] = %#x; want %#x", msg, i, zi, z_g[i]) + break + } + } + if c != c_g { + t.Errorf("%s\n\tgot c = %#x; want %#x", msg, c, c_g) + } +} + +func makeFunVW(f func(z, x []Word, s uint) (c Word)) funVW { + return func(z, x []Word, s Word) (c Word) { + return f(z, x, uint(s)) + } +} + +func TestFunVW(t *testing.T) { + for _, a := range sumVW { + arg := a + testFunVW(t, "addVW_g", addVW_g, arg) + testFunVW(t, "addVW", addVW, arg) + + arg = argVW{a.x, a.z, a.y, a.c} + testFunVW(t, "subVW_g", subVW_g, arg) + testFunVW(t, "subVW", subVW, arg) + } + + shlVW_g := makeFunVW(shlVU_g) + shlVW := makeFunVW(shlVU) + for _, a := range lshVW { + arg := a + testFunVW(t, "shlVU_g", shlVW_g, arg) + testFunVW(t, "shlVU", shlVW, arg) + } + + shrVW_g := makeFunVW(shrVU_g) + shrVW := makeFunVW(shrVU) + for _, a := range rshVW { + arg := a + testFunVW(t, "shrVU_g", shrVW_g, arg) + testFunVW(t, "shrVU", shrVW, arg) + } +} + +// Construct a vector comprising the same word, usually '0' or 'maximum uint' +func makeWordVec(e Word, n int) []Word { + v := make([]Word, n) + for i := range v { + v[i] = e + } + return v +} + +// Extended testing to addVW and subVW using various kinds of input data. +// We utilize the results of addVW_g and subVW_g as golden reference to check +// correctness. 
+func TestFunVWExt(t *testing.T) { + // 32 is the current threshold that triggers an optimized version of + // calculation for large-sized vector, ensure we have sizes around it tested. + var vwSizes = []int{0, 1, 3, 4, 5, 8, 9, 23, 31, 32, 33, 34, 35, 36, 50, 120} + for _, n := range vwSizes { + // vector of random numbers, using the result of addVW_g/subVW_g as golden + x := rndV(n) + y := rndW() + z := make(nat, n) + arg := argVW{z, x, y, 0} + testFunVWext(t, "addVW, random inputs", addVW, addVW_g, arg) + testFunVWext(t, "subVW, random inputs", subVW, subVW_g, arg) + + // vector of random numbers, but make 'x' and 'z' share storage + arg = argVW{x, x, y, 0} + testFunVWext(t, "addVW, random inputs, sharing storage", addVW, addVW_g, arg) + testFunVWext(t, "subVW, random inputs, sharing storage", subVW, subVW_g, arg) + + // vector of maximum uint, to force carry flag set in each 'add' + y = ^Word(0) + x = makeWordVec(y, n) + arg = argVW{z, x, y, 0} + testFunVWext(t, "addVW, vector of max uint", addVW, addVW_g, arg) + + // vector of '0', to force carry flag set in each 'sub' + x = makeWordVec(0, n) + arg = argVW{z, x, 1, 0} + testFunVWext(t, "subVW, vector of zero", subVW, subVW_g, arg) + } +} + +type argVU struct { + d []Word // d is a Word slice, the input parameters x and z come from this array. + l uint // l is the length of the input parameters x and z. + xp uint // xp is the starting position of the input parameter x, x := d[xp:xp+l]. + zp uint // zp is the starting position of the input parameter z, z := d[zp:zp+l]. + s uint // s is the shift number. + r []Word // r is the expected output result z. + c Word // c is the expected return value. + m string // message. 
+} + +var argshlVUIn = []Word{1, 2, 4, 8, 16, 32, 64, 0, 0, 0} +var argshlVUr0 = []Word{1, 2, 4, 8, 16, 32, 64} +var argshlVUr1 = []Word{2, 4, 8, 16, 32, 64, 128} +var argshlVUrWm1 = []Word{1 << (_W - 1), 0, 1, 2, 4, 8, 16} + +var argshlVU = []argVU{ + // test cases for shlVU + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of shlVU"}, + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of shlVU"}, + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of shlVU"}, + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of shlVU"}, + // additional test cases with shift values of 0, 1 and (_W-1) + {argshlVUIn, 7, 0, 0, 0, argshlVUr0, 0, "complete overlap of shlVU and shift of 0"}, + {argshlVUIn, 7, 0, 0, 1, argshlVUr1, 0, "complete overlap of shlVU and shift of 1"}, + {argshlVUIn, 7, 0, 0, _W - 1, argshlVUrWm1, 32, "complete overlap of shlVU and shift of _W - 1"}, + {argshlVUIn, 7, 0, 1, 0, argshlVUr0, 0, "partial overlap by 6 Words of shlVU and shift of 0"}, + {argshlVUIn, 7, 0, 1, 1, argshlVUr1, 0, "partial overlap by 6 Words of shlVU and shift of 1"}, + {argshlVUIn, 7, 0, 1, _W - 1, argshlVUrWm1, 32, "partial overlap by 6 Words of shlVU and shift of _W - 1"}, + {argshlVUIn, 7, 0, 2, 0, argshlVUr0, 0, "partial overlap by 5 Words of shlVU and shift of 0"}, + {argshlVUIn, 7, 0, 2, 1, argshlVUr1, 0, "partial overlap by 5 Words of shlVU and shift of 1"}, + {argshlVUIn, 7, 0, 2, _W - 1, argshlVUrWm1, 32, "partial overlap by 5 Words of shlVU abd shift of _W - 1"}, + {argshlVUIn, 7, 0, 3, 0, argshlVUr0, 0, "partial overlap by 4 Words of shlVU and shift of 0"}, + {argshlVUIn, 7, 0, 3, 1, 
argshlVUr1, 0, "partial overlap by 4 Words of shlVU and shift of 1"}, + {argshlVUIn, 7, 0, 3, _W - 1, argshlVUrWm1, 32, "partial overlap by 4 Words of shlVU and shift of _W - 1"}, +} + +var argshrVUIn = []Word{0, 0, 0, 1, 2, 4, 8, 16, 32, 64} +var argshrVUr0 = []Word{1, 2, 4, 8, 16, 32, 64} +var argshrVUr1 = []Word{0, 1, 2, 4, 8, 16, 32} +var argshrVUrWm1 = []Word{4, 8, 16, 32, 64, 128, 0} + +var argshrVU = []argVU{ + // test cases for shrVU + {[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of shrVU"}, + {[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of shrVU"}, + {[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of shrVU"}, + {[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of shrVU"}, + // additional test cases with shift values of 0, 1 and (_W-1) + {argshrVUIn, 7, 3, 3, 0, argshrVUr0, 0, "complete overlap of shrVU and shift of 0"}, + {argshrVUIn, 7, 3, 3, 1, argshrVUr1, 1 << (_W - 1), "complete overlap of shrVU and shift of 1"}, + {argshrVUIn, 7, 3, 3, _W - 1, argshrVUrWm1, 2, "complete overlap of shrVU and shift of _W - 1"}, + {argshrVUIn, 7, 3, 2, 0, argshrVUr0, 0, "partial overlap by 6 Words of shrVU and shift of 0"}, + {argshrVUIn, 7, 3, 2, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 6 Words of shrVU and shift of 1"}, + {argshrVUIn, 7, 3, 2, _W - 1, argshrVUrWm1, 2, "partial overlap by 6 Words of shrVU and shift of _W - 1"}, + {argshrVUIn, 7, 3, 1, 0, argshrVUr0, 0, "partial overlap by 5 Words of shrVU and shift of 0"}, + {argshrVUIn, 7, 3, 1, 1, argshrVUr1, 1 << 
(_W - 1), "partial overlap by 5 Words of shrVU and shift of 1"}, + {argshrVUIn, 7, 3, 1, _W - 1, argshrVUrWm1, 2, "partial overlap by 5 Words of shrVU and shift of _W - 1"}, + {argshrVUIn, 7, 3, 0, 0, argshrVUr0, 0, "partial overlap by 4 Words of shrVU and shift of 0"}, + {argshrVUIn, 7, 3, 0, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 4 Words of shrVU and shift of 1"}, + {argshrVUIn, 7, 3, 0, _W - 1, argshrVUrWm1, 2, "partial overlap by 4 Words of shrVU and shift of _W - 1"}, +} + +func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) { + // work on copy of a.d to preserve the original data. + b := make([]Word, len(a.d)) + copy(b, a.d) + z := b[a.zp : a.zp+a.l] + x := b[a.xp : a.xp+a.l] + c := f(z, x, a.s) + for i, zi := range z { + if zi != a.r[i] { + t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i]) + break + } + } + if c != a.c { + t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c) + } +} + +func TestShiftOverlap(t *testing.T) { + for _, a := range argshlVU { + arg := a + testShiftFunc(t, shlVU, arg) + } + + for _, a := range argshrVU { + arg := a + testShiftFunc(t, shrVU, arg) + } +} + +func TestIssue31084(t *testing.T) { + // compute 10^n via 5^n << n. 
+ const n = 165 + p := nat(nil).expNN(nat{5}, nat{n}, nil) + p = p.shl(p, n) + got := string(p.utoa(10)) + want := "1" + strings.Repeat("0", n) + if got != want { + t.Errorf("shl(%v, %v)\n\tgot %s\n\twant %s", p, n, got, want) + } +} + +const issue42838Value = "159309191113245227702888039776771180559110455519261878607388585338616290151305816094308987472018268594098344692611135542392730712890625" + +func TestIssue42838(t *testing.T) { + const s = 192 + z, _, _, _ := nat(nil).scan(strings.NewReader(issue42838Value), 0, false) + z = z.shl(z, s) + got := string(z.utoa(10)) + want := "1" + strings.Repeat("0", s) + if got != want { + t.Errorf("shl(%v, %v)\n\tgot %s\n\twant %s", z, s, got, want) + } +} + +func BenchmarkAddVW(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + y := rndW() + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _S)) + for i := 0; i < b.N; i++ { + addVW(z, x, y) + } + }) + } +} + +// Benchmarking addVW using vector of maximum uint to force carry flag set +func BenchmarkAddVWext(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + y := ^Word(0) + x := makeWordVec(y, n) + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _S)) + for i := 0; i < b.N; i++ { + addVW(z, x, y) + } + }) + } +} + +func BenchmarkSubVW(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + y := rndW() + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _S)) + for i := 0; i < b.N; i++ { + subVW(z, x, y) + } + }) + } +} + +// Benchmarking subVW using vector of zero to force carry flag set +func BenchmarkSubVWext(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := makeWordVec(0, n) + y := Word(1) + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b 
*testing.B) { + b.SetBytes(int64(n * _S)) + for i := 0; i < b.N; i++ { + subVW(z, x, y) + } + }) + } +} + +type funVWW func(z, x []Word, y, r Word) (c Word) +type argVWW struct { + z, x nat + y, r Word + c Word +} + +var prodVWW = []argVWW{ + {}, + {nat{0}, nat{0}, 0, 0, 0}, + {nat{991}, nat{0}, 0, 991, 0}, + {nat{0}, nat{_M}, 0, 0, 0}, + {nat{991}, nat{_M}, 0, 991, 0}, + {nat{0}, nat{0}, _M, 0, 0}, + {nat{991}, nat{0}, _M, 991, 0}, + {nat{1}, nat{1}, 1, 0, 0}, + {nat{992}, nat{1}, 1, 991, 0}, + {nat{22793}, nat{991}, 23, 0, 0}, + {nat{22800}, nat{991}, 23, 7, 0}, + {nat{0, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 0, 0}, + {nat{7, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 7, 0}, + {nat{0, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 0, 0}, + {nat{991, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 991, 0}, + {nat{0, 0, 0, 0}, nat{0, 0, 0, 0}, 894375984, 0, 0}, + {nat{991, 0, 0, 0}, nat{0, 0, 0, 0}, 894375984, 991, 0}, + {nat{_M << 1 & _M}, nat{_M}, 1 << 1, 0, _M >> (_W - 1)}, + {nat{_M<<1&_M + 1}, nat{_M}, 1 << 1, 1, _M >> (_W - 1)}, + {nat{_M << 7 & _M}, nat{_M}, 1 << 7, 0, _M >> (_W - 7)}, + {nat{_M<<7&_M + 1<<6}, nat{_M}, 1 << 7, 1 << 6, _M >> (_W - 7)}, + {nat{_M << 7 & _M, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, 0, _M >> (_W - 7)}, + {nat{_M<<7&_M + 1<<6, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, 1 << 6, _M >> (_W - 7)}, +} + +func testFunVWW(t *testing.T, msg string, f funVWW, a argVWW) { + z := make(nat, len(a.z)) + c := f(z, a.x, a.y, a.r) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if c != a.c { + t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, c, a.c) + } +} + +// TODO(gri) mulAddVWW and divWVW are symmetric operations but +// their signature is not symmetric. Try to unify. 
+ +type funWVW func(z []Word, xn Word, x []Word, y Word) (r Word) +type argWVW struct { + z nat + xn Word + x nat + y Word + r Word +} + +func testFunWVW(t *testing.T, msg string, f funWVW, a argWVW) { + z := make(nat, len(a.z)) + r := f(z, a.xn, a.x, a.y) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if r != a.r { + t.Errorf("%s%+v\n\tgot r = %#x; want %#x", msg, a, r, a.r) + } +} + +func TestFunVWW(t *testing.T) { + for _, a := range prodVWW { + arg := a + testFunVWW(t, "mulAddVWW_g", mulAddVWW_g, arg) + testFunVWW(t, "mulAddVWW", mulAddVWW, arg) + + if a.y != 0 && a.r < a.y { + arg := argWVW{a.x, a.c, a.z, a.y, a.r} + testFunWVW(t, "divWVW", divWVW, arg) + } + } +} + +var mulWWTests = []struct { + x, y Word + q, r Word +}{ + {_M, _M, _M - 1, 1}, + // 32 bit only: {0xc47dfa8c, 50911, 0x98a4, 0x998587f4}, +} + +func TestMulWW(t *testing.T) { + for i, test := range mulWWTests { + q, r := mulWW_g(test.x, test.y) + if q != test.q || r != test.r { + t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, test.q, test.r) + } + } +} + +var mulAddWWWTests = []struct { + x, y, c Word + q, r Word +}{ + // TODO(agl): These will only work on 64-bit platforms. 
+ // {15064310297182388543, 0xe7df04d2d35d5d80, 13537600649892366549, 13644450054494335067, 10832252001440893781}, + // {15064310297182388543, 0xdab2f18048baa68d, 13644450054494335067, 12869334219691522700, 14233854684711418382}, + {_M, _M, 0, _M - 1, 1}, + {_M, _M, _M, _M, 0}, +} + +func TestMulAddWWW(t *testing.T) { + for i, test := range mulAddWWWTests { + q, r := mulAddWWW_g(test.x, test.y, test.c) + if q != test.q || r != test.r { + t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, test.q, test.r) + } + } +} + +var divWWTests = []struct { + x1, x0, y Word + q, r Word +}{ + {_M >> 1, 0, _M, _M >> 1, _M >> 1}, + {_M - (1 << (_W - 2)), _M, 3 << (_W - 2), _M, _M - (1 << (_W - 2))}, +} + +const testsNumber = 1 << 16 + +func TestDivWW(t *testing.T) { + i := 0 + for i, test := range divWWTests { + rec := reciprocalWord(test.y) + q, r := divWW(test.x1, test.x0, test.y, rec) + if q != test.q || r != test.r { + t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, test.q, test.r) + } + } + //random tests + for ; i < testsNumber; i++ { + x1 := rndW() + x0 := rndW() + y := rndW() + if x1 >= y { + continue + } + rec := reciprocalWord(y) + qGot, rGot := divWW(x1, x0, y, rec) + qWant, rWant := bits.Div(uint(x1), uint(x0), uint(y)) + if uint(qGot) != qWant || uint(rGot) != rWant { + t.Errorf("#%d got (%x, %x) want (%x, %x)", i, qGot, rGot, qWant, rWant) + } + } +} + +func BenchmarkMulAddVWW(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + z := make([]Word, n+1) + x := rndV(n) + y := rndW() + r := rndW() + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + for i := 0; i < b.N; i++ { + mulAddVWW(z, x, y, r) + } + }) + } +} + +func BenchmarkAddMulVVW(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + y := rndW() + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + for i := 0; i < b.N; i++ { + addMulVVW(z, 
x, y) + } + }) + } +} +func BenchmarkDivWVW(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + y := rndW() + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + for i := 0; i < b.N; i++ { + divWVW(z, 0, x, y) + } + }) + } +} + +func BenchmarkNonZeroShifts(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + s := uint(rand.Int63n(_W-2)) + 1 // avoid 0 and over-large shifts + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + b.Run("shrVU", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = shrVU(z, x, s) + } + }) + b.Run("shlVU", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = shlVU(z, x, s) + } + }) + }) + } +} diff --git a/src/math/big/arith_wasm.s b/src/math/big/arith_wasm.s new file mode 100644 index 0000000..e8605f1 --- /dev/null +++ b/src/math/big/arith_wasm.s @@ -0,0 +1,36 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +TEXT ·mulWW(SB),NOSPLIT,$0 + JMP ·mulWW_g(SB) + +TEXT ·addVV(SB),NOSPLIT,$0 + JMP ·addVV_g(SB) + +TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) + +TEXT ·addVW(SB),NOSPLIT,$0 + JMP ·addVW_g(SB) + +TEXT ·subVW(SB),NOSPLIT,$0 + JMP ·subVW_g(SB) + +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP ·shlVU_g(SB) + +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP ·shrVU_g(SB) + +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP ·mulAddVWW_g(SB) + +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP ·addMulVVW_g(SB) + diff --git a/src/math/big/bits_test.go b/src/math/big/bits_test.go new file mode 100644 index 0000000..985b60b --- /dev/null +++ b/src/math/big/bits_test.go @@ -0,0 +1,224 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements the Bits type used for testing Float operations +// via an independent (albeit slower) representation for floating-point +// numbers. + +package big + +import ( + "fmt" + "sort" + "testing" +) + +// A Bits value b represents a finite floating-point number x of the form +// +// x = 2**b[0] + 2**b[1] + ... 2**b[len(b)-1] +// +// The order of slice elements is not significant. Negative elements may be +// used to form fractions. A Bits value is normalized if each b[i] occurs at +// most once. For instance Bits{0, 0, 1} is not normalized but represents the +// same floating-point number as Bits{2}, which is normalized. The zero (nil) +// value of Bits is a ready to use Bits value and represents the value 0. +type Bits []int + +func (x Bits) add(y Bits) Bits { + return append(x, y...) +} + +func (x Bits) mul(y Bits) Bits { + var p Bits + for _, x := range x { + for _, y := range y { + p = append(p, x+y) + } + } + return p +} + +func TestMulBits(t *testing.T) { + for _, test := range []struct { + x, y, want Bits + }{ + {nil, nil, nil}, + {Bits{}, Bits{}, nil}, + {Bits{0}, Bits{0}, Bits{0}}, + {Bits{0}, Bits{1}, Bits{1}}, + {Bits{1}, Bits{1, 2, 3}, Bits{2, 3, 4}}, + {Bits{-1}, Bits{1}, Bits{0}}, + {Bits{-10, -1, 0, 1, 10}, Bits{1, 2, 3}, Bits{-9, -8, -7, 0, 1, 2, 1, 2, 3, 2, 3, 4, 11, 12, 13}}, + } { + got := fmt.Sprintf("%v", test.x.mul(test.y)) + want := fmt.Sprintf("%v", test.want) + if got != want { + t.Errorf("%v * %v = %s; want %s", test.x, test.y, got, want) + } + + } +} + +// norm returns the normalized bits for x: It removes multiple equal entries +// by treating them as an addition (e.g., Bits{5, 5} => Bits{6}), and it sorts +// the result list for reproducible results. 
+func (x Bits) norm() Bits { + m := make(map[int]bool) + for _, b := range x { + for m[b] { + m[b] = false + b++ + } + m[b] = true + } + var z Bits + for b, set := range m { + if set { + z = append(z, b) + } + } + sort.Ints([]int(z)) + return z +} + +func TestNormBits(t *testing.T) { + for _, test := range []struct { + x, want Bits + }{ + {nil, nil}, + {Bits{}, Bits{}}, + {Bits{0}, Bits{0}}, + {Bits{0, 0}, Bits{1}}, + {Bits{3, 1, 1}, Bits{2, 3}}, + {Bits{10, 9, 8, 7, 6, 6}, Bits{11}}, + } { + got := fmt.Sprintf("%v", test.x.norm()) + want := fmt.Sprintf("%v", test.want) + if got != want { + t.Errorf("normBits(%v) = %s; want %s", test.x, got, want) + } + + } +} + +// round returns the Float value corresponding to x after rounding x +// to prec bits according to mode. +func (x Bits) round(prec uint, mode RoundingMode) *Float { + x = x.norm() + + // determine range + var min, max int + for i, b := range x { + if i == 0 || b < min { + min = b + } + if i == 0 || b > max { + max = b + } + } + prec0 := uint(max + 1 - min) + if prec >= prec0 { + return x.Float() + } + // prec < prec0 + + // determine bit 0, rounding, and sticky bit, and result bits z + var bit0, rbit, sbit uint + var z Bits + r := max - int(prec) + for _, b := range x { + switch { + case b == r: + rbit = 1 + case b < r: + sbit = 1 + default: + // b > r + if b == r+1 { + bit0 = 1 + } + z = append(z, b) + } + } + + // round + f := z.Float() // rounded to zero + if mode == ToNearestAway { + panic("not yet implemented") + } + if mode == ToNearestEven && rbit == 1 && (sbit == 1 || sbit == 0 && bit0 != 0) || mode == AwayFromZero { + // round away from zero + f.SetMode(ToZero).SetPrec(prec) + f.Add(f, Bits{int(r) + 1}.Float()) + } + return f +} + +// Float returns the *Float z of the smallest possible precision such that +// z = sum(2**bits[i]), with i = range bits. If multiple bits[i] are equal, +// they are added: Bits{0, 1, 0}.Float() == 2**0 + 2**1 + 2**0 = 4. 
+func (bits Bits) Float() *Float { + // handle 0 + if len(bits) == 0 { + return new(Float) + } + // len(bits) > 0 + + // determine lsb exponent + var min int + for i, b := range bits { + if i == 0 || b < min { + min = b + } + } + + // create bit pattern + x := NewInt(0) + for _, b := range bits { + badj := b - min + // propagate carry if necessary + for x.Bit(badj) != 0 { + x.SetBit(x, badj, 0) + badj++ + } + x.SetBit(x, badj, 1) + } + + // create corresponding float + z := new(Float).SetInt(x) // normalized + if e := int64(z.exp) + int64(min); MinExp <= e && e <= MaxExp { + z.exp = int32(e) + } else { + // this should never happen for our test cases + panic("exponent out of range") + } + return z +} + +func TestFromBits(t *testing.T) { + for _, test := range []struct { + bits Bits + want string + }{ + // all different bit numbers + {nil, "0"}, + {Bits{0}, "0x.8p+1"}, + {Bits{1}, "0x.8p+2"}, + {Bits{-1}, "0x.8p+0"}, + {Bits{63}, "0x.8p+64"}, + {Bits{33, -30}, "0x.8000000000000001p+34"}, + {Bits{255, 0}, "0x.8000000000000000000000000000000000000000000000000000000000000001p+256"}, + + // multiple equal bit numbers + {Bits{0, 0}, "0x.8p+2"}, + {Bits{0, 0, 0, 0}, "0x.8p+3"}, + {Bits{0, 1, 0}, "0x.8p+3"}, + {append(Bits{2, 1, 0} /* 7 */, Bits{3, 1} /* 10 */ ...), "0x.88p+5" /* 17 */}, + } { + f := test.bits.Float() + if got := f.Text('p', 0); got != test.want { + t.Errorf("setBits(%v) = %s; want %s", test.bits, got, test.want) + } + } +} diff --git a/src/math/big/calibrate_test.go b/src/math/big/calibrate_test.go new file mode 100644 index 0000000..4fa663f --- /dev/null +++ b/src/math/big/calibrate_test.go @@ -0,0 +1,173 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Calibration used to determine thresholds for using +// different algorithms. 
Ideally, this would be converted +// to go generate to create thresholds.go + +// This file prints execution times for the Mul benchmark +// given different Karatsuba thresholds. The result may be +// used to manually fine-tune the threshold constant. The +// results are somewhat fragile; use repeated runs to get +// a clear picture. + +// Calculates lower and upper thresholds for when basicSqr +// is faster than standard multiplication. + +// Usage: go test -run=TestCalibrate -v -calibrate + +package big + +import ( + "flag" + "fmt" + "testing" + "time" +) + +var calibrate = flag.Bool("calibrate", false, "run calibration test") + +const ( + sqrModeMul = "mul(x, x)" + sqrModeBasic = "basicSqr(x)" + sqrModeKaratsuba = "karatsubaSqr(x)" +) + +func TestCalibrate(t *testing.T) { + if !*calibrate { + return + } + + computeKaratsubaThresholds() + + // compute basicSqrThreshold where overhead becomes negligible + minSqr := computeSqrThreshold(10, 30, 1, 3, sqrModeMul, sqrModeBasic) + // compute karatsubaSqrThreshold where karatsuba is faster + maxSqr := computeSqrThreshold(200, 500, 10, 3, sqrModeBasic, sqrModeKaratsuba) + if minSqr != 0 { + fmt.Printf("found basicSqrThreshold = %d\n", minSqr) + } else { + fmt.Println("no basicSqrThreshold found") + } + if maxSqr != 0 { + fmt.Printf("found karatsubaSqrThreshold = %d\n", maxSqr) + } else { + fmt.Println("no karatsubaSqrThreshold found") + } +} + +func karatsubaLoad(b *testing.B) { + BenchmarkMul(b) +} + +// measureKaratsuba returns the time to run a Karatsuba-relevant benchmark +// given Karatsuba threshold th. 
+func measureKaratsuba(th int) time.Duration { + th, karatsubaThreshold = karatsubaThreshold, th + res := testing.Benchmark(karatsubaLoad) + karatsubaThreshold = th + return time.Duration(res.NsPerOp()) +} + +func computeKaratsubaThresholds() { + fmt.Printf("Multiplication times for varying Karatsuba thresholds\n") + fmt.Printf("(run repeatedly for good results)\n") + + // determine Tb, the work load execution time using basic multiplication + Tb := measureKaratsuba(1e9) // th == 1e9 => Karatsuba multiplication disabled + fmt.Printf("Tb = %10s\n", Tb) + + // thresholds + th := 4 + th1 := -1 + th2 := -1 + + var deltaOld time.Duration + for count := -1; count != 0 && th < 128; count-- { + // determine Tk, the work load execution time using Karatsuba multiplication + Tk := measureKaratsuba(th) + + // improvement over Tb + delta := (Tb - Tk) * 100 / Tb + + fmt.Printf("th = %3d Tk = %10s %4d%%", th, Tk, delta) + + // determine break-even point + if Tk < Tb && th1 < 0 { + th1 = th + fmt.Print(" break-even point") + } + + // determine diminishing return + if 0 < delta && delta < deltaOld && th2 < 0 { + th2 = th + fmt.Print(" diminishing return") + } + deltaOld = delta + + fmt.Println() + + // trigger counter + if th1 >= 0 && th2 >= 0 && count < 0 { + count = 10 // this many extra measurements after we got both thresholds + } + + th++ + } +} + +func measureSqr(words, nruns int, mode string) time.Duration { + // more runs for better statistics + initBasicSqr, initKaratsubaSqr := basicSqrThreshold, karatsubaSqrThreshold + + switch mode { + case sqrModeMul: + basicSqrThreshold = words + 1 + case sqrModeBasic: + basicSqrThreshold, karatsubaSqrThreshold = words-1, words+1 + case sqrModeKaratsuba: + karatsubaSqrThreshold = words - 1 + } + + var testval int64 + for i := 0; i < nruns; i++ { + res := testing.Benchmark(func(b *testing.B) { benchmarkNatSqr(b, words) }) + testval += res.NsPerOp() + } + testval /= int64(nruns) + + basicSqrThreshold, karatsubaSqrThreshold = 
initBasicSqr, initKaratsubaSqr + + return time.Duration(testval) +} + +func computeSqrThreshold(from, to, step, nruns int, lower, upper string) int { + fmt.Printf("Calibrating threshold between %s and %s\n", lower, upper) + fmt.Printf("Looking for a timing difference for x between %d - %d words by %d step\n", from, to, step) + var initPos bool + var threshold int + for i := from; i <= to; i += step { + baseline := measureSqr(i, nruns, lower) + testval := measureSqr(i, nruns, upper) + pos := baseline > testval + delta := baseline - testval + percent := delta * 100 / baseline + fmt.Printf("words = %3d deltaT = %10s (%4d%%) is %s better: %v", i, delta, percent, upper, pos) + if i == from { + initPos = pos + } + if threshold == 0 && pos != initPos { + threshold = i + fmt.Printf(" threshold found") + } + fmt.Println() + + } + if threshold != 0 { + fmt.Printf("Found threshold = %d between %d - %d\n", threshold, from, to) + } else { + fmt.Printf("Found NO threshold between %d - %d\n", from, to) + } + return threshold +} diff --git a/src/math/big/decimal.go b/src/math/big/decimal.go new file mode 100644 index 0000000..716f03b --- /dev/null +++ b/src/math/big/decimal.go @@ -0,0 +1,270 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision decimal numbers. +// The implementation is for float to decimal conversion only; +// not general purpose use. +// The only operations are precise conversion from binary to +// decimal and rounding. +// +// The key observation and some code (shr) is borrowed from +// strconv/decimal.go: conversion of binary fractional values can be done +// precisely in multi-precision decimal because 2 divides 10 (required for +// >> of mantissa); but conversion of decimal floating-point values cannot +// be done precisely in binary representation. 
+// +// In contrast to strconv/decimal.go, only right shift is implemented in +// decimal format - left shift can be done precisely in binary format. + +package big + +// A decimal represents an unsigned floating-point number in decimal representation. +// The value of a non-zero decimal d is d.mant * 10**d.exp with 0.1 <= d.mant < 1, +// with the most-significant mantissa digit at index 0. For the zero decimal, the +// mantissa length and exponent are 0. +// The zero value for decimal represents a ready-to-use 0.0. +type decimal struct { + mant []byte // mantissa ASCII digits, big-endian + exp int // exponent +} + +// at returns the i'th mantissa digit, starting with the most significant digit at 0. +func (d *decimal) at(i int) byte { + if 0 <= i && i < len(d.mant) { + return d.mant[i] + } + return '0' +} + +// Maximum shift amount that can be done in one pass without overflow. +// A Word has _W bits and (1<<maxShift - 1)*10 + 9 must fit into Word. +const maxShift = _W - 4 + +// TODO(gri) Since we know the desired decimal precision when converting +// a floating-point number, we may be able to limit the number of decimal +// digits that need to be computed by init by providing an additional +// precision argument and keeping track of when a number was truncated early +// (equivalent of "sticky bit" in binary rounding). + +// TODO(gri) Along the same lines, enforce some limit to shift magnitudes +// to avoid "infinitely" long running conversions (until we run out of space). + +// init initializes x to the decimal representation of m << shift (for +// shift >= 0), or m >> -shift (for shift < 0). +func (x *decimal) init(m nat, shift int) { + // special case 0 + if len(m) == 0 { + x.mant = x.mant[:0] + x.exp = 0 + return + } + + // Optimization: If we need to shift right, first remove any trailing + // zero bits from m to reduce shift amount that needs to be done in + // decimal format (since that is likely slower). 
+ if shift < 0 { + ntz := m.trailingZeroBits() + s := uint(-shift) + if s >= ntz { + s = ntz // shift at most ntz bits + } + m = nat(nil).shr(m, s) + shift += int(s) + } + + // Do any shift left in binary representation. + if shift > 0 { + m = nat(nil).shl(m, uint(shift)) + shift = 0 + } + + // Convert mantissa into decimal representation. + s := m.utoa(10) + n := len(s) + x.exp = n + // Trim trailing zeros; instead the exponent is tracking + // the decimal point independent of the number of digits. + for n > 0 && s[n-1] == '0' { + n-- + } + x.mant = append(x.mant[:0], s[:n]...) + + // Do any (remaining) shift right in decimal representation. + if shift < 0 { + for shift < -maxShift { + shr(x, maxShift) + shift += maxShift + } + shr(x, uint(-shift)) + } +} + +// shr implements x >> s, for s <= maxShift. +func shr(x *decimal, s uint) { + // Division by 1<<s using shift-and-subtract algorithm. + + // pick up enough leading digits to cover first shift + r := 0 // read index + var n Word + for n>>s == 0 && r < len(x.mant) { + ch := Word(x.mant[r]) + r++ + n = n*10 + ch - '0' + } + if n == 0 { + // x == 0; shouldn't get here, but handle anyway + x.mant = x.mant[:0] + return + } + for n>>s == 0 { + r++ + n *= 10 + } + x.exp += 1 - r + + // read a digit, write a digit + w := 0 // write index + mask := Word(1)<<s - 1 + for r < len(x.mant) { + ch := Word(x.mant[r]) + r++ + d := n >> s + n &= mask // n -= d << s + x.mant[w] = byte(d + '0') + w++ + n = n*10 + ch - '0' + } + + // write extra digits that still fit + for n > 0 && w < len(x.mant) { + d := n >> s + n &= mask + x.mant[w] = byte(d + '0') + w++ + n = n * 10 + } + x.mant = x.mant[:w] // the number may be shorter (e.g. 
1024 >> 10) + + // append additional digits that didn't fit + for n > 0 { + d := n >> s + n &= mask + x.mant = append(x.mant, byte(d+'0')) + n = n * 10 + } + + trim(x) +} + +func (x *decimal) String() string { + if len(x.mant) == 0 { + return "0" + } + + var buf []byte + switch { + case x.exp <= 0: + // 0.00ddd + buf = make([]byte, 0, 2+(-x.exp)+len(x.mant)) + buf = append(buf, "0."...) + buf = appendZeros(buf, -x.exp) + buf = append(buf, x.mant...) + + case /* 0 < */ x.exp < len(x.mant): + // dd.ddd + buf = make([]byte, 0, 1+len(x.mant)) + buf = append(buf, x.mant[:x.exp]...) + buf = append(buf, '.') + buf = append(buf, x.mant[x.exp:]...) + + default: // len(x.mant) <= x.exp + // ddd00 + buf = make([]byte, 0, x.exp) + buf = append(buf, x.mant...) + buf = appendZeros(buf, x.exp-len(x.mant)) + } + + return string(buf) +} + +// appendZeros appends n 0 digits to buf and returns buf. +func appendZeros(buf []byte, n int) []byte { + for ; n > 0; n-- { + buf = append(buf, '0') + } + return buf +} + +// shouldRoundUp reports if x should be rounded up +// if shortened to n digits. n must be a valid index +// for x.mant. +func shouldRoundUp(x *decimal, n int) bool { + if x.mant[n] == '5' && n+1 == len(x.mant) { + // exactly halfway - round to even + return n > 0 && (x.mant[n-1]-'0')&1 != 0 + } + // not halfway - digit tells all (x.mant has no trailing zeros) + return x.mant[n] >= '5' +} + +// round sets x to (at most) n mantissa digits by rounding it +// to the nearest even value with n (or fewer) mantissa digits. +// If n < 0, x remains unchanged. 
+func (x *decimal) round(n int) { + if n < 0 || n >= len(x.mant) { + return // nothing to do + } + + if shouldRoundUp(x, n) { + x.roundUp(n) + } else { + x.roundDown(n) + } +} + +func (x *decimal) roundUp(n int) { + if n < 0 || n >= len(x.mant) { + return // nothing to do + } + // 0 <= n < len(x.mant) + + // find first digit < '9' + for n > 0 && x.mant[n-1] >= '9' { + n-- + } + + if n == 0 { + // all digits are '9's => round up to '1' and update exponent + x.mant[0] = '1' // ok since len(x.mant) > n + x.mant = x.mant[:1] + x.exp++ + return + } + + // n > 0 && x.mant[n-1] < '9' + x.mant[n-1]++ + x.mant = x.mant[:n] + // x already trimmed +} + +func (x *decimal) roundDown(n int) { + if n < 0 || n >= len(x.mant) { + return // nothing to do + } + x.mant = x.mant[:n] + trim(x) +} + +// trim cuts off any trailing zeros from x's mantissa; +// they are meaningless for the value of x. +func trim(x *decimal) { + i := len(x.mant) + for i > 0 && x.mant[i-1] == '0' { + i-- + } + x.mant = x.mant[:i] + if i == 0 { + x.exp = 0 + } +} diff --git a/src/math/big/decimal_test.go b/src/math/big/decimal_test.go new file mode 100644 index 0000000..424811e --- /dev/null +++ b/src/math/big/decimal_test.go @@ -0,0 +1,134 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "fmt" + "testing" +) + +func TestDecimalString(t *testing.T) { + for _, test := range []struct { + x decimal + want string + }{ + {want: "0"}, + {decimal{nil, 1000}, "0"}, // exponent of 0 is ignored + {decimal{[]byte("12345"), 0}, "0.12345"}, + {decimal{[]byte("12345"), -3}, "0.00012345"}, + {decimal{[]byte("12345"), +3}, "123.45"}, + {decimal{[]byte("12345"), +10}, "1234500000"}, + } { + if got := test.x.String(); got != test.want { + t.Errorf("%v == %s; want %s", test.x, got, test.want) + } + } +} + +func TestDecimalInit(t *testing.T) { + for _, test := range []struct { + x Word + shift int + want string + }{ + {0, 0, "0"}, + {0, -100, "0"}, + {0, 100, "0"}, + {1, 0, "1"}, + {1, 10, "1024"}, + {1, 100, "1267650600228229401496703205376"}, + {1, -100, "0.0000000000000000000000000000007888609052210118054117285652827862296732064351090230047702789306640625"}, + {12345678, 8, "3160493568"}, + {12345678, -8, "48225.3046875"}, + {195312, 9, "99999744"}, + {1953125, 9, "1000000000"}, + } { + var d decimal + d.init(nat{test.x}.norm(), test.shift) + if got := d.String(); got != test.want { + t.Errorf("%d << %d == %s; want %s", test.x, test.shift, got, test.want) + } + } +} + +func TestDecimalRounding(t *testing.T) { + for _, test := range []struct { + x uint64 + n int + down, even, up string + }{ + {0, 0, "0", "0", "0"}, + {0, 1, "0", "0", "0"}, + + {1, 0, "0", "0", "10"}, + {5, 0, "0", "0", "10"}, + {9, 0, "0", "10", "10"}, + + {15, 1, "10", "20", "20"}, + {45, 1, "40", "40", "50"}, + {95, 1, "90", "100", "100"}, + + {12344999, 4, "12340000", "12340000", "12350000"}, + {12345000, 4, "12340000", "12340000", "12350000"}, + {12345001, 4, "12340000", "12350000", "12350000"}, + {23454999, 4, "23450000", "23450000", "23460000"}, + {23455000, 4, "23450000", "23460000", "23460000"}, + {23455001, 4, "23450000", "23460000", "23460000"}, + + {99994999, 4, "99990000", "99990000", "100000000"}, + {99995000, 4, "99990000", "100000000", "100000000"}, + 
{99999999, 4, "99990000", "100000000", "100000000"}, + + {12994999, 4, "12990000", "12990000", "13000000"}, + {12995000, 4, "12990000", "13000000", "13000000"}, + {12999999, 4, "12990000", "13000000", "13000000"}, + } { + x := nat(nil).setUint64(test.x) + + var d decimal + d.init(x, 0) + d.roundDown(test.n) + if got := d.String(); got != test.down { + t.Errorf("roundDown(%d, %d) = %s; want %s", test.x, test.n, got, test.down) + } + + d.init(x, 0) + d.round(test.n) + if got := d.String(); got != test.even { + t.Errorf("round(%d, %d) = %s; want %s", test.x, test.n, got, test.even) + } + + d.init(x, 0) + d.roundUp(test.n) + if got := d.String(); got != test.up { + t.Errorf("roundUp(%d, %d) = %s; want %s", test.x, test.n, got, test.up) + } + } +} + +var sink string + +func BenchmarkDecimalConversion(b *testing.B) { + for i := 0; i < b.N; i++ { + for shift := -100; shift <= +100; shift++ { + var d decimal + d.init(natOne, shift) + sink = d.String() + } + } +} + +func BenchmarkFloatString(b *testing.B) { + x := new(Float) + for _, prec := range []uint{1e2, 1e3, 1e4, 1e5} { + x.SetPrec(prec).SetRat(NewRat(1, 3)) + b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + sink = x.String() + } + }) + } +} diff --git a/src/math/big/doc.go b/src/math/big/doc.go new file mode 100644 index 0000000..65ed019 --- /dev/null +++ b/src/math/big/doc.go @@ -0,0 +1,99 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package big implements arbitrary-precision arithmetic (big numbers). +The following numeric types are supported: + + Int signed integers + Rat rational numbers + Float floating-point numbers + +The zero value for an Int, Rat, or Float corresponds to 0. 
Thus, new +values can be declared in the usual ways and denote 0 without further +initialization: + + var x Int // &x is an *Int of value 0 + var r = &Rat{} // r is a *Rat of value 0 + y := new(Float) // y is a *Float of value 0 + +Alternatively, new values can be allocated and initialized with factory +functions of the form: + + func NewT(v V) *T + +For instance, NewInt(x) returns an *Int set to the value of the int64 +argument x, NewRat(a, b) returns a *Rat set to the fraction a/b where +a and b are int64 values, and NewFloat(f) returns a *Float initialized +to the float64 argument f. More flexibility is provided with explicit +setters, for instance: + + var z1 Int + z1.SetUint64(123) // z1 := 123 + z2 := new(Rat).SetFloat64(1.25) // z2 := 5/4 + z3 := new(Float).SetInt(z1) // z3 := 123.0 + +Setters, numeric operations and predicates are represented as methods of +the form: + + func (z *T) SetV(v V) *T // z = v + func (z *T) Unary(x *T) *T // z = unary x + func (z *T) Binary(x, y *T) *T // z = x binary y + func (x *T) Pred() P // p = pred(x) + +with T one of Int, Rat, or Float. For unary and binary operations, the +result is the receiver (usually named z in that case; see below); if it +is one of the operands x or y it may be safely overwritten (and its memory +reused). + +Arithmetic expressions are typically written as a sequence of individual +method calls, with each call corresponding to an operation. The receiver +denotes the result and the method arguments are the operation's operands. +For instance, given three *Int values a, b and c, the invocation + + c.Add(a, b) + +computes the sum a + b and stores the result in c, overwriting whatever +value was held in c before. Unless specified otherwise, operations permit +aliasing of parameters, so it is perfectly ok to write + + sum.Add(sum, x) + +to accumulate values x in a sum. + +(By always passing in a result value via the receiver, memory use can be +much better controlled. 
Instead of having to allocate new memory for each +result, an operation can reuse the space allocated for the result value, +and overwrite that value with the new result in the process.) + +Notational convention: Incoming method parameters (including the receiver) +are named consistently in the API to clarify their use. Incoming operands +are usually named x, y, a, b, and so on, but never z. A parameter specifying +the result is named z (typically the receiver). + +For instance, the arguments for (*Int).Add are named x and y, and because +the receiver specifies the result destination, it is called z: + + func (z *Int) Add(x, y *Int) *Int + +Methods of this form typically return the incoming receiver as well, to +enable simple call chaining. + +Methods which don't require a result value to be passed in (for instance, +Int.Sign), simply return the result. In this case, the receiver is typically +the first operand, named x: + + func (x *Int) Sign() int + +Various methods support conversions between strings and corresponding +numeric values, and vice versa: *Int, *Rat, and *Float values implement +the Stringer interface for a (default) string representation of the value, +but also provide SetString methods to initialize a value from a string in +a variety of supported formats (see the respective SetString documentation). + +Finally, *Int, *Rat, and *Float satisfy the fmt package's Scanner interface +for scanning and (except for *Rat) the Formatter interface for formatted +printing. +*/ +package big diff --git a/src/math/big/example_rat_test.go b/src/math/big/example_rat_test.go new file mode 100644 index 0000000..a971170 --- /dev/null +++ b/src/math/big/example_rat_test.go @@ -0,0 +1,65 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big_test + +import ( + "fmt" + "math/big" +) + +// Use the classic continued fraction for e +// e = [1; 0, 1, 1, 2, 1, 1, ... 2n, 1, 1, ...] +// i.e., for the nth term, use +// 1 if n mod 3 != 1 +// (n-1)/3 * 2 if n mod 3 == 1 +func recur(n, lim int64) *big.Rat { + term := new(big.Rat) + if n%3 != 1 { + term.SetInt64(1) + } else { + term.SetInt64((n - 1) / 3 * 2) + } + + if n > lim { + return term + } + + // Directly initialize frac as the fractional + // inverse of the result of recur. + frac := new(big.Rat).Inv(recur(n+1, lim)) + + return term.Add(term, frac) +} + +// This example demonstrates how to use big.Rat to compute the +// first 15 terms in the sequence of rational convergents for +// the constant e (base of natural logarithm). +func Example_eConvergents() { + for i := 1; i <= 15; i++ { + r := recur(0, int64(i)) + + // Print r both as a fraction and as a floating-point number. + // Since big.Rat implements fmt.Stringer, we can use %-13s to + // get a left-aligned string representation of the fraction. + fmt.Printf("%-13s = %s\n", r, r.FloatString(8)) + } + + // Output: + // 2/1 = 2.00000000 + // 3/1 = 3.00000000 + // 8/3 = 2.66666667 + // 11/4 = 2.75000000 + // 19/7 = 2.71428571 + // 87/32 = 2.71875000 + // 106/39 = 2.71794872 + // 193/71 = 2.71830986 + // 1264/465 = 2.71827957 + // 1457/536 = 2.71828358 + // 2721/1001 = 2.71828172 + // 23225/8544 = 2.71828184 + // 25946/9545 = 2.71828182 + // 49171/18089 = 2.71828183 + // 517656/190435 = 2.71828183 +} diff --git a/src/math/big/example_test.go b/src/math/big/example_test.go new file mode 100644 index 0000000..31ca784 --- /dev/null +++ b/src/math/big/example_test.go @@ -0,0 +1,148 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big_test + +import ( + "fmt" + "log" + "math" + "math/big" +) + +func ExampleRat_SetString() { + r := new(big.Rat) + r.SetString("355/113") + fmt.Println(r.FloatString(3)) + // Output: 3.142 +} + +func ExampleInt_SetString() { + i := new(big.Int) + i.SetString("644", 8) // octal + fmt.Println(i) + // Output: 420 +} + +func ExampleFloat_SetString() { + f := new(big.Float) + f.SetString("3.14159") + fmt.Println(f) + // Output: 3.14159 +} + +func ExampleRat_Scan() { + // The Scan function is rarely used directly; + // the fmt package recognizes it as an implementation of fmt.Scanner. + r := new(big.Rat) + _, err := fmt.Sscan("1.5000", r) + if err != nil { + log.Println("error scanning value:", err) + } else { + fmt.Println(r) + } + // Output: 3/2 +} + +func ExampleInt_Scan() { + // The Scan function is rarely used directly; + // the fmt package recognizes it as an implementation of fmt.Scanner. + i := new(big.Int) + _, err := fmt.Sscan("18446744073709551617", i) + if err != nil { + log.Println("error scanning value:", err) + } else { + fmt.Println(i) + } + // Output: 18446744073709551617 +} + +func ExampleFloat_Scan() { + // The Scan function is rarely used directly; + // the fmt package recognizes it as an implementation of fmt.Scanner. + f := new(big.Float) + _, err := fmt.Sscan("1.19282e99", f) + if err != nil { + log.Println("error scanning value:", err) + } else { + fmt.Println(f) + } + // Output: 1.19282e+99 +} + +// This example demonstrates how to use big.Int to compute the smallest +// Fibonacci number with 100 decimal digits and to test whether it is prime. +func Example_fibonacci() { + // Initialize two big ints with the first two numbers in the sequence. + a := big.NewInt(0) + b := big.NewInt(1) + + // Initialize limit as 10^99, the smallest integer with 100 digits. + var limit big.Int + limit.Exp(big.NewInt(10), big.NewInt(99), nil) + + // Loop while a is smaller than 1e99 (i.e., has fewer than 100 digits). 
+ for a.Cmp(&limit) < 0 { + // Compute the next Fibonacci number, storing it in a. + a.Add(a, b) + // Swap a and b so that b is the next number in the sequence. + a, b = b, a + } + fmt.Println(a) // 100-digit Fibonacci number + + // Test a for primality. + // (ProbablyPrime's argument sets the number of Miller-Rabin + // rounds to be performed. 20 is a good value.) + fmt.Println(a.ProbablyPrime(20)) + + // Output: + // 1344719667586153181419716641724567886890850696275767987106294472017884974410332069524504824747437757 + // false +} + +// This example shows how to use big.Float to compute the square root of 2 with +// a precision of 200 bits, and how to print the result as a decimal number. +func Example_sqrt2() { + // We'll do computations with 200 bits of precision in the mantissa. + const prec = 200 + + // Compute the square root of 2 using Newton's Method. We start with + // an initial estimate for sqrt(2), and then iterate: + // x_{n+1} = 1/2 * ( x_n + (2.0 / x_n) ) + + // Since Newton's Method doubles the number of correct digits at each + // iteration, we need at least log_2(prec) steps. + steps := int(math.Log2(prec)) + + // Initialize values we need for the computation. + two := new(big.Float).SetPrec(prec).SetInt64(2) + half := new(big.Float).SetPrec(prec).SetFloat64(0.5) + + // Use 1 as the initial estimate. + x := new(big.Float).SetPrec(prec).SetInt64(1) + + // We use t as a temporary variable. There's no need to set its precision + // since big.Float values with unset (== 0) precision automatically assume + // the largest precision of the arguments when used as the result (receiver) + // of a big.Float operation. + t := new(big.Float) + + // Iterate. 
+ for i := 0; i <= steps; i++ { + t.Quo(two, x) // t = 2.0 / x_n + t.Add(x, t) // t = x_n + (2.0 / x_n) + x.Mul(half, t) // x_{n+1} = 0.5 * t + } + + // We can use the usual fmt.Printf verbs since big.Float implements fmt.Formatter + fmt.Printf("sqrt(2) = %.50f\n", x) + + // Print the error between 2 and x*x. + t.Mul(x, x) // t = x*x + fmt.Printf("error = %e\n", t.Sub(two, t)) + + // Output: + // sqrt(2) = 1.41421356237309504880168872420969807856967187537695 + // error = 0.000000e+00 +} diff --git a/src/math/big/float.go b/src/math/big/float.go new file mode 100644 index 0000000..a8c91a6 --- /dev/null +++ b/src/math/big/float.go @@ -0,0 +1,1732 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision floating-point numbers. +// Like in the GNU MPFR library (https://www.mpfr.org/), operands +// can be of mixed precision. Unlike MPFR, the rounding mode is +// not specified with each operation, but with each operand. The +// rounding mode of the result operand determines the rounding +// mode of an operation. This is a from-scratch implementation. + +package big + +import ( + "fmt" + "math" + "math/bits" +) + +const debugFloat = false // enable for debugging + +// A nonzero finite Float represents a multi-precision floating point number +// +// sign × mantissa × 2**exponent +// +// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. +// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). +// All Floats are ordered, and the ordering of two Floats x and y +// is defined by x.Cmp(y). +// +// Each Float value also has a precision, rounding mode, and accuracy. +// The precision is the maximum number of mantissa bits available to +// represent the value. 
The rounding mode specifies how a result should +// be rounded to fit into the mantissa bits, and accuracy describes the +// rounding error with respect to the exact result. +// +// Unless specified otherwise, all operations (including setters) that +// specify a *Float variable for the result (usually via the receiver +// with the exception of MantExp), round the numeric result according +// to the precision and rounding mode of the result variable. +// +// If the provided result precision is 0 (see below), it is set to the +// precision of the argument with the largest precision value before any +// rounding takes place, and the rounding mode remains unchanged. Thus, +// uninitialized Floats provided as result arguments will have their +// precision set to a reasonable value determined by the operands, and +// their mode is the zero value for RoundingMode (ToNearestEven). +// +// By setting the desired precision to 24 or 53 and using matching rounding +// mode (typically ToNearestEven), Float operations produce the same results +// as the corresponding float32 or float64 IEEE-754 arithmetic for operands +// that correspond to normal (i.e., not denormal) float32 or float64 numbers. +// Exponent underflow and overflow lead to a 0 or an Infinity for different +// values than IEEE-754 because Float exponents have a much larger range. +// +// The zero (uninitialized) value for a Float is ready to use and represents +// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. +// +// Operations always take pointer arguments (*Float) rather +// than Float values, and each unique Float value requires +// its own unique *Float pointer. To "copy" a Float value, +// an existing (or newly allocated) Float must be set to +// a new value using the Float.Set method; shallow copies +// of Floats are not supported and may lead to errors. 
+type Float struct { + prec uint32 + mode RoundingMode + acc Accuracy + form form + neg bool + mant nat + exp int32 +} + +// An ErrNaN panic is raised by a Float operation that would lead to +// a NaN under IEEE-754 rules. An ErrNaN implements the error interface. +type ErrNaN struct { + msg string +} + +func (err ErrNaN) Error() string { + return err.msg +} + +// NewFloat allocates and returns a new Float set to x, +// with precision 53 and rounding mode ToNearestEven. +// NewFloat panics with ErrNaN if x is a NaN. +func NewFloat(x float64) *Float { + if math.IsNaN(x) { + panic(ErrNaN{"NewFloat(NaN)"}) + } + return new(Float).SetFloat64(x) +} + +// Exponent and precision limits. +const ( + MaxExp = math.MaxInt32 // largest supported exponent + MinExp = math.MinInt32 // smallest supported exponent + MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited +) + +// Internal representation: The mantissa bits x.mant of a nonzero finite +// Float x are stored in a nat slice long enough to hold up to x.prec bits; +// the slice may (but doesn't have to) be shorter if the mantissa contains +// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., +// the msb is shifted all the way "to the left"). Thus, if the mantissa has +// trailing 0 bits or x.prec is not a multiple of the Word size _W, +// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds +// to the value 0.5; the exponent x.exp shifts the binary point as needed. +// +// A zero or non-finite Float x ignores x.mant and x.exp. +// +// x form neg mant exp +// ---------------------------------------------------------- +// ±0 zero sign - - +// 0 < |x| < +Inf finite sign mantissa exponent +// ±Inf inf sign - - + +// A form value describes the internal representation. +type form byte + +// The form value order is relevant - do not change! 
+const ( + zero form = iota + finite + inf +) + +// RoundingMode determines how a Float value is rounded to the +// desired precision. Rounding may change the Float value; the +// rounding error is described by the Float's Accuracy. +type RoundingMode byte + +// These constants define supported rounding modes. +const ( + ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven + ToNearestAway // == IEEE 754-2008 roundTiesToAway + ToZero // == IEEE 754-2008 roundTowardZero + AwayFromZero // no IEEE 754-2008 equivalent + ToNegativeInf // == IEEE 754-2008 roundTowardNegative + ToPositiveInf // == IEEE 754-2008 roundTowardPositive +) + +//go:generate stringer -type=RoundingMode + +// Accuracy describes the rounding error produced by the most recent +// operation that generated a Float value, relative to the exact value. +type Accuracy int8 + +// Constants describing the Accuracy of a Float. +const ( + Below Accuracy = -1 + Exact Accuracy = 0 + Above Accuracy = +1 +) + +//go:generate stringer -type=Accuracy + +// SetPrec sets z's precision to prec and returns the (possibly) rounded +// value of z. Rounding occurs according to z's rounding mode if the mantissa +// cannot be represented in prec bits without loss of precision. +// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. +// If prec > MaxPrec, it is set to MaxPrec. +func (z *Float) SetPrec(prec uint) *Float { + z.acc = Exact // optimistically assume no rounding is needed + + // special case + if prec == 0 { + z.prec = 0 + if z.form == finite { + // truncate z to 0 + z.acc = makeAcc(z.neg) + z.form = zero + } + return z + } + + // general case + if prec > MaxPrec { + prec = MaxPrec + } + old := z.prec + z.prec = uint32(prec) + if z.prec < old { + z.round(0) + } + return z +} + +func makeAcc(above bool) Accuracy { + if above { + return Above + } + return Below +} + +// SetMode sets z's rounding mode to mode and returns an exact z. +// z remains unchanged otherwise. 
+// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. +func (z *Float) SetMode(mode RoundingMode) *Float { + z.mode = mode + z.acc = Exact + return z +} + +// Prec returns the mantissa precision of x in bits. +// The result may be 0 for |x| == 0 and |x| == Inf. +func (x *Float) Prec() uint { + return uint(x.prec) +} + +// MinPrec returns the minimum precision required to represent x exactly +// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). +// The result is 0 for |x| == 0 and |x| == Inf. +func (x *Float) MinPrec() uint { + if x.form != finite { + return 0 + } + return uint(len(x.mant))*_W - x.mant.trailingZeroBits() +} + +// Mode returns the rounding mode of x. +func (x *Float) Mode() RoundingMode { + return x.mode +} + +// Acc returns the accuracy of x produced by the most recent +// operation, unless explicitly documented otherwise by that +// operation. +func (x *Float) Acc() Accuracy { + return x.acc +} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x is ±0 +// +1 if x > 0 +// +func (x *Float) Sign() int { + if debugFloat { + x.validate() + } + if x.form == zero { + return 0 + } + if x.neg { + return -1 + } + return 1 +} + +// MantExp breaks x into its mantissa and exponent components +// and returns the exponent. If a non-nil mant argument is +// provided its value is set to the mantissa of x, with the +// same precision and rounding mode as x. The components +// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. +// Calling MantExp with a nil argument is an efficient way to +// get the exponent of the receiver. +// +// Special cases are: +// +// ( ±0).MantExp(mant) = 0, with mant set to ±0 +// (±Inf).MantExp(mant) = 0, with mant set to ±Inf +// +// x and mant may be the same in which case x is set to its +// mantissa value. 
+func (x *Float) MantExp(mant *Float) (exp int) { + if debugFloat { + x.validate() + } + if x.form == finite { + exp = int(x.exp) + } + if mant != nil { + mant.Copy(x) + if mant.form == finite { + mant.exp = 0 + } + } + return +} + +func (z *Float) setExpAndRound(exp int64, sbit uint) { + if exp < MinExp { + // underflow + z.acc = makeAcc(z.neg) + z.form = zero + return + } + + if exp > MaxExp { + // overflow + z.acc = makeAcc(!z.neg) + z.form = inf + return + } + + z.form = finite + z.exp = int32(exp) + z.round(sbit) +} + +// SetMantExp sets z to mant × 2**exp and returns z. +// The result z has the same precision and rounding mode +// as mant. SetMantExp is an inverse of MantExp but does +// not require 0.5 <= |mant| < 1.0. Specifically, for a +// given x of type *Float, SetMantExp relates to MantExp +// as follows: +// +// mant := new(Float) +// new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 +// +// Special cases are: +// +// z.SetMantExp( ±0, exp) = ±0 +// z.SetMantExp(±Inf, exp) = ±Inf +// +// z and mant may be the same in which case z's exponent +// is set to exp. +func (z *Float) SetMantExp(mant *Float, exp int) *Float { + if debugFloat { + z.validate() + mant.validate() + } + z.Copy(mant) + + if z.form == finite { + // 0 < |mant| < +Inf + z.setExpAndRound(int64(z.exp)+int64(exp), 0) + } + return z +} + +// Signbit reports whether x is negative or negative zero. +func (x *Float) Signbit() bool { + return x.neg +} + +// IsInf reports whether x is +Inf or -Inf. +func (x *Float) IsInf() bool { + return x.form == inf +} + +// IsInt reports whether x is an integer. +// ±Inf values are not integers. 
func (x *Float) IsInt() bool {
	if debugFloat {
		x.validate()
	}
	// special cases: ±0 is an integer, ±Inf is not
	if x.form != finite {
		return x.form == zero
	}
	// x.form == finite
	if x.exp <= 0 {
		// 0 < |x| < 1
		return false
	}
	// x.exp > 0
	// x is an integer if all significant mantissa bits lie above the binary point.
	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
}

// validate is debugging support. It panics if a finite x violates the
// invariants checked below: non-empty mantissa, msb of the
// most-significant mantissa word set, and non-zero precision.
// It must only be called when debugFloat is set.
func (x *Float) validate() {
	if !debugFloat {
		// avoid performance bugs
		panic("validate called but debugFloat is not set")
	}
	if x.form != finite {
		// nothing to check for ±0 and ±Inf
		return
	}
	m := len(x.mant)
	if m == 0 {
		panic("nonzero finite number with empty mantissa")
	}
	const msb = 1 << (_W - 1)
	if x.mant[m-1]&msb == 0 {
		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
	}
	if x.prec == 0 {
		panic("zero precision finite number")
	}
}

// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
// have before calling round. z's mantissa must be normalized (with the msb set)
// or empty.
//
// If incrementing the mantissa overflows and the exponent is already at
// MaxExp, z becomes an infinity.
//
// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
// sign of z. For correct rounding, the sign of z must be set correctly before
// calling round.
func (z *Float) round(sbit uint) {
	if debugFloat {
		z.validate()
	}

	z.acc = Exact
	if z.form != finite {
		// ±0 or ±Inf => nothing left to do
		return
	}
	// z.form == finite && len(z.mant) > 0
	// m > 0 implies z.prec > 0 (checked by validate)

	m := uint32(len(z.mant)) // present mantissa length in words
	bits := m * _W           // present mantissa bits; bits > 0
	if bits <= z.prec {
		// mantissa fits => nothing to do
		return
	}
	// bits > z.prec

	// Rounding is based on two bits: the rounding bit (rbit) and the
	// sticky bit (sbit). The rbit is the bit immediately before the
	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
	//
	//   rbit  sbit  => "fractional part"
	//
	//   0     0        == 0
	//   0     1        >  0  , < 0.5
	//   1     0        == 0.5
	//   1     1        >  0.5, < 1.0

	// bits > z.prec: mantissa too large => round
	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
	// The sticky bit is only needed for rounding ToNearestEven
	// or when the rounding bit is zero. Avoid computation otherwise.
	if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
		sbit = z.mant.sticky(r)
	}
	sbit &= 1 // be safe and ensure it's a single bit

	// cut off extra words
	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
	if m > n {
		copy(z.mant, z.mant[m-n:]) // move n last words to front
		z.mant = z.mant[:n]
	}

	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
	ntz := n*_W - z.prec // 0 <= ntz < _W
	lsb := Word(1) << ntz

	// round if result is inexact
	if rbit|sbit != 0 {
		// Make rounding decision: The result mantissa is truncated ("rounded down")
		// by default. Decide if we need to increment, or "round up", the (unsigned)
		// mantissa.
		inc := false
		switch z.mode {
		case ToNegativeInf:
			inc = z.neg
		case ToZero:
			// nothing to do
		case ToNearestEven:
			// round up on more than half, or on exactly half if the kept lsb is odd
			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
		case ToNearestAway:
			inc = rbit != 0
		case AwayFromZero:
			inc = true
		case ToPositiveInf:
			inc = !z.neg
		default:
			panic("unreachable")
		}

		// A positive result (!z.neg) is Above the exact result if we increment,
		// and it's Below if we truncate (Exact results require no rounding).
		// For a negative result (z.neg) it is exactly the opposite.
		z.acc = makeAcc(inc != z.neg)

		if inc {
			// add 1 to mantissa
			if addVW(z.mant, z.mant, lsb) != 0 {
				// mantissa overflow => adjust exponent
				if z.exp >= MaxExp {
					// exponent overflow
					z.form = inf
					return
				}
				z.exp++
				// adjust mantissa: divide by 2 to compensate for exponent adjustment
				shrVU(z.mant, z.mant, 1)
				// set msb == carry == 1 from the mantissa overflow above
				const msb = 1 << (_W - 1)
				z.mant[n-1] |= msb
			}
		}
	}

	// zero out trailing bits in least-significant word
	z.mant[0] &^= lsb - 1

	if debugFloat {
		z.validate()
	}
}

// setBits64 sets z to the (possibly rounded) value of x with the sign
// given by neg and returns z. If z's precision is 0, it is changed to 64.
// The sign is set before rounding because it affects the rounding result.
func (z *Float) setBits64(neg bool, x uint64) *Float {
	if z.prec == 0 {
		z.prec = 64
	}
	z.acc = Exact
	z.neg = neg
	if x == 0 {
		z.form = zero
		return z
	}
	// x != 0
	z.form = finite
	// normalize: shift x left so its most significant set bit becomes bit 63
	s := bits.LeadingZeros64(x)
	z.mant = z.mant.setUint64(x << uint(s))
	z.exp = int32(64 - s) // always fits
	if z.prec < 64 {
		z.round(0)
	}
	return z
}

// SetUint64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func (z *Float) SetUint64(x uint64) *Float {
	return z.setBits64(false, x)
}

// SetInt64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func (z *Float) SetInt64(x int64) *Float {
	u := x
	if u < 0 {
		// For x == math.MinInt64 the negation wraps around to x itself;
		// the uint64 conversion below still yields the magnitude 1<<63.
		u = -u
	}
	// We cannot simply call z.SetUint64(uint64(u)) and change
	// the sign afterwards because the sign affects rounding.
	return z.setBits64(x < 0, uint64(u))
}

// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 53 (and rounding will have
// no effect). SetFloat64 panics with ErrNaN if x is a NaN.
+func (z *Float) SetFloat64(x float64) *Float { + if z.prec == 0 { + z.prec = 53 + } + if math.IsNaN(x) { + panic(ErrNaN{"Float.SetFloat64(NaN)"}) + } + z.acc = Exact + z.neg = math.Signbit(x) // handle -0, -Inf correctly + if x == 0 { + z.form = zero + return z + } + if math.IsInf(x, 0) { + z.form = inf + return z + } + // normalized x != 0 + z.form = finite + fmant, exp := math.Frexp(x) // get normalized mantissa + z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) + z.exp = int32(exp) // always fits + if z.prec < 53 { + z.round(0) + } + return z +} + +// fnorm normalizes mantissa m by shifting it to the left +// such that the msb of the most-significant word (msw) is 1. +// It returns the shift amount. It assumes that len(m) != 0. +func fnorm(m nat) int64 { + if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { + panic("msw of mantissa is 0") + } + s := nlz(m[len(m)-1]) + if s > 0 { + c := shlVU(m, m, s) + if debugFloat && c != 0 { + panic("nlz or shlVU incorrect") + } + } + return int64(s) +} + +// SetInt sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to the larger of x.BitLen() +// or 64 (and rounding will have no effect). +func (z *Float) SetInt(x *Int) *Float { + // TODO(gri) can be more efficient if z.prec > 0 + // but small compared to the size of x, or if there + // are many trailing 0's. + bits := uint32(x.BitLen()) + if z.prec == 0 { + z.prec = umax32(bits, 64) + } + z.acc = Exact + z.neg = x.neg + if len(x.abs) == 0 { + z.form = zero + return z + } + // x != 0 + z.mant = z.mant.set(x.abs) + fnorm(z.mant) + z.setExpAndRound(int64(bits), 0) + return z +} + +// SetRat sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to the largest of a.BitLen(), +// b.BitLen(), or 64; with x = a/b. 
+func (z *Float) SetRat(x *Rat) *Float { + if x.IsInt() { + return z.SetInt(x.Num()) + } + var a, b Float + a.SetInt(x.Num()) + b.SetInt(x.Denom()) + if z.prec == 0 { + z.prec = umax32(a.prec, b.prec) + } + return z.Quo(&a, &b) +} + +// SetInf sets z to the infinite Float -Inf if signbit is +// set, or +Inf if signbit is not set, and returns z. The +// precision of z is unchanged and the result is always +// Exact. +func (z *Float) SetInf(signbit bool) *Float { + z.acc = Exact + z.form = inf + z.neg = signbit + return z +} + +// Set sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to the precision of x +// before setting z (and rounding will have no effect). +// Rounding is performed according to z's precision and rounding +// mode; and z's accuracy reports the result error relative to the +// exact (not rounded) result. +func (z *Float) Set(x *Float) *Float { + if debugFloat { + x.validate() + } + z.acc = Exact + if z != x { + z.form = x.form + z.neg = x.neg + if x.form == finite { + z.exp = x.exp + z.mant = z.mant.set(x.mant) + } + if z.prec == 0 { + z.prec = x.prec + } else if z.prec < x.prec { + z.round(0) + } + } + return z +} + +// Copy sets z to x, with the same precision, rounding mode, and +// accuracy as x, and returns z. x is not changed even if z and +// x are the same. +func (z *Float) Copy(x *Float) *Float { + if debugFloat { + x.validate() + } + if z != x { + z.prec = x.prec + z.mode = x.mode + z.acc = x.acc + z.form = x.form + z.neg = x.neg + if z.form == finite { + z.mant = z.mant.set(x.mant) + z.exp = x.exp + } + } + return z +} + +// msb32 returns the 32 most significant bits of x. +func msb32(x nat) uint32 { + i := len(x) - 1 + if i < 0 { + return 0 + } + if debugFloat && x[i]&(1<<(_W-1)) == 0 { + panic("x not normalized") + } + switch _W { + case 32: + return uint32(x[i]) + case 64: + return uint32(x[i] >> 32) + } + panic("unreachable") +} + +// msb64 returns the 64 most significant bits of x. 
+func msb64(x nat) uint64 { + i := len(x) - 1 + if i < 0 { + return 0 + } + if debugFloat && x[i]&(1<<(_W-1)) == 0 { + panic("x not normalized") + } + switch _W { + case 32: + v := uint64(x[i]) << 32 + if i > 0 { + v |= uint64(x[i-1]) + } + return v + case 64: + return uint64(x[i]) + } + panic("unreachable") +} + +// Uint64 returns the unsigned integer resulting from truncating x +// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact +// if x is an integer and Below otherwise. +// The result is (0, Above) for x < 0, and (math.MaxUint64, Below) +// for x > math.MaxUint64. +func (x *Float) Uint64() (uint64, Accuracy) { + if debugFloat { + x.validate() + } + + switch x.form { + case finite: + if x.neg { + return 0, Above + } + // 0 < x < +Inf + if x.exp <= 0 { + // 0 < x < 1 + return 0, Below + } + // 1 <= x < Inf + if x.exp <= 64 { + // u = trunc(x) fits into a uint64 + u := msb64(x.mant) >> (64 - uint32(x.exp)) + if x.MinPrec() <= 64 { + return u, Exact + } + return u, Below // x truncated + } + // x too large + return math.MaxUint64, Below + + case zero: + return 0, Exact + + case inf: + if x.neg { + return 0, Above + } + return math.MaxUint64, Below + } + + panic("unreachable") +} + +// Int64 returns the integer resulting from truncating x towards zero. +// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is +// an integer, and Above (x < 0) or Below (x > 0) otherwise. +// The result is (math.MinInt64, Above) for x < math.MinInt64, +// and (math.MaxInt64, Below) for x > math.MaxInt64. 
func (x *Float) Int64() (int64, Accuracy) {
	if debugFloat {
		x.validate()
	}

	switch x.form {
	case finite:
		// 0 < |x| < +Inf
		// accuracy of a truncated result: Above for x < 0, Below for x > 0
		acc := makeAcc(x.neg)
		if x.exp <= 0 {
			// 0 < |x| < 1
			return 0, acc
		}
		// x.exp > 0

		// 1 <= |x| < +Inf
		if x.exp <= 63 {
			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
			if x.neg {
				i = -i
			}
			// x is an integer if no significant bits were shifted out
			if x.MinPrec() <= uint(x.exp) {
				return i, Exact
			}
			return i, acc // x truncated
		}
		if x.neg {
			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
			if x.exp == 64 && x.MinPrec() == 1 {
				acc = Exact
			}
			return math.MinInt64, acc
		}
		// x too large
		return math.MaxInt64, Below

	case zero:
		return 0, Exact

	case inf:
		if x.neg {
			return math.MinInt64, Above
		}
		return math.MaxInt64, Below
	}

	panic("unreachable")
}

// Float32 returns the float32 value nearest to x. If x is too small to be
// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
// If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
func (x *Float) Float32() (float32, Accuracy) {
	if debugFloat {
		x.validate()
	}

	switch x.form {
	case finite:
		// 0 < |x| < +Inf

		const (
			fbits = 32                //        float size
			mbits = 23                //        mantissa size (excluding implicit msb)
			ebits = fbits - mbits - 1 //     8  exponent size
			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
			emax  = bias              //   127  largest unbiased exponent (normal)
		)

		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0

		// Compute precision p for float32 mantissa.
		// If the exponent is too small, we have a denormal number before
		// rounding and fewer than p mantissa bits of precision available
		// (the exponent remains fixed but the mantissa gets shifted right).
		p := mbits + 1 // precision of normal float
		if e < emin {
			// recompute precision
			p = mbits + 1 - emin + int(e)
			// If p == 0, the mantissa of x is shifted so much to the right
			// that its msb falls immediately to the right of the float32
			// mantissa space. In other words, if the smallest denormal is
			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
			// If m == 0.5, it is rounded down to even, i.e., 0.0.
			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
				// underflow to ±0
				if x.neg {
					// negate a variable: the constant expression -0.0 would be +0
					var z float32
					return -z, Above
				}
				return 0.0, Below
			}
			// otherwise, round up
			// We handle p == 0 explicitly because it's easy and because
			// Float.round doesn't support rounding to 0 bits of precision.
			if p == 0 {
				if x.neg {
					return -math.SmallestNonzeroFloat32, Below
				}
				return math.SmallestNonzeroFloat32, Above
			}
		}
		// p > 0

		// round
		// (r is a zero Float apart from its precision, so its rounding
		// mode is the zero RoundingMode value, ToNearestEven)
		var r Float
		r.prec = uint32(p)
		r.Set(x)
		e = r.exp - 1

		// Rounding may have caused r to overflow to ±Inf
		// (rounding never causes underflows to 0).
		// If the exponent is too large, also overflow to ±Inf.
		if r.form == inf || e > emax {
			// overflow
			if x.neg {
				return float32(math.Inf(-1)), Below
			}
			return float32(math.Inf(+1)), Above
		}
		// e <= emax

		// Determine sign, biased exponent, and mantissa.
		var sign, bexp, mant uint32
		if x.neg {
			sign = 1 << (fbits - 1)
		}

		// Rounding may have caused a denormal number to
		// become normal. Check again.
		if e < emin {
			// denormal number: recompute precision
			// Since rounding may have at best increased precision
			// and we have eliminated p <= 0 early, we know p > 0.
			// bexp == 0 for denormals
			p = mbits + 1 - emin + int(e)
			mant = msb32(r.mant) >> uint(fbits-p)
		} else {
			// normal number: emin <= e <= emax
			bexp = uint32(e+bias) << mbits
			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
		}

		return math.Float32frombits(sign | bexp | mant), r.acc

	case zero:
		if x.neg {
			var z float32
			return -z, Exact
		}
		return 0.0, Exact

	case inf:
		if x.neg {
			return float32(math.Inf(-1)), Exact
		}
		return float32(math.Inf(+1)), Exact
	}

	panic("unreachable")
}

// Float64 returns the float64 value nearest to x. If x is too small to be
// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
func (x *Float) Float64() (float64, Accuracy) {
	if debugFloat {
		x.validate()
	}

	switch x.form {
	case finite:
		// 0 < |x| < +Inf

		const (
			fbits = 64                //        float size
			mbits = 52                //        mantissa size (excluding implicit msb)
			ebits = fbits - mbits - 1 //    11  exponent size
			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
			emax  = bias              //  1023  largest unbiased exponent (normal)
		)

		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0

		// Compute precision p for float64 mantissa.
		// If the exponent is too small, we have a denormal number before
		// rounding and fewer than p mantissa bits of precision available
		// (the exponent remains fixed but the mantissa gets shifted right).
		p := mbits + 1 // precision of normal float
		if e < emin {
			// recompute precision
			p = mbits + 1 - emin + int(e)
			// If p == 0, the mantissa of x is shifted so much to the right
			// that its msb falls immediately to the right of the float64
			// mantissa space. In other words, if the smallest denormal is
			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
			// If m == 0.5, it is rounded down to even, i.e., 0.0.
			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
				// underflow to ±0
				if x.neg {
					// negate a variable: the constant expression -0.0 would be +0
					var z float64
					return -z, Above
				}
				return 0.0, Below
			}
			// otherwise, round up
			// We handle p == 0 explicitly because it's easy and because
			// Float.round doesn't support rounding to 0 bits of precision.
			if p == 0 {
				if x.neg {
					return -math.SmallestNonzeroFloat64, Below
				}
				return math.SmallestNonzeroFloat64, Above
			}
		}
		// p > 0

		// round
		// (r is a zero Float apart from its precision, so its rounding
		// mode is the zero RoundingMode value, ToNearestEven)
		var r Float
		r.prec = uint32(p)
		r.Set(x)
		e = r.exp - 1

		// Rounding may have caused r to overflow to ±Inf
		// (rounding never causes underflows to 0).
		// If the exponent is too large, also overflow to ±Inf.
		if r.form == inf || e > emax {
			// overflow
			if x.neg {
				return math.Inf(-1), Below
			}
			return math.Inf(+1), Above
		}
		// e <= emax

		// Determine sign, biased exponent, and mantissa.
		var sign, bexp, mant uint64
		if x.neg {
			sign = 1 << (fbits - 1)
		}

		// Rounding may have caused a denormal number to
		// become normal. Check again.
		if e < emin {
			// denormal number: recompute precision
			// Since rounding may have at best increased precision
			// and we have eliminated p <= 0 early, we know p > 0.
			// bexp == 0 for denormals
			p = mbits + 1 - emin + int(e)
			mant = msb64(r.mant) >> uint(fbits-p)
		} else {
			// normal number: emin <= e <= emax
			bexp = uint64(e+bias) << mbits
			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
		}

		return math.Float64frombits(sign | bexp | mant), r.acc

	case zero:
		if x.neg {
			var z float64
			return -z, Exact
		}
		return 0.0, Exact

	case inf:
		if x.neg {
			return math.Inf(-1), Exact
		}
		return math.Inf(+1), Exact
	}

	panic("unreachable")
}

// Int returns the result of truncating x towards zero;
// or nil if x is an infinity.
// The result is Exact if x.IsInt(); otherwise it is Below
// for x > 0, and Above for x < 0.
// If a non-nil *Int argument z is provided, Int stores
// the result in z instead of allocating a new Int.
func (x *Float) Int(z *Int) (*Int, Accuracy) {
	if debugFloat {
		x.validate()
	}

	// allocate a result only if one is returned (x is ±0 or finite)
	if z == nil && x.form <= finite {
		z = new(Int)
	}

	switch x.form {
	case finite:
		// 0 < |x| < +Inf
		acc := makeAcc(x.neg)
		if x.exp <= 0 {
			// 0 < |x| < 1
			return z.SetInt64(0), acc
		}
		// x.exp > 0

		// 1 <= |x| < +Inf
		// determine minimum required precision for x
		allBits := uint(len(x.mant)) * _W
		exp := uint(x.exp)
		if x.MinPrec() <= exp {
			acc = Exact
		}
		// shift mantissa as needed
		// NOTE(review): z was already allocated above for finite x,
		// so this nil check looks redundant.
		if z == nil {
			z = new(Int)
		}
		z.neg = x.neg
		switch {
		case exp > allBits:
			// integer has more bits than the mantissa => append zero bits
			z.abs = z.abs.shl(x.mant, exp-allBits)
		default:
			z.abs = z.abs.set(x.mant)
		case exp < allBits:
			// drop the bits below the binary point
			z.abs = z.abs.shr(x.mant, allBits-exp)
		}
		return z, acc

	case zero:
		return z.SetInt64(0), Exact

	case inf:
		return nil, makeAcc(x.neg)
	}

	panic("unreachable")
}

// Rat returns the rational number corresponding to x;
// or nil if x is an infinity.
// The result is Exact if x is not an Inf.
// If a non-nil *Rat argument z is provided, Rat stores
// the result in z instead of allocating a new Rat.
func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
	if debugFloat {
		x.validate()
	}

	// allocate a result only if one is returned (x is ±0 or finite)
	if z == nil && x.form <= finite {
		z = new(Rat)
	}

	switch x.form {
	case finite:
		// 0 < |x| < +Inf
		allBits := int32(len(x.mant)) * _W
		// build up numerator and denominator
		z.a.neg = x.neg
		switch {
		case x.exp > allBits:
			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
			// z already in normal form
		default:
			z.a.abs = z.a.abs.set(x.mant)
			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
			// z already in normal form
		case x.exp < allBits:
			// x has fractional bits: use the mantissa as numerator and
			// 2**(allBits-x.exp) as denominator, then normalize
			z.a.abs = z.a.abs.set(x.mant)
			t := z.b.abs.setUint64(1)
			z.b.abs = t.shl(t, uint(allBits-x.exp))
			z.norm()
		}
		return z, Exact

	case zero:
		return z.SetInt64(0), Exact

	case inf:
		return nil, makeAcc(x.neg)
	}

	panic("unreachable")
}

// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
// and returns z.
func (z *Float) Abs(x *Float) *Float {
	z.Set(x)
	z.neg = false
	return z
}

// Neg sets z to the (possibly rounded) value of x with its sign negated,
// and returns z.
func (z *Float) Neg(x *Float) *Float {
	z.Set(x)
	z.neg = !z.neg
	return z
}

// validateBinaryOperands is debugging support. It panics if x or y has
// an empty mantissa. It must only be called when debugFloat is set.
func validateBinaryOperands(x, y *Float) {
	if !debugFloat {
		// avoid performance bugs
		panic("validateBinaryOperands called but debugFloat is not set")
	}
	if len(x.mant) == 0 {
		panic("empty mantissa for x")
	}
	if len(y.mant) == 0 {
		panic("empty mantissa for y")
	}
}

// z = x + y, ignoring signs of x and y for the addition
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func (z *Float) uadd(x, y *Float) {
	// Note: This implementation requires 2 shifts most of the
	// time. It is also inefficient if exponents or precisions
	// differ by wide margins. The following article describes
	// an efficient (but much more complicated) implementation
	// compatible with the internal representation used here:
	//
	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
	// Point Addition With Exact Rounding (as in the MPFR Library)"
	// http://www.vinc17.net/research/papers/rnc6.pdf

	if debugFloat {
		validateBinaryOperands(x, y)
	}

	// compute exponents ex, ey for mantissa with "binary point"
	// on the right (mantissa.0) - use int64 to avoid overflow
	ex := int64(x.exp) - int64(len(x.mant))*_W
	ey := int64(y.exp) - int64(len(y.mant))*_W

	// If z's mantissa shares storage with an operand, the shifted
	// operand must go into a temporary rather than into z.mant.
	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)

	// TODO(gri) having a combined add-and-shift primitive
	// could make this code significantly faster
	switch {
	case ex < ey:
		// align y's mantissa with x's by shifting it left
		if al {
			t := nat(nil).shl(y.mant, uint(ey-ex))
			z.mant = z.mant.add(x.mant, t)
		} else {
			z.mant = z.mant.shl(y.mant, uint(ey-ex))
			z.mant = z.mant.add(x.mant, z.mant)
		}
	default:
		// ex == ey, no shift needed
		z.mant = z.mant.add(x.mant, y.mant)
	case ex > ey:
		// align x's mantissa with y's by shifting it left
		if al {
			t := nat(nil).shl(x.mant, uint(ex-ey))
			z.mant = z.mant.add(t, y.mant)
		} else {
			z.mant = z.mant.shl(x.mant, uint(ex-ey))
			z.mant = z.mant.add(z.mant, y.mant)
		}
		ex = ey
	}
	// len(z.mant) > 0

	// ex is now the exponent of the sum with the binary point on the right
	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
}

// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func (z *Float) usub(x, y *Float) {
	// This code is symmetric to uadd.
	// We have not factored the common code out because
	// eventually uadd (and usub) should be optimized
	// by special-casing, and the code will diverge.

	if debugFloat {
		validateBinaryOperands(x, y)
	}

	// exponents for mantissas with the binary point on the right (see uadd)
	ex := int64(x.exp) - int64(len(x.mant))*_W
	ey := int64(y.exp) - int64(len(y.mant))*_W

	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)

	switch {
	case ex < ey:
		if al {
			t := nat(nil).shl(y.mant, uint(ey-ex))
			z.mant = t.sub(x.mant, t)
		} else {
			z.mant = z.mant.shl(y.mant, uint(ey-ex))
			z.mant = z.mant.sub(x.mant, z.mant)
		}
	default:
		// ex == ey, no shift needed
		z.mant = z.mant.sub(x.mant, y.mant)
	case ex > ey:
		if al {
			t := nat(nil).shl(x.mant, uint(ex-ey))
			z.mant = t.sub(t, y.mant)
		} else {
			z.mant = z.mant.shl(x.mant, uint(ex-ey))
			z.mant = z.mant.sub(z.mant, y.mant)
		}
		ex = ey
	}

	// operands may have canceled each other out
	if len(z.mant) == 0 {
		z.acc = Exact
		z.form = zero
		z.neg = false
		return
	}
	// len(z.mant) > 0

	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
}

// z = x * y, ignoring signs of x and y for the multiplication
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func (z *Float) umul(x, y *Float) {
	if debugFloat {
		validateBinaryOperands(x, y)
	}

	// Note: This is doing too much work if the precision
	// of z is less than the sum of the precisions of x
	// and y which is often the case (e.g., if all floats
	// have the same precision).
	// TODO(gri) Optimize this for the common case.

	// compute the exponent in int64 to avoid overflow
	e := int64(x.exp) + int64(y.exp)
	if x == y {
		// same operand: use the squaring routine
		z.mant = z.mant.sqr(x.mant)
	} else {
		z.mant = z.mant.mul(x.mant, y.mant)
	}
	z.setExpAndRound(e-fnorm(z.mant), 0)
}

// z = x / y, ignoring signs of x and y for the division
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func (z *Float) uquo(x, y *Float) {
	if debugFloat {
		validateBinaryOperands(x, y)
	}

	// mantissa length in words for desired result precision + 1
	// (at least one extra bit so we get the rounding bit after
	// the division)
	n := int(z.prec/_W) + 1

	// compute adjusted x.mant such that we get enough result precision
	xadj := x.mant
	if d := n - len(x.mant) + len(y.mant); d > 0 {
		// d extra words needed => add d "0 digits" to x
		// (the d low-order words of the fresh slice stay zero)
		xadj = make(nat, len(x.mant)+d)
		copy(xadj[d:], x.mant)
	}
	// TODO(gri): If we have too many digits (d < 0), we should be able
	// to shorten x for faster division. But we must be extra careful
	// with rounding in that case.

	// Compute d before division since there may be aliasing of x.mant
	// (via xadj) or y.mant with z.mant.
	d := len(xadj) - len(y.mant)

	// divide
	var r nat
	z.mant, r = z.mant.div(nil, xadj, y.mant)
	// compute the exponent in int64 to avoid overflow
	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W

	// The result is long enough to include (at least) the rounding bit.
	// If there's a non-zero remainder, the corresponding fractional part
	// (if it were computed), would have a non-zero sticky bit (if it were
	// zero, it couldn't have a non-zero remainder).
	var sbit uint
	if len(r) > 0 {
		sbit = 1
	}

	z.setExpAndRound(e-fnorm(z.mant), sbit)
}

// ucmp returns -1, 0, or +1, depending on whether
// |x| < |y|, |x| == |y|, or |x| > |y|.
// x and y must have a non-empty mantissa and valid exponent.
+func (x *Float) ucmp(y *Float) int { + if debugFloat { + validateBinaryOperands(x, y) + } + + switch { + case x.exp < y.exp: + return -1 + case x.exp > y.exp: + return +1 + } + // x.exp == y.exp + + // compare mantissas + i := len(x.mant) + j := len(y.mant) + for i > 0 || j > 0 { + var xm, ym Word + if i > 0 { + i-- + xm = x.mant[i] + } + if j > 0 { + j-- + ym = y.mant[j] + } + switch { + case xm < ym: + return -1 + case xm > ym: + return +1 + } + } + + return 0 +} + +// Handling of sign bit as defined by IEEE 754-2008, section 6.3: +// +// When neither the inputs nor result are NaN, the sign of a product or +// quotient is the exclusive OR of the operands’ signs; the sign of a sum, +// or of a difference x−y regarded as a sum x+(−y), differs from at most +// one of the addends’ signs; and the sign of the result of conversions, +// the quantize operation, the roundToIntegral operations, and the +// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. +// These rules shall apply even when operands or results are zero or infinite. +// +// When the sum of two operands with opposite signs (or the difference of +// two operands with like signs) is exactly zero, the sign of that sum (or +// difference) shall be +0 in all rounding-direction attributes except +// roundTowardNegative; under that attribute, the sign of an exact zero +// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same +// sign as x even when x is zero. +// +// See also: https://play.golang.org/p/RtH3UCt5IH + +// Add sets z to the rounded sum x+y and returns z. If z's precision is 0, +// it is changed to the larger of x's or y's precision before the operation. +// Rounding is performed according to z's precision and rounding mode; and +// z's accuracy reports the result error relative to the exact (not rounded) +// result. Add panics with ErrNaN if x and y are infinities with opposite +// signs. The value of z is undefined in that case. 
+func (z *Float) Add(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + if x.form == finite && y.form == finite { + // x + y (common case) + + // Below we set z.neg = x.neg, and when z aliases y this will + // change the y operand's sign. This is fine, because if an + // operand aliases the receiver it'll be overwritten, but we still + // want the original x.neg and y.neg values when we evaluate + // x.neg != y.neg, so we need to save y.neg before setting z.neg. + yneg := y.neg + + z.neg = x.neg + if x.neg == yneg { + // x + y == x + y + // (-x) + (-y) == -(x + y) + z.uadd(x, y) + } else { + // x + (-y) == x - y == -(y - x) + // (-x) + y == y - x == -(x - y) + if x.ucmp(y) > 0 { + z.usub(x, y) + } else { + z.neg = !z.neg + z.usub(y, x) + } + } + if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { + z.neg = true + } + return z + } + + if x.form == inf && y.form == inf && x.neg != y.neg { + // +Inf + -Inf + // -Inf + +Inf + // value of z is undefined but make sure it's valid + z.acc = Exact + z.form = zero + z.neg = false + panic(ErrNaN{"addition of infinities with opposite signs"}) + } + + if x.form == zero && y.form == zero { + // ±0 + ±0 + z.acc = Exact + z.form = zero + z.neg = x.neg && y.neg // -0 + -0 == -0 + return z + } + + if x.form == inf || y.form == zero { + // ±Inf + y + // x + ±0 + return z.Set(x) + } + + // ±0 + y + // x + ±Inf + return z.Set(y) +} + +// Sub sets z to the rounded difference x-y and returns z. +// Precision, rounding, and accuracy reporting are as for Add. +// Sub panics with ErrNaN if x and y are infinities with equal +// signs. The value of z is undefined in that case. 
+func (z *Float) Sub(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + if x.form == finite && y.form == finite { + // x - y (common case) + yneg := y.neg + z.neg = x.neg + if x.neg != yneg { + // x - (-y) == x + y + // (-x) - y == -(x + y) + z.uadd(x, y) + } else { + // x - y == x - y == -(y - x) + // (-x) - (-y) == y - x == -(x - y) + if x.ucmp(y) > 0 { + z.usub(x, y) + } else { + z.neg = !z.neg + z.usub(y, x) + } + } + if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { + z.neg = true + } + return z + } + + if x.form == inf && y.form == inf && x.neg == y.neg { + // +Inf - +Inf + // -Inf - -Inf + // value of z is undefined but make sure it's valid + z.acc = Exact + z.form = zero + z.neg = false + panic(ErrNaN{"subtraction of infinities with equal signs"}) + } + + if x.form == zero && y.form == zero { + // ±0 - ±0 + z.acc = Exact + z.form = zero + z.neg = x.neg && !y.neg // -0 - +0 == -0 + return z + } + + if x.form == inf || y.form == zero { + // ±Inf - y + // x - ±0 + return z.Set(x) + } + + // ±0 - y + // x - ±Inf + return z.Neg(y) +} + +// Mul sets z to the rounded product x*y and returns z. +// Precision, rounding, and accuracy reporting are as for Add. +// Mul panics with ErrNaN if one operand is zero and the other +// operand an infinity. The value of z is undefined in that case. 
+func (z *Float) Mul(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + z.neg = x.neg != y.neg + + if x.form == finite && y.form == finite { + // x * y (common case) + z.umul(x, y) + return z + } + + z.acc = Exact + if x.form == zero && y.form == inf || x.form == inf && y.form == zero { + // ±0 * ±Inf + // ±Inf * ±0 + // value of z is undefined but make sure it's valid + z.form = zero + z.neg = false + panic(ErrNaN{"multiplication of zero with infinity"}) + } + + if x.form == inf || y.form == inf { + // ±Inf * y + // x * ±Inf + z.form = inf + return z + } + + // ±0 * y + // x * ±0 + z.form = zero + return z +} + +// Quo sets z to the rounded quotient x/y and returns z. +// Precision, rounding, and accuracy reporting are as for Add. +// Quo panics with ErrNaN if both operands are zero or infinities. +// The value of z is undefined in that case. +func (z *Float) Quo(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + z.neg = x.neg != y.neg + + if x.form == finite && y.form == finite { + // x / y (common case) + z.uquo(x, y) + return z + } + + z.acc = Exact + if x.form == zero && y.form == zero || x.form == inf && y.form == inf { + // ±0 / ±0 + // ±Inf / ±Inf + // value of z is undefined but make sure it's valid + z.form = zero + z.neg = false + panic(ErrNaN{"division of zero by zero or infinity by infinity"}) + } + + if x.form == zero || y.form == inf { + // ±0 / y + // x / ±Inf + z.form = zero + return z + } + + // x / ±0 + // ±Inf / y + z.form = inf + return z +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y (incl. 
-0 == 0, -Inf == -Inf, and +Inf == +Inf) +// +1 if x > y +// +func (x *Float) Cmp(y *Float) int { + if debugFloat { + x.validate() + y.validate() + } + + mx := x.ord() + my := y.ord() + switch { + case mx < my: + return -1 + case mx > my: + return +1 + } + // mx == my + + // only if |mx| == 1 we have to compare the mantissae + switch mx { + case -1: + return y.ucmp(x) + case +1: + return x.ucmp(y) + } + + return 0 +} + +// ord classifies x and returns: +// +// -2 if -Inf == x +// -1 if -Inf < x < 0 +// 0 if x == 0 (signed or unsigned) +// +1 if 0 < x < +Inf +// +2 if x == +Inf +// +func (x *Float) ord() int { + var m int + switch x.form { + case finite: + m = 1 + case zero: + return 0 + case inf: + m = 2 + } + if x.neg { + m = -m + } + return m +} + +func umax32(x, y uint32) uint32 { + if x > y { + return x + } + return y +} diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go new file mode 100644 index 0000000..7d6bf03 --- /dev/null +++ b/src/math/big/float_test.go @@ -0,0 +1,1858 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "flag" + "fmt" + "math" + "strconv" + "strings" + "testing" +) + +// Verify that ErrNaN implements the error interface. 
+var _ error = ErrNaN{} + +func (x *Float) uint64() uint64 { + u, acc := x.Uint64() + if acc != Exact { + panic(fmt.Sprintf("%s is not a uint64", x.Text('g', 10))) + } + return u +} + +func (x *Float) int64() int64 { + i, acc := x.Int64() + if acc != Exact { + panic(fmt.Sprintf("%s is not an int64", x.Text('g', 10))) + } + return i +} + +func TestFloatZeroValue(t *testing.T) { + // zero (uninitialized) value is a ready-to-use 0.0 + var x Float + if s := x.Text('f', 1); s != "0.0" { + t.Errorf("zero value = %s; want 0.0", s) + } + + // zero value has precision 0 + if prec := x.Prec(); prec != 0 { + t.Errorf("prec = %d; want 0", prec) + } + + // zero value can be used in any and all positions of binary operations + make := func(x int) *Float { + var f Float + if x != 0 { + f.SetInt64(int64(x)) + } + // x == 0 translates into the zero value + return &f + } + for _, test := range []struct { + z, x, y, want int + opname rune + op func(z, x, y *Float) *Float + }{ + {0, 0, 0, 0, '+', (*Float).Add}, + {0, 1, 2, 3, '+', (*Float).Add}, + {1, 2, 0, 2, '+', (*Float).Add}, + {2, 0, 1, 1, '+', (*Float).Add}, + + {0, 0, 0, 0, '-', (*Float).Sub}, + {0, 1, 2, -1, '-', (*Float).Sub}, + {1, 2, 0, 2, '-', (*Float).Sub}, + {2, 0, 1, -1, '-', (*Float).Sub}, + + {0, 0, 0, 0, '*', (*Float).Mul}, + {0, 1, 2, 2, '*', (*Float).Mul}, + {1, 2, 0, 0, '*', (*Float).Mul}, + {2, 0, 1, 0, '*', (*Float).Mul}, + + // {0, 0, 0, 0, '/', (*Float).Quo}, // panics + {0, 2, 1, 2, '/', (*Float).Quo}, + {1, 2, 0, 0, '/', (*Float).Quo}, // = +Inf + {2, 0, 1, 0, '/', (*Float).Quo}, + } { + z := make(test.z) + test.op(z, make(test.x), make(test.y)) + got := 0 + if !z.IsInf() { + got = int(z.int64()) + } + if got != test.want { + t.Errorf("%d %c %d = %d; want %d", test.x, test.opname, test.y, got, test.want) + } + } + + // TODO(gri) test how precision is set for zero value results +} + +func makeFloat(s string) *Float { + x, _, err := ParseFloat(s, 0, 1000, ToNearestEven) + if err != nil { + panic(err) + } + 
return x +} + +func TestFloatSetPrec(t *testing.T) { + for _, test := range []struct { + x string + prec uint + want string + acc Accuracy + }{ + // prec 0 + {"0", 0, "0", Exact}, + {"-0", 0, "-0", Exact}, + {"-Inf", 0, "-Inf", Exact}, + {"+Inf", 0, "+Inf", Exact}, + {"123", 0, "0", Below}, + {"-123", 0, "-0", Above}, + + // prec at upper limit + {"0", MaxPrec, "0", Exact}, + {"-0", MaxPrec, "-0", Exact}, + {"-Inf", MaxPrec, "-Inf", Exact}, + {"+Inf", MaxPrec, "+Inf", Exact}, + + // just a few regular cases - general rounding is tested elsewhere + {"1.5", 1, "2", Above}, + {"-1.5", 1, "-2", Below}, + {"123", 1e6, "123", Exact}, + {"-123", 1e6, "-123", Exact}, + } { + x := makeFloat(test.x).SetPrec(test.prec) + prec := test.prec + if prec > MaxPrec { + prec = MaxPrec + } + if got := x.Prec(); got != prec { + t.Errorf("%s.SetPrec(%d).Prec() == %d; want %d", test.x, test.prec, got, prec) + } + if got, acc := x.String(), x.Acc(); got != test.want || acc != test.acc { + t.Errorf("%s.SetPrec(%d) = %s (%s); want %s (%s)", test.x, test.prec, got, acc, test.want, test.acc) + } + } +} + +func TestFloatMinPrec(t *testing.T) { + const max = 100 + for _, test := range []struct { + x string + want uint + }{ + {"0", 0}, + {"-0", 0}, + {"+Inf", 0}, + {"-Inf", 0}, + {"1", 1}, + {"2", 1}, + {"3", 2}, + {"0x8001", 16}, + {"0x8001p-1000", 16}, + {"0x8001p+1000", 16}, + {"0.1", max}, + } { + x := makeFloat(test.x).SetPrec(max) + if got := x.MinPrec(); got != test.want { + t.Errorf("%s.MinPrec() = %d; want %d", test.x, got, test.want) + } + } +} + +func TestFloatSign(t *testing.T) { + for _, test := range []struct { + x string + s int + }{ + {"-Inf", -1}, + {"-1", -1}, + {"-0", 0}, + {"+0", 0}, + {"+1", +1}, + {"+Inf", +1}, + } { + x := makeFloat(test.x) + s := x.Sign() + if s != test.s { + t.Errorf("%s.Sign() = %d; want %d", test.x, s, test.s) + } + } +} + +// alike(x, y) is like x.Cmp(y) == 0 but also considers the sign of 0 (0 != -0). 
+func alike(x, y *Float) bool { + return x.Cmp(y) == 0 && x.Signbit() == y.Signbit() +} + +func alike32(x, y float32) bool { + // we can ignore NaNs + return x == y && math.Signbit(float64(x)) == math.Signbit(float64(y)) + +} + +func alike64(x, y float64) bool { + // we can ignore NaNs + return x == y && math.Signbit(x) == math.Signbit(y) + +} + +func TestFloatMantExp(t *testing.T) { + for _, test := range []struct { + x string + mant string + exp int + }{ + {"0", "0", 0}, + {"+0", "0", 0}, + {"-0", "-0", 0}, + {"Inf", "+Inf", 0}, + {"+Inf", "+Inf", 0}, + {"-Inf", "-Inf", 0}, + {"1.5", "0.75", 1}, + {"1.024e3", "0.5", 11}, + {"-0.125", "-0.5", -2}, + } { + x := makeFloat(test.x) + mant := makeFloat(test.mant) + m := new(Float) + e := x.MantExp(m) + if !alike(m, mant) || e != test.exp { + t.Errorf("%s.MantExp() = %s, %d; want %s, %d", test.x, m.Text('g', 10), e, test.mant, test.exp) + } + } +} + +func TestFloatMantExpAliasing(t *testing.T) { + x := makeFloat("0.5p10") + if e := x.MantExp(x); e != 10 { + t.Fatalf("Float.MantExp aliasing error: got %d; want 10", e) + } + if want := makeFloat("0.5"); !alike(x, want) { + t.Fatalf("Float.MantExp aliasing error: got %s; want %s", x.Text('g', 10), want.Text('g', 10)) + } +} + +func TestFloatSetMantExp(t *testing.T) { + for _, test := range []struct { + frac string + exp int + z string + }{ + {"0", 0, "0"}, + {"+0", 0, "0"}, + {"-0", 0, "-0"}, + {"Inf", 1234, "+Inf"}, + {"+Inf", -1234, "+Inf"}, + {"-Inf", -1234, "-Inf"}, + {"0", MinExp, "0"}, + {"0.25", MinExp, "+0"}, // exponent underflow + {"-0.25", MinExp, "-0"}, // exponent underflow + {"1", MaxExp, "+Inf"}, // exponent overflow + {"2", MaxExp - 1, "+Inf"}, // exponent overflow + {"0.75", 1, "1.5"}, + {"0.5", 11, "1024"}, + {"-0.5", -2, "-0.125"}, + {"32", 5, "1024"}, + {"1024", -10, "1"}, + } { + frac := makeFloat(test.frac) + want := makeFloat(test.z) + var z Float + z.SetMantExp(frac, test.exp) + if !alike(&z, want) { + t.Errorf("SetMantExp(%s, %d) = %s; want %s", 
test.frac, test.exp, z.Text('g', 10), test.z) + } + // test inverse property + mant := new(Float) + if z.SetMantExp(mant, want.MantExp(mant)).Cmp(want) != 0 { + t.Errorf("Inverse property not satisfied: got %s; want %s", z.Text('g', 10), test.z) + } + } +} + +func TestFloatPredicates(t *testing.T) { + for _, test := range []struct { + x string + sign int + signbit, inf bool + }{ + {x: "-Inf", sign: -1, signbit: true, inf: true}, + {x: "-1", sign: -1, signbit: true}, + {x: "-0", signbit: true}, + {x: "0"}, + {x: "1", sign: 1}, + {x: "+Inf", sign: 1, inf: true}, + } { + x := makeFloat(test.x) + if got := x.Signbit(); got != test.signbit { + t.Errorf("(%s).Signbit() = %v; want %v", test.x, got, test.signbit) + } + if got := x.Sign(); got != test.sign { + t.Errorf("(%s).Sign() = %d; want %d", test.x, got, test.sign) + } + if got := x.IsInf(); got != test.inf { + t.Errorf("(%s).IsInf() = %v; want %v", test.x, got, test.inf) + } + } +} + +func TestFloatIsInt(t *testing.T) { + for _, test := range []string{ + "0 int", + "-0 int", + "1 int", + "-1 int", + "0.5", + "1.23", + "1.23e1", + "1.23e2 int", + "0.000000001e+8", + "0.000000001e+9 int", + "1.2345e200 int", + "Inf", + "+Inf", + "-Inf", + } { + s := strings.TrimSuffix(test, " int") + want := s != test + if got := makeFloat(s).IsInt(); got != want { + t.Errorf("%s.IsInt() == %t", s, got) + } + } +} + +func fromBinary(s string) int64 { + x, err := strconv.ParseInt(s, 2, 64) + if err != nil { + panic(err) + } + return x +} + +func toBinary(x int64) string { + return strconv.FormatInt(x, 2) +} + +func testFloatRound(t *testing.T, x, r int64, prec uint, mode RoundingMode) { + // verify test data + var ok bool + switch mode { + case ToNearestEven, ToNearestAway: + ok = true // nothing to do for now + case ToZero: + if x < 0 { + ok = r >= x + } else { + ok = r <= x + } + case AwayFromZero: + if x < 0 { + ok = r <= x + } else { + ok = r >= x + } + case ToNegativeInf: + ok = r <= x + case ToPositiveInf: + ok = r >= x + default: 
+ panic("unreachable") + } + if !ok { + t.Fatalf("incorrect test data for prec = %d, %s: x = %s, r = %s", prec, mode, toBinary(x), toBinary(r)) + } + + // compute expected accuracy + a := Exact + switch { + case r < x: + a = Below + case r > x: + a = Above + } + + // round + f := new(Float).SetMode(mode).SetInt64(x).SetPrec(prec) + + // check result + r1 := f.int64() + p1 := f.Prec() + a1 := f.Acc() + if r1 != r || p1 != prec || a1 != a { + t.Errorf("round %s (%d bits, %s) incorrect: got %s (%d bits, %s); want %s (%d bits, %s)", + toBinary(x), prec, mode, + toBinary(r1), p1, a1, + toBinary(r), prec, a) + return + } + + // g and f should be the same + // (rounding by SetPrec after SetInt64 using default precision + // should be the same as rounding by SetInt64 after setting the + // precision) + g := new(Float).SetMode(mode).SetPrec(prec).SetInt64(x) + if !alike(g, f) { + t.Errorf("round %s (%d bits, %s) not symmetric: got %s and %s; want %s", + toBinary(x), prec, mode, + toBinary(g.int64()), + toBinary(r1), + toBinary(r), + ) + return + } + + // h and f should be the same + // (repeated rounding should be idempotent) + h := new(Float).SetMode(mode).SetPrec(prec).Set(f) + if !alike(h, f) { + t.Errorf("round %s (%d bits, %s) not idempotent: got %s and %s; want %s", + toBinary(x), prec, mode, + toBinary(h.int64()), + toBinary(r1), + toBinary(r), + ) + return + } +} + +// TestFloatRound tests basic rounding. 
+func TestFloatRound(t *testing.T) { + for _, test := range []struct { + prec uint + x, zero, neven, naway, away string // input, results rounded to prec bits + }{ + {5, "1000", "1000", "1000", "1000", "1000"}, + {5, "1001", "1001", "1001", "1001", "1001"}, + {5, "1010", "1010", "1010", "1010", "1010"}, + {5, "1011", "1011", "1011", "1011", "1011"}, + {5, "1100", "1100", "1100", "1100", "1100"}, + {5, "1101", "1101", "1101", "1101", "1101"}, + {5, "1110", "1110", "1110", "1110", "1110"}, + {5, "1111", "1111", "1111", "1111", "1111"}, + + {4, "1000", "1000", "1000", "1000", "1000"}, + {4, "1001", "1001", "1001", "1001", "1001"}, + {4, "1010", "1010", "1010", "1010", "1010"}, + {4, "1011", "1011", "1011", "1011", "1011"}, + {4, "1100", "1100", "1100", "1100", "1100"}, + {4, "1101", "1101", "1101", "1101", "1101"}, + {4, "1110", "1110", "1110", "1110", "1110"}, + {4, "1111", "1111", "1111", "1111", "1111"}, + + {3, "1000", "1000", "1000", "1000", "1000"}, + {3, "1001", "1000", "1000", "1010", "1010"}, + {3, "1010", "1010", "1010", "1010", "1010"}, + {3, "1011", "1010", "1100", "1100", "1100"}, + {3, "1100", "1100", "1100", "1100", "1100"}, + {3, "1101", "1100", "1100", "1110", "1110"}, + {3, "1110", "1110", "1110", "1110", "1110"}, + {3, "1111", "1110", "10000", "10000", "10000"}, + + {3, "1000001", "1000000", "1000000", "1000000", "1010000"}, + {3, "1001001", "1000000", "1010000", "1010000", "1010000"}, + {3, "1010001", "1010000", "1010000", "1010000", "1100000"}, + {3, "1011001", "1010000", "1100000", "1100000", "1100000"}, + {3, "1100001", "1100000", "1100000", "1100000", "1110000"}, + {3, "1101001", "1100000", "1110000", "1110000", "1110000"}, + {3, "1110001", "1110000", "1110000", "1110000", "10000000"}, + {3, "1111001", "1110000", "10000000", "10000000", "10000000"}, + + {2, "1000", "1000", "1000", "1000", "1000"}, + {2, "1001", "1000", "1000", "1000", "1100"}, + {2, "1010", "1000", "1000", "1100", "1100"}, + {2, "1011", "1000", "1100", "1100", "1100"}, + {2, 
"1100", "1100", "1100", "1100", "1100"}, + {2, "1101", "1100", "1100", "1100", "10000"}, + {2, "1110", "1100", "10000", "10000", "10000"}, + {2, "1111", "1100", "10000", "10000", "10000"}, + + {2, "1000001", "1000000", "1000000", "1000000", "1100000"}, + {2, "1001001", "1000000", "1000000", "1000000", "1100000"}, + {2, "1010001", "1000000", "1100000", "1100000", "1100000"}, + {2, "1011001", "1000000", "1100000", "1100000", "1100000"}, + {2, "1100001", "1100000", "1100000", "1100000", "10000000"}, + {2, "1101001", "1100000", "1100000", "1100000", "10000000"}, + {2, "1110001", "1100000", "10000000", "10000000", "10000000"}, + {2, "1111001", "1100000", "10000000", "10000000", "10000000"}, + + {1, "1000", "1000", "1000", "1000", "1000"}, + {1, "1001", "1000", "1000", "1000", "10000"}, + {1, "1010", "1000", "1000", "1000", "10000"}, + {1, "1011", "1000", "1000", "1000", "10000"}, + {1, "1100", "1000", "10000", "10000", "10000"}, + {1, "1101", "1000", "10000", "10000", "10000"}, + {1, "1110", "1000", "10000", "10000", "10000"}, + {1, "1111", "1000", "10000", "10000", "10000"}, + + {1, "1000001", "1000000", "1000000", "1000000", "10000000"}, + {1, "1001001", "1000000", "1000000", "1000000", "10000000"}, + {1, "1010001", "1000000", "1000000", "1000000", "10000000"}, + {1, "1011001", "1000000", "1000000", "1000000", "10000000"}, + {1, "1100001", "1000000", "10000000", "10000000", "10000000"}, + {1, "1101001", "1000000", "10000000", "10000000", "10000000"}, + {1, "1110001", "1000000", "10000000", "10000000", "10000000"}, + {1, "1111001", "1000000", "10000000", "10000000", "10000000"}, + } { + x := fromBinary(test.x) + z := fromBinary(test.zero) + e := fromBinary(test.neven) + n := fromBinary(test.naway) + a := fromBinary(test.away) + prec := test.prec + + testFloatRound(t, x, z, prec, ToZero) + testFloatRound(t, x, e, prec, ToNearestEven) + testFloatRound(t, x, n, prec, ToNearestAway) + testFloatRound(t, x, a, prec, AwayFromZero) + + testFloatRound(t, x, z, prec, 
ToNegativeInf) + testFloatRound(t, x, a, prec, ToPositiveInf) + + testFloatRound(t, -x, -a, prec, ToNegativeInf) + testFloatRound(t, -x, -z, prec, ToPositiveInf) + } +} + +// TestFloatRound24 tests that rounding a float64 to 24 bits +// matches IEEE-754 rounding to nearest when converting a +// float64 to a float32 (excluding denormal numbers). +func TestFloatRound24(t *testing.T) { + const x0 = 1<<26 - 0x10 // 11...110000 (26 bits) + for d := 0; d <= 0x10; d++ { + x := float64(x0 + d) + f := new(Float).SetPrec(24).SetFloat64(x) + got, _ := f.Float32() + want := float32(x) + if got != want { + t.Errorf("Round(%g, 24) = %g; want %g", x, got, want) + } + } +} + +func TestFloatSetUint64(t *testing.T) { + for _, want := range []uint64{ + 0, + 1, + 2, + 10, + 100, + 1<<32 - 1, + 1 << 32, + 1<<64 - 1, + } { + var f Float + f.SetUint64(want) + if got := f.uint64(); got != want { + t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want) + } + } + + // test basic rounding behavior (exhaustive rounding testing is done elsewhere) + const x uint64 = 0x8765432187654321 // 64 bits needed + for prec := uint(1); prec <= 64; prec++ { + f := new(Float).SetPrec(prec).SetMode(ToZero).SetUint64(x) + got := f.uint64() + want := x &^ (1<<(64-prec) - 1) // cut off (round to zero) low 64-prec bits + if got != want { + t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want) + } + } +} + +func TestFloatSetInt64(t *testing.T) { + for _, want := range []int64{ + 0, + 1, + 2, + 10, + 100, + 1<<32 - 1, + 1 << 32, + 1<<63 - 1, + } { + for i := range [2]int{} { + if i&1 != 0 { + want = -want + } + var f Float + f.SetInt64(want) + if got := f.int64(); got != want { + t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want) + } + } + } + + // test basic rounding behavior (exhaustive rounding testing is done elsewhere) + const x int64 = 0x7654321076543210 // 63 bits needed + for prec := uint(1); prec <= 63; prec++ { + f := new(Float).SetPrec(prec).SetMode(ToZero).SetInt64(x) + got := 
f.int64() + want := x &^ (1<<(63-prec) - 1) // cut off (round to zero) low 63-prec bits + if got != want { + t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want) + } + } +} + +func TestFloatSetFloat64(t *testing.T) { + for _, want := range []float64{ + 0, + 1, + 2, + 12345, + 1e10, + 1e100, + 3.14159265e10, + 2.718281828e-123, + 1.0 / 3, + math.MaxFloat32, + math.MaxFloat64, + math.SmallestNonzeroFloat32, + math.SmallestNonzeroFloat64, + math.Inf(-1), + math.Inf(0), + -math.Inf(1), + } { + for i := range [2]int{} { + if i&1 != 0 { + want = -want + } + var f Float + f.SetFloat64(want) + if got, acc := f.Float64(); got != want || acc != Exact { + t.Errorf("got %g (%s, %s); want %g (Exact)", got, f.Text('p', 0), acc, want) + } + } + } + + // test basic rounding behavior (exhaustive rounding testing is done elsewhere) + const x uint64 = 0x8765432143218 // 53 bits needed + for prec := uint(1); prec <= 52; prec++ { + f := new(Float).SetPrec(prec).SetMode(ToZero).SetFloat64(float64(x)) + got, _ := f.Float64() + want := float64(x &^ (1<<(52-prec) - 1)) // cut off (round to zero) low 53-prec bits + if got != want { + t.Errorf("got %g (%s); want %g", got, f.Text('p', 0), want) + } + } + + // test NaN + defer func() { + if p, ok := recover().(ErrNaN); !ok { + t.Errorf("got %v; want ErrNaN panic", p) + } + }() + var f Float + f.SetFloat64(math.NaN()) + // should not reach here + t.Errorf("got %s; want ErrNaN panic", f.Text('p', 0)) +} + +func TestFloatSetInt(t *testing.T) { + for _, want := range []string{ + "0", + "1", + "-1", + "1234567890", + "123456789012345678901234567890", + "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", + } { + var x Int + _, ok := x.SetString(want, 0) + if !ok { + t.Errorf("invalid integer %s", want) + continue + } + n := x.BitLen() + + var f Float + f.SetInt(&x) + + // check precision + if n < 64 { + n = 64 + } + if prec := f.Prec(); prec != uint(n) { + t.Errorf("got prec = %d; want %d", prec, n) 
+ } + + // check value + got := f.Text('g', 100) + if got != want { + t.Errorf("got %s (%s); want %s", got, f.Text('p', 0), want) + } + } + + // TODO(gri) test basic rounding behavior +} + +func TestFloatSetRat(t *testing.T) { + for _, want := range []string{ + "0", + "1", + "-1", + "1234567890", + "123456789012345678901234567890", + "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", + "1.2", + "3.14159265", + // TODO(gri) expand + } { + var x Rat + _, ok := x.SetString(want) + if !ok { + t.Errorf("invalid fraction %s", want) + continue + } + n := max(x.Num().BitLen(), x.Denom().BitLen()) + + var f1, f2 Float + f2.SetPrec(1000) + f1.SetRat(&x) + f2.SetRat(&x) + + // check precision when set automatically + if n < 64 { + n = 64 + } + if prec := f1.Prec(); prec != uint(n) { + t.Errorf("got prec = %d; want %d", prec, n) + } + + got := f2.Text('g', 100) + if got != want { + t.Errorf("got %s (%s); want %s", got, f2.Text('p', 0), want) + } + } +} + +func TestFloatSetInf(t *testing.T) { + var f Float + for _, test := range []struct { + signbit bool + prec uint + want string + }{ + {false, 0, "+Inf"}, + {true, 0, "-Inf"}, + {false, 10, "+Inf"}, + {true, 30, "-Inf"}, + } { + x := f.SetPrec(test.prec).SetInf(test.signbit) + if got := x.String(); got != test.want || x.Prec() != test.prec { + t.Errorf("SetInf(%v) = %s (prec = %d); want %s (prec = %d)", test.signbit, got, x.Prec(), test.want, test.prec) + } + } +} + +func TestFloatUint64(t *testing.T) { + for _, test := range []struct { + x string + out uint64 + acc Accuracy + }{ + {"-Inf", 0, Above}, + {"-1", 0, Above}, + {"-1e-1000", 0, Above}, + {"-0", 0, Exact}, + {"0", 0, Exact}, + {"1e-1000", 0, Below}, + {"1", 1, Exact}, + {"1.000000000000000000001", 1, Below}, + {"12345.0", 12345, Exact}, + {"12345.000000000000000000001", 12345, Below}, + {"18446744073709551615", 18446744073709551615, Exact}, + {"18446744073709551615.000000000000000000001", math.MaxUint64, Below}, + 
{"18446744073709551616", math.MaxUint64, Below}, + {"1e10000", math.MaxUint64, Below}, + {"+Inf", math.MaxUint64, Below}, + } { + x := makeFloat(test.x) + out, acc := x.Uint64() + if out != test.out || acc != test.acc { + t.Errorf("%s: got %d (%s); want %d (%s)", test.x, out, acc, test.out, test.acc) + } + } +} + +func TestFloatInt64(t *testing.T) { + for _, test := range []struct { + x string + out int64 + acc Accuracy + }{ + {"-Inf", math.MinInt64, Above}, + {"-1e10000", math.MinInt64, Above}, + {"-9223372036854775809", math.MinInt64, Above}, + {"-9223372036854775808.000000000000000000001", math.MinInt64, Above}, + {"-9223372036854775808", -9223372036854775808, Exact}, + {"-9223372036854775807.000000000000000000001", -9223372036854775807, Above}, + {"-9223372036854775807", -9223372036854775807, Exact}, + {"-12345.000000000000000000001", -12345, Above}, + {"-12345.0", -12345, Exact}, + {"-1.000000000000000000001", -1, Above}, + {"-1.5", -1, Above}, + {"-1", -1, Exact}, + {"-1e-1000", 0, Above}, + {"0", 0, Exact}, + {"1e-1000", 0, Below}, + {"1", 1, Exact}, + {"1.000000000000000000001", 1, Below}, + {"1.5", 1, Below}, + {"12345.0", 12345, Exact}, + {"12345.000000000000000000001", 12345, Below}, + {"9223372036854775807", 9223372036854775807, Exact}, + {"9223372036854775807.000000000000000000001", math.MaxInt64, Below}, + {"9223372036854775808", math.MaxInt64, Below}, + {"1e10000", math.MaxInt64, Below}, + {"+Inf", math.MaxInt64, Below}, + } { + x := makeFloat(test.x) + out, acc := x.Int64() + if out != test.out || acc != test.acc { + t.Errorf("%s: got %d (%s); want %d (%s)", test.x, out, acc, test.out, test.acc) + } + } +} + +func TestFloatFloat32(t *testing.T) { + for _, test := range []struct { + x string + out float32 + acc Accuracy + }{ + {"0", 0, Exact}, + + // underflow to zero + {"1e-1000", 0, Below}, + {"0x0.000002p-127", 0, Below}, + {"0x.0000010p-126", 0, Below}, + + // denormals + {"1.401298464e-45", math.SmallestNonzeroFloat32, Above}, // rounded up to 
smallest denormal + {"0x.ffffff8p-149", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal + {"0x.0000018p-126", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal + {"0x.0000020p-126", math.SmallestNonzeroFloat32, Exact}, + {"0x.8p-148", math.SmallestNonzeroFloat32, Exact}, + {"1p-149", math.SmallestNonzeroFloat32, Exact}, + {"0x.fffffep-126", math.Float32frombits(0x7fffff), Exact}, // largest denormal + + // special denormal cases (see issues 14553, 14651) + {"0x0.0000001p-126", math.Float32frombits(0x00000000), Below}, // underflow to zero + {"0x0.0000008p-126", math.Float32frombits(0x00000000), Below}, // underflow to zero + {"0x0.0000010p-126", math.Float32frombits(0x00000000), Below}, // rounded down to even + {"0x0.0000011p-126", math.Float32frombits(0x00000001), Above}, // rounded up to smallest denormal + {"0x0.0000018p-126", math.Float32frombits(0x00000001), Above}, // rounded up to smallest denormal + + {"0x1.0000000p-149", math.Float32frombits(0x00000001), Exact}, // smallest denormal + {"0x0.0000020p-126", math.Float32frombits(0x00000001), Exact}, // smallest denormal + {"0x0.fffffe0p-126", math.Float32frombits(0x007fffff), Exact}, // largest denormal + {"0x1.0000000p-126", math.Float32frombits(0x00800000), Exact}, // smallest normal + + {"0x0.8p-149", math.Float32frombits(0x000000000), Below}, // rounded down to even + {"0x0.9p-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal + {"0x0.ap-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal + {"0x0.bp-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal + {"0x0.cp-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal + + {"0x1.0p-149", math.Float32frombits(0x000000001), Exact}, // smallest denormal + {"0x1.7p-149", math.Float32frombits(0x000000001), Below}, + {"0x1.8p-149", math.Float32frombits(0x000000002), Above}, + {"0x1.9p-149", 
math.Float32frombits(0x000000002), Above}, + + {"0x2.0p-149", math.Float32frombits(0x000000002), Exact}, + {"0x2.8p-149", math.Float32frombits(0x000000002), Below}, // rounded down to even + {"0x2.9p-149", math.Float32frombits(0x000000003), Above}, + + {"0x3.0p-149", math.Float32frombits(0x000000003), Exact}, + {"0x3.7p-149", math.Float32frombits(0x000000003), Below}, + {"0x3.8p-149", math.Float32frombits(0x000000004), Above}, // rounded up to even + + {"0x4.0p-149", math.Float32frombits(0x000000004), Exact}, + {"0x4.8p-149", math.Float32frombits(0x000000004), Below}, // rounded down to even + {"0x4.9p-149", math.Float32frombits(0x000000005), Above}, + + // specific case from issue 14553 + {"0x7.7p-149", math.Float32frombits(0x000000007), Below}, + {"0x7.8p-149", math.Float32frombits(0x000000008), Above}, + {"0x7.9p-149", math.Float32frombits(0x000000008), Above}, + + // normals + {"0x.ffffffp-126", math.Float32frombits(0x00800000), Above}, // rounded up to smallest normal + {"1p-126", math.Float32frombits(0x00800000), Exact}, // smallest normal + {"0x1.fffffep-126", math.Float32frombits(0x00ffffff), Exact}, + {"0x1.ffffffp-126", math.Float32frombits(0x01000000), Above}, // rounded up + {"1", 1, Exact}, + {"1.000000000000000000001", 1, Below}, + {"12345.0", 12345, Exact}, + {"12345.000000000000000000001", 12345, Below}, + {"0x1.fffffe0p127", math.MaxFloat32, Exact}, + {"0x1.fffffe8p127", math.MaxFloat32, Below}, + + // overflow + {"0x1.ffffff0p127", float32(math.Inf(+1)), Above}, + {"0x1p128", float32(math.Inf(+1)), Above}, + {"1e10000", float32(math.Inf(+1)), Above}, + {"0x1.ffffff0p2147483646", float32(math.Inf(+1)), Above}, // overflow in rounding + + // inf + {"Inf", float32(math.Inf(+1)), Exact}, + } { + for i := 0; i < 2; i++ { + // test both signs + tx, tout, tacc := test.x, test.out, test.acc + if i != 0 { + tx = "-" + tx + tout = -tout + tacc = -tacc + } + + // conversion should match strconv where syntax is agreeable + if f, err := strconv.ParseFloat(tx, 
32); err == nil && !alike32(float32(f), tout) { + t.Errorf("%s: got %g; want %g (incorrect test data)", tx, f, tout) + } + + x := makeFloat(tx) + out, acc := x.Float32() + if !alike32(out, tout) || acc != tacc { + t.Errorf("%s: got %g (%#08x, %s); want %g (%#08x, %s)", tx, out, math.Float32bits(out), acc, test.out, math.Float32bits(test.out), tacc) + } + + // test that x.SetFloat64(float64(f)).Float32() == f + var x2 Float + out2, acc2 := x2.SetFloat64(float64(out)).Float32() + if !alike32(out2, out) || acc2 != Exact { + t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out) + } + } + } +} + +func TestFloatFloat64(t *testing.T) { + const smallestNormalFloat64 = 2.2250738585072014e-308 // 1p-1022 + for _, test := range []struct { + x string + out float64 + acc Accuracy + }{ + {"0", 0, Exact}, + + // underflow to zero + {"1e-1000", 0, Below}, + {"0x0.0000000000001p-1023", 0, Below}, + {"0x0.00000000000008p-1022", 0, Below}, + + // denormals + {"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal + {"0x0.00000000000010p-1022", math.SmallestNonzeroFloat64, Exact}, // smallest denormal + {"0x.8p-1073", math.SmallestNonzeroFloat64, Exact}, + {"1p-1074", math.SmallestNonzeroFloat64, Exact}, + {"0x.fffffffffffffp-1022", math.Float64frombits(0x000fffffffffffff), Exact}, // largest denormal + + // special denormal cases (see issues 14553, 14651) + {"0x0.00000000000001p-1022", math.Float64frombits(0x00000000000000000), Below}, // underflow to zero + {"0x0.00000000000004p-1022", math.Float64frombits(0x00000000000000000), Below}, // underflow to zero + {"0x0.00000000000008p-1022", math.Float64frombits(0x00000000000000000), Below}, // rounded down to even + {"0x0.00000000000009p-1022", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal + {"0x0.0000000000000ap-1022", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal + + {"0x0.8p-1074", 
math.Float64frombits(0x00000000000000000), Below}, // rounded down to even + {"0x0.9p-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal + {"0x0.ap-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal + {"0x0.bp-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal + {"0x0.cp-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal + + {"0x1.0p-1074", math.Float64frombits(0x00000000000000001), Exact}, + {"0x1.7p-1074", math.Float64frombits(0x00000000000000001), Below}, + {"0x1.8p-1074", math.Float64frombits(0x00000000000000002), Above}, + {"0x1.9p-1074", math.Float64frombits(0x00000000000000002), Above}, + + {"0x2.0p-1074", math.Float64frombits(0x00000000000000002), Exact}, + {"0x2.8p-1074", math.Float64frombits(0x00000000000000002), Below}, // rounded down to even + {"0x2.9p-1074", math.Float64frombits(0x00000000000000003), Above}, + + {"0x3.0p-1074", math.Float64frombits(0x00000000000000003), Exact}, + {"0x3.7p-1074", math.Float64frombits(0x00000000000000003), Below}, + {"0x3.8p-1074", math.Float64frombits(0x00000000000000004), Above}, // rounded up to even + + {"0x4.0p-1074", math.Float64frombits(0x00000000000000004), Exact}, + {"0x4.8p-1074", math.Float64frombits(0x00000000000000004), Below}, // rounded down to even + {"0x4.9p-1074", math.Float64frombits(0x00000000000000005), Above}, + + // normals + {"0x.fffffffffffff8p-1022", math.Float64frombits(0x0010000000000000), Above}, // rounded up to smallest normal + {"1p-1022", math.Float64frombits(0x0010000000000000), Exact}, // smallest normal + {"1", 1, Exact}, + {"1.000000000000000000001", 1, Below}, + {"12345.0", 12345, Exact}, + {"12345.000000000000000000001", 12345, Below}, + {"0x1.fffffffffffff0p1023", math.MaxFloat64, Exact}, + {"0x1.fffffffffffff4p1023", math.MaxFloat64, Below}, + + // overflow + {"0x1.fffffffffffff8p1023", math.Inf(+1), Above}, + 
{"0x1p1024", math.Inf(+1), Above}, + {"1e10000", math.Inf(+1), Above}, + {"0x1.fffffffffffff8p2147483646", math.Inf(+1), Above}, // overflow in rounding + {"Inf", math.Inf(+1), Exact}, + + // selected denormalized values that were handled incorrectly in the past + {"0x.fffffffffffffp-1022", smallestNormalFloat64 - math.SmallestNonzeroFloat64, Exact}, + {"4503599627370495p-1074", smallestNormalFloat64 - math.SmallestNonzeroFloat64, Exact}, + + // https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ + {"2.2250738585072011e-308", 2.225073858507201e-308, Below}, + // https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/ + {"2.2250738585072012e-308", 2.2250738585072014e-308, Above}, + } { + for i := 0; i < 2; i++ { + // test both signs + tx, tout, tacc := test.x, test.out, test.acc + if i != 0 { + tx = "-" + tx + tout = -tout + tacc = -tacc + } + + // conversion should match strconv where syntax is agreeable + if f, err := strconv.ParseFloat(tx, 64); err == nil && !alike64(f, tout) { + t.Errorf("%s: got %g; want %g (incorrect test data)", tx, f, tout) + } + + x := makeFloat(tx) + out, acc := x.Float64() + if !alike64(out, tout) || acc != tacc { + t.Errorf("%s: got %g (%#016x, %s); want %g (%#016x, %s)", tx, out, math.Float64bits(out), acc, test.out, math.Float64bits(test.out), tacc) + } + + // test that x.SetFloat64(f).Float64() == f + var x2 Float + out2, acc2 := x2.SetFloat64(out).Float64() + if !alike64(out2, out) || acc2 != Exact { + t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out) + } + } + } +} + +func TestFloatInt(t *testing.T) { + for _, test := range []struct { + x string + want string + acc Accuracy + }{ + {"0", "0", Exact}, + {"+0", "0", Exact}, + {"-0", "0", Exact}, + {"Inf", "nil", Below}, + {"+Inf", "nil", Below}, + {"-Inf", "nil", Above}, + {"1", "1", Exact}, + {"-1", "-1", Exact}, + {"1.23", "1", Below}, + {"-1.23", "-1", Above}, + {"123e-2", "1", Below}, + {"123e-3", 
"0", Below}, + {"123e-4", "0", Below}, + {"1e-1000", "0", Below}, + {"-1e-1000", "0", Above}, + {"1e+10", "10000000000", Exact}, + {"1e+100", "10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", Exact}, + } { + x := makeFloat(test.x) + res, acc := x.Int(nil) + got := "nil" + if res != nil { + got = res.String() + } + if got != test.want || acc != test.acc { + t.Errorf("%s: got %s (%s); want %s (%s)", test.x, got, acc, test.want, test.acc) + } + } + + // check that supplied *Int is used + for _, f := range []string{"0", "1", "-1", "1234"} { + x := makeFloat(f) + i := new(Int) + if res, _ := x.Int(i); res != i { + t.Errorf("(%s).Int is not using supplied *Int", f) + } + } +} + +func TestFloatRat(t *testing.T) { + for _, test := range []struct { + x, want string + acc Accuracy + }{ + {"0", "0/1", Exact}, + {"+0", "0/1", Exact}, + {"-0", "0/1", Exact}, + {"Inf", "nil", Below}, + {"+Inf", "nil", Below}, + {"-Inf", "nil", Above}, + {"1", "1/1", Exact}, + {"-1", "-1/1", Exact}, + {"1.25", "5/4", Exact}, + {"-1.25", "-5/4", Exact}, + {"1e10", "10000000000/1", Exact}, + {"1p10", "1024/1", Exact}, + {"-1p-10", "-1/1024", Exact}, + {"3.14159265", "7244019449799623199/2305843009213693952", Exact}, + } { + x := makeFloat(test.x).SetPrec(64) + res, acc := x.Rat(nil) + got := "nil" + if res != nil { + got = res.String() + } + if got != test.want { + t.Errorf("%s: got %s; want %s", test.x, got, test.want) + continue + } + if acc != test.acc { + t.Errorf("%s: got %s; want %s", test.x, acc, test.acc) + continue + } + + // inverse conversion + if res != nil { + got := new(Float).SetPrec(64).SetRat(res) + if got.Cmp(x) != 0 { + t.Errorf("%s: got %s; want %s", test.x, got, x) + } + } + } + + // check that supplied *Rat is used + for _, f := range []string{"0", "1", "-1", "1234"} { + x := makeFloat(f) + r := new(Rat) + if res, _ := x.Rat(r); res != r { + t.Errorf("(%s).Rat is not using supplied *Rat", f) + } + } +} + +func 
TestFloatAbs(t *testing.T) { + for _, test := range []string{ + "0", + "1", + "1234", + "1.23e-2", + "1e-1000", + "1e1000", + "Inf", + } { + p := makeFloat(test) + a := new(Float).Abs(p) + if !alike(a, p) { + t.Errorf("%s: got %s; want %s", test, a.Text('g', 10), test) + } + + n := makeFloat("-" + test) + a.Abs(n) + if !alike(a, p) { + t.Errorf("-%s: got %s; want %s", test, a.Text('g', 10), test) + } + } +} + +func TestFloatNeg(t *testing.T) { + for _, test := range []string{ + "0", + "1", + "1234", + "1.23e-2", + "1e-1000", + "1e1000", + "Inf", + } { + p1 := makeFloat(test) + n1 := makeFloat("-" + test) + n2 := new(Float).Neg(p1) + p2 := new(Float).Neg(n2) + if !alike(n2, n1) { + t.Errorf("%s: got %s; want %s", test, n2.Text('g', 10), n1.Text('g', 10)) + } + if !alike(p2, p1) { + t.Errorf("%s: got %s; want %s", test, p2.Text('g', 10), p1.Text('g', 10)) + } + } +} + +func TestFloatInc(t *testing.T) { + const n = 10 + for _, prec := range precList { + if 1<<prec < n { + continue // prec must be large enough to hold all numbers from 0 to n + } + var x, one Float + x.SetPrec(prec) + one.SetInt64(1) + for i := 0; i < n; i++ { + x.Add(&x, &one) + } + if x.Cmp(new(Float).SetInt64(n)) != 0 { + t.Errorf("prec = %d: got %s; want %d", prec, &x, n) + } + } +} + +// Selected precisions with which to run various tests. +var precList = [...]uint{1, 2, 5, 8, 10, 16, 23, 24, 32, 50, 53, 64, 100, 128, 500, 511, 512, 513, 1000, 10000} + +// Selected bits with which to run various tests. +// Each entry is a list of bits representing a floating-point number (see fromBits). 
+var bitsList = [...]Bits{ + {}, // = 0 + {0}, // = 1 + {1}, // = 2 + {-1}, // = 1/2 + {10}, // = 2**10 == 1024 + {-10}, // = 2**-10 == 1/1024 + {100, 10, 1}, // = 2**100 + 2**10 + 2**1 + {0, -1, -2, -10}, // = 2**0 + 2**-1 + 2**-2 + 2**-10 + // TODO(gri) add more test cases +} + +// TestFloatAdd tests Float.Add/Sub by comparing the result of a "manual" +// addition/subtraction of arguments represented by Bits values with the +// respective Float addition/subtraction for a variety of precisions +// and rounding modes. +func TestFloatAdd(t *testing.T) { + for _, xbits := range bitsList { + for _, ybits := range bitsList { + // exact values + x := xbits.Float() + y := ybits.Float() + zbits := xbits.add(ybits) + z := zbits.Float() + + for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} { + for _, prec := range precList { + // x + y computed by Float.Add must equal the exact sum + // rounded with the same precision and mode + got := new(Float).SetPrec(prec).SetMode(mode) + got.Add(x, y) + want := zbits.round(prec, mode) + if got.Cmp(want) != 0 { + t.Errorf("i = %d, prec = %d, %s:\n\t %s %v\n\t+ %s %v\n\t= %s\n\twant %s", + i, prec, mode, x, xbits, y, ybits, got, want) + } + + // inverse operation: (x + y) - x must equal y rounded + // with the same precision and mode + got.Sub(z, x) + want = ybits.round(prec, mode) + if got.Cmp(want) != 0 { + t.Errorf("i = %d, prec = %d, %s:\n\t %s %v\n\t- %s %v\n\t= %s\n\twant %s", + i, prec, mode, z, zbits, x, xbits, got, want) + } + } + } + } + } +} + +// TestFloatAddRoundZero tests Float.Add/Sub rounding when the result is exactly zero. +// x + (-x) or x - x for non-zero x should be +0 in all cases except when +// the rounding mode is ToNegativeInf in which case it should be -0. 
+func TestFloatAddRoundZero(t *testing.T) { + for _, mode := range [...]RoundingMode{ToNearestEven, ToNearestAway, ToZero, AwayFromZero, ToPositiveInf, ToNegativeInf} { + x := NewFloat(5.0) + y := new(Float).Neg(x) + want := NewFloat(0.0) + if mode == ToNegativeInf { + want.Neg(want) + } + got := new(Float).SetMode(mode) + got.Add(x, y) + if got.Cmp(want) != 0 || got.neg != (mode == ToNegativeInf) { + t.Errorf("%s:\n\t %v\n\t+ %v\n\t= %v\n\twant %v", + mode, x, y, got, want) + } + got.Sub(x, x) + if got.Cmp(want) != 0 || got.neg != (mode == ToNegativeInf) { + t.Errorf("%v:\n\t %v\n\t- %v\n\t= %v\n\twant %v", + mode, x, x, got, want) + } + } +} + +// TestFloatAdd32 tests that Float.Add/Sub of numbers with +// 24bit mantissa behaves like float32 addition/subtraction +// (excluding denormal numbers). +func TestFloatAdd32(t *testing.T) { + // choose base such that we cross the mantissa precision limit + const base = 1<<26 - 0x10 // 11...110000 (26 bits) + for d := 0; d <= 0x10; d++ { + // the second iteration (i == 1) repeats the test with the operands swapped + for i := range [2]int{} { + x0, y0 := float64(base), float64(d) + if i&1 != 0 { + x0, y0 = y0, x0 + } + + x := NewFloat(x0) + y := NewFloat(y0) + z := new(Float).SetPrec(24) + + z.Add(x, y) + got, acc := z.Float32() + // reference value computed with float32 arithmetic + want := float32(y0) + float32(x0) + if got != want || acc != Exact { + t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want) + } + + // subtract y again; z is both the receiver and the first operand + z.Sub(z, y) + got, acc = z.Float32() + want = float32(want) - float32(y0) + if got != want || acc != Exact { + t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want) + } + } + } +} + +// TestFloatAdd64 tests that Float.Add/Sub of numbers with +// 53bit mantissa behaves like float64 addition/subtraction. 
+func TestFloatAdd64(t *testing.T) { + // choose base such that we cross the mantissa precision limit + const base = 1<<55 - 0x10 // 11...110000 (55 bits) + for d := 0; d <= 0x10; d++ { + // the second iteration (i == 1) repeats the test with the operands swapped + for i := range [2]int{} { + x0, y0 := float64(base), float64(d) + if i&1 != 0 { + x0, y0 = y0, x0 + } + + x := NewFloat(x0) + y := NewFloat(y0) + z := new(Float).SetPrec(53) + + z.Add(x, y) + got, acc := z.Float64() + // reference value computed with float64 arithmetic + want := x0 + y0 + if got != want || acc != Exact { + t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want) + } + + // subtract y again; z is both the receiver and the first operand + z.Sub(z, y) + got, acc = z.Float64() + want -= y0 + if got != want || acc != Exact { + t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want) + } + } + } +} + +// TestIssue20490 checks that Sub and Add produce the same value when the +// receiver is also the second operand (b.Sub(a, b), b.Add(a, b)) as when +// the result is stored in a fresh Float. +func TestIssue20490(t *testing.T) { + var tests = []struct { + a, b float64 + }{ + {4, 1}, + {-4, 1}, + {4, -1}, + {-4, -1}, + } + + for _, test := range tests { + a, b := NewFloat(test.a), NewFloat(test.b) + // reference result in a fresh Float, then the aliased call + diff := new(Float).Sub(a, b) + b.Sub(a, b) + if b.Cmp(diff) != 0 { + t.Errorf("got %g - %g = %g; want %g\n", a, NewFloat(test.b), b, diff) + } + + // b was overwritten by the Sub above; restore it before testing Add + b = NewFloat(test.b) + sum := new(Float).Add(a, b) + b.Add(a, b) + if b.Cmp(sum) != 0 { + t.Errorf("got %g + %g = %g; want %g\n", a, NewFloat(test.b), b, sum) + } + + } +} + +// TestFloatMul tests Float.Mul/Quo by comparing the result of a "manual" +// multiplication/division of arguments represented by Bits values with the +// respective Float multiplication/division for a variety of precisions +// and rounding modes. 
+func TestFloatMul(t *testing.T) { + for _, xbits := range bitsList { + for _, ybits := range bitsList { + // exact values + x := xbits.Float() + y := ybits.Float() + zbits := xbits.mul(ybits) + z := zbits.Float() + + for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} { + for _, prec := range precList { + // x * y computed by Float.Mul must equal the exact product + // rounded with the same precision and mode + got := new(Float).SetPrec(prec).SetMode(mode) + got.Mul(x, y) + want := zbits.round(prec, mode) + if got.Cmp(want) != 0 { + t.Errorf("i = %d, prec = %d, %s:\n\t %v %v\n\t* %v %v\n\t= %v\n\twant %v", + i, prec, mode, x, xbits, y, ybits, got, want) + } + + if x.Sign() == 0 { + continue // ignore div-0 case (not invertible) + } + // inverse operation: (x * y) / x must equal y rounded + // with the same precision and mode + got.Quo(z, x) + want = ybits.round(prec, mode) + if got.Cmp(want) != 0 { + t.Errorf("i = %d, prec = %d, %s:\n\t %v %v\n\t/ %v %v\n\t= %v\n\twant %v", + i, prec, mode, z, zbits, x, xbits, got, want) + } + } + } + } + } +} + +// TestFloatMul64 tests that Float.Mul/Quo of numbers with +// 53bit mantissa behaves like float64 multiplication/division. 
+func TestFloatMul64(t *testing.T) { + for _, test := range []struct { + x, y float64 + }{ + {0, 0}, + {0, 1}, + {1, 1}, + {1, 1.5}, + {1.234, 0.5678}, + {2.718281828, 3.14159265358979}, + {2.718281828e10, 3.14159265358979e-32}, + {1.0 / 3, 1e200}, + } { + for i := range [8]int{} { + x0, y0 := test.x, test.y + if i&1 != 0 { + x0 = -x0 + } + if i&2 != 0 { + y0 = -y0 + } + if i&4 != 0 { + x0, y0 = y0, x0 + } + + x := NewFloat(x0) + y := NewFloat(y0) + z := new(Float).SetPrec(53) + + z.Mul(x, y) + got, _ := z.Float64() + want := x0 * y0 + if got != want { + t.Errorf("%g * %g = %g; want %g", x0, y0, got, want) + } + + if y0 == 0 { + continue // avoid division-by-zero + } + z.Quo(z, y) + got, _ = z.Float64() + want /= y0 + if got != want { + t.Errorf("%g / %g = %g; want %g", x0*y0, y0, got, want) + } + } + } +} + +func TestIssue6866(t *testing.T) { + for _, prec := range precList { + two := new(Float).SetPrec(prec).SetInt64(2) + one := new(Float).SetPrec(prec).SetInt64(1) + three := new(Float).SetPrec(prec).SetInt64(3) + msix := new(Float).SetPrec(prec).SetInt64(-6) + psix := new(Float).SetPrec(prec).SetInt64(+6) + + p := new(Float).SetPrec(prec) + z1 := new(Float).SetPrec(prec) + z2 := new(Float).SetPrec(prec) + + // z1 = 2 + 1.0/3*-6 + p.Quo(one, three) + p.Mul(p, msix) + z1.Add(two, p) + + // z2 = 2 - 1.0/3*+6 + p.Quo(one, three) + p.Mul(p, psix) + z2.Sub(two, p) + + if z1.Cmp(z2) != 0 { + t.Fatalf("prec %d: got z1 = %v != z2 = %v; want z1 == z2\n", prec, z1, z2) + } + if z1.Sign() != 0 { + t.Errorf("prec %d: got z1 = %v; want 0", prec, z1) + } + if z2.Sign() != 0 { + t.Errorf("prec %d: got z2 = %v; want 0", prec, z2) + } + } +} + +func TestFloatQuo(t *testing.T) { + // TODO(gri) make the test vary these precisions + preci := 200 // precision of integer part + precf := 20 // precision of fractional part + + for i := 0; i < 8; i++ { + // compute accurate (not rounded) result z + bits := Bits{preci - 1} + if i&3 != 0 { + bits = append(bits, 0) + } + if i&2 != 0 { + 
bits = append(bits, -1) + } + if i&1 != 0 { + bits = append(bits, -precf) + } + z := bits.Float() + + // compute accurate x as z*y + y := NewFloat(3.14159265358979323e123) + + x := new(Float).SetPrec(z.Prec() + y.Prec()).SetMode(ToZero) + x.Mul(z, y) + + // leave for debugging + // fmt.Printf("x = %s\ny = %s\nz = %s\n", x, y, z) + + if got := x.Acc(); got != Exact { + t.Errorf("got acc = %s; want exact", got) + } + + // round accurate z for a variety of precisions and + // modes and compare against result of x / y. + for _, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} { + for d := -5; d < 5; d++ { + prec := uint(preci + d) + got := new(Float).SetPrec(prec).SetMode(mode).Quo(x, y) + want := bits.round(prec, mode) + if got.Cmp(want) != 0 { + t.Errorf("i = %d, prec = %d, %s:\n\t %s\n\t/ %s\n\t= %s\n\twant %s", + i, prec, mode, x, y, got, want) + } + } + } + } +} + +var long = flag.Bool("long", false, "run very long tests") + +// TestFloatQuoSmoke tests all divisions x/y for values x, y in the range [-n, +n]; +// it serves as a smoke test for basic correctness of division. +func TestFloatQuoSmoke(t *testing.T) { + n := 10 + if *long { + n = 1000 + } + + const dprec = 3 // max. 
precision variation + const prec = 10 + dprec // enough bits to hold n precisely + for x := -n; x <= n; x++ { + for y := -n; y < n; y++ { + if y == 0 { + continue + } + + a := float64(x) + b := float64(y) + c := a / b + + // vary operand precision (only ok as long as a, b can be represented correctly) + for ad := -dprec; ad <= dprec; ad++ { + for bd := -dprec; bd <= dprec; bd++ { + A := new(Float).SetPrec(uint(prec + ad)).SetFloat64(a) + B := new(Float).SetPrec(uint(prec + bd)).SetFloat64(b) + C := new(Float).SetPrec(53).Quo(A, B) // C has float64 mantissa width + + cc, acc := C.Float64() + if cc != c { + t.Errorf("%g/%g = %s; want %.5g\n", a, b, C.Text('g', 5), c) + continue + } + if acc != Exact { + t.Errorf("%g/%g got %s result; want exact result", a, b, acc) + } + } + } + } + } +} + +// TestFloatArithmeticSpecialValues tests that Float operations produce the +// correct results for combinations of zero (±0), finite (±1 and ±2.71828), +// and infinite (±Inf) operands. +func TestFloatArithmeticSpecialValues(t *testing.T) { + zero := 0.0 + args := []float64{math.Inf(-1), -2.71828, -1, -zero, zero, 1, 2.71828, math.Inf(1)} + xx := new(Float) + yy := new(Float) + got := new(Float) + want := new(Float) + for i := 0; i < 4; i++ { + for _, x := range args { + xx.SetFloat64(x) + // check conversion is correct + // (no need to do this for y, since we see exactly the + // same values there) + if got, acc := xx.Float64(); got != x || acc != Exact { + t.Errorf("Float(%g) == %g (%s)", x, got, acc) + } + for _, y := range args { + yy.SetFloat64(y) + var ( + op string + z float64 + f func(z, x, y *Float) *Float + ) + switch i { + case 0: + op = "+" + z = x + y + f = (*Float).Add + case 1: + op = "-" + z = x - y + f = (*Float).Sub + case 2: + op = "*" + z = x * y + f = (*Float).Mul + case 3: + op = "/" + z = x / y + f = (*Float).Quo + default: + panic("unreachable") + } + var errnan bool // set if execution of f panicked with ErrNaN + // protect execution of f + func() { + 
defer func() { + if p := recover(); p != nil { + _ = p.(ErrNaN) // re-panic if not ErrNaN + errnan = true + } + }() + f(got, xx, yy) + }() + if math.IsNaN(z) { + if !errnan { + t.Errorf("%5g %s %5g = %5s; want ErrNaN panic", x, op, y, got) + } + continue + } + if errnan { + t.Errorf("%5g %s %5g panicked with ErrNan; want %5s", x, op, y, want) + continue + } + want.SetFloat64(z) + if !alike(got, want) { + t.Errorf("%5g %s %5g = %5s; want %5s", x, op, y, got, want) + } + } + } + } +} + +func TestFloatArithmeticOverflow(t *testing.T) { + for _, test := range []struct { + prec uint + mode RoundingMode + op byte + x, y, want string + acc Accuracy + }{ + {4, ToNearestEven, '+', "0", "0", "0", Exact}, // smoke test + {4, ToNearestEven, '+', "0x.8p+0", "0x.8p+0", "0x.8p+1", Exact}, // smoke test + + {4, ToNearestEven, '+', "0", "0x.8p2147483647", "0x.8p+2147483647", Exact}, + {4, ToNearestEven, '+', "0x.8p2147483500", "0x.8p2147483647", "0x.8p+2147483647", Below}, // rounded to zero + {4, ToNearestEven, '+', "0x.8p2147483647", "0x.8p2147483647", "+Inf", Above}, // exponent overflow in + + {4, ToNearestEven, '+', "-0x.8p2147483647", "-0x.8p2147483647", "-Inf", Below}, // exponent overflow in + + {4, ToNearestEven, '-', "-0x.8p2147483647", "0x.8p2147483647", "-Inf", Below}, // exponent overflow in - + + {4, ToZero, '+', "0x.fp2147483647", "0x.8p2147483643", "0x.fp+2147483647", Below}, // rounded to zero + {4, ToNearestEven, '+', "0x.fp2147483647", "0x.8p2147483643", "+Inf", Above}, // exponent overflow in rounding + {4, AwayFromZero, '+', "0x.fp2147483647", "0x.8p2147483643", "+Inf", Above}, // exponent overflow in rounding + + {4, AwayFromZero, '-', "-0x.fp2147483647", "0x.8p2147483644", "-Inf", Below}, // exponent overflow in rounding + {4, ToNearestEven, '-', "-0x.fp2147483647", "0x.8p2147483643", "-Inf", Below}, // exponent overflow in rounding + {4, ToZero, '-', "-0x.fp2147483647", "0x.8p2147483643", "-0x.fp+2147483647", Above}, // rounded to zero + + {4, ToNearestEven, 
'+', "0", "0x.8p-2147483648", "0x.8p-2147483648", Exact}, + {4, ToNearestEven, '+', "0x.8p-2147483648", "0x.8p-2147483648", "0x.8p-2147483647", Exact}, + + {4, ToNearestEven, '*', "1", "0x.8p2147483647", "0x.8p+2147483647", Exact}, + {4, ToNearestEven, '*', "2", "0x.8p2147483647", "+Inf", Above}, // exponent overflow in * + {4, ToNearestEven, '*', "-2", "0x.8p2147483647", "-Inf", Below}, // exponent overflow in * + + {4, ToNearestEven, '/', "0.5", "0x.8p2147483647", "0x.8p-2147483646", Exact}, + {4, ToNearestEven, '/', "0x.8p+0", "0x.8p2147483647", "0x.8p-2147483646", Exact}, + {4, ToNearestEven, '/', "0x.8p-1", "0x.8p2147483647", "0x.8p-2147483647", Exact}, + {4, ToNearestEven, '/', "0x.8p-2", "0x.8p2147483647", "0x.8p-2147483648", Exact}, + {4, ToNearestEven, '/', "0x.8p-3", "0x.8p2147483647", "0", Below}, // exponent underflow in / + } { + x := makeFloat(test.x) + y := makeFloat(test.y) + z := new(Float).SetPrec(test.prec).SetMode(test.mode) + switch test.op { + case '+': + z.Add(x, y) + case '-': + z.Sub(x, y) + case '*': + z.Mul(x, y) + case '/': + z.Quo(x, y) + default: + panic("unreachable") + } + if got := z.Text('p', 0); got != test.want || z.Acc() != test.acc { + t.Errorf( + "prec = %d (%s): %s %c %s = %s (%s); want %s (%s)", + test.prec, test.mode, x.Text('p', 0), test.op, y.Text('p', 0), got, z.Acc(), test.want, test.acc, + ) + } + } +} + +// TODO(gri) Add tests that check correctness in the presence of aliasing. + +// For rounding modes ToNegativeInf and ToPositiveInf, rounding is affected +// by the sign of the value to be rounded. Test that rounding happens after +// the sign of a result has been set. +// This test uses specific values that are known to fail if rounding is +// "factored" out before setting the result sign. 
+func TestFloatArithmeticRounding(t *testing.T) { + for _, test := range []struct { + mode RoundingMode + prec uint + x, y, want int64 + op byte + }{ + // In every case the exact result of x op y is -9 (binary 1001), + // which does not fit into prec = 3 bits; the rounded result is + // expected to be -8 for ToZero and -10 (-0xa) for AwayFromZero + // and ToNegativeInf. + {ToZero, 3, -0x8, -0x1, -0x8, '+'}, + {AwayFromZero, 3, -0x8, -0x1, -0xa, '+'}, + {ToNegativeInf, 3, -0x8, -0x1, -0xa, '+'}, + + {ToZero, 3, -0x8, 0x1, -0x8, '-'}, + {AwayFromZero, 3, -0x8, 0x1, -0xa, '-'}, + {ToNegativeInf, 3, -0x8, 0x1, -0xa, '-'}, + + {ToZero, 3, -0x9, 0x1, -0x8, '*'}, + {AwayFromZero, 3, -0x9, 0x1, -0xa, '*'}, + {ToNegativeInf, 3, -0x9, 0x1, -0xa, '*'}, + + {ToZero, 3, -0x9, 0x1, -0x8, '/'}, + {AwayFromZero, 3, -0x9, 0x1, -0xa, '/'}, + {ToNegativeInf, 3, -0x9, 0x1, -0xa, '/'}, + } { + var x, y, z Float + x.SetInt64(test.x) + y.SetInt64(test.y) + z.SetPrec(test.prec).SetMode(test.mode) + switch test.op { + case '+': + z.Add(&x, &y) + case '-': + z.Sub(&x, &y) + case '*': + z.Mul(&x, &y) + case '/': + z.Quo(&x, &y) + default: + panic("unreachable") + } + // the rounded result is compared as an int64; the conversion + // itself is required to be Exact + if got, acc := z.Int64(); got != test.want || acc != Exact { + t.Errorf("%s, %d bits: %d %c %d = %d (%s); want %d (Exact)", + test.mode, test.prec, test.x, test.op, test.y, got, acc, test.want, + ) + } + } +} + +// TestFloatCmpSpecialValues tests that Cmp produces the correct results for +// combinations of zero (±0), finite (±1 and ±2.71828), and infinite (±Inf) +// operands. 
+func TestFloatCmpSpecialValues(t *testing.T) { + zero := 0.0 + args := []float64{math.Inf(-1), -2.71828, -1, -zero, zero, 1, 2.71828, math.Inf(1)} + xx := new(Float) + yy := new(Float) + // NOTE(review): i is not used inside the loop body, so the same + // comparisons run four times; this looks like a leftover from the + // four-operation loop in TestFloatArithmeticSpecialValues - confirm. + for i := 0; i < 4; i++ { + for _, x := range args { + xx.SetFloat64(x) + // check conversion is correct + // (no need to do this for y, since we see exactly the + // same values there) + if got, acc := xx.Float64(); got != x || acc != Exact { + t.Errorf("Float(%g) == %g (%s)", x, got, acc) + } + for _, y := range args { + yy.SetFloat64(y) + got := xx.Cmp(yy) + // expected result follows the float64 ordering of x and y; + // want stays 0 when neither x < y nor x > y (e.g. -0 vs +0) + want := 0 + switch { + case x < y: + want = -1 + case x > y: + want = +1 + } + if got != want { + t.Errorf("(%g).Cmp(%g) = %v; want %v", x, y, got, want) + } + } + } + } +} + +// BenchmarkFloatAdd measures z.Add(x, y) with x set from the rational 1/3 +// and y from 1/6, as one sub-benchmark (named after the precision) for each +// precision in {10, 1e2, 1e3, 1e4, 1e5}. Allocations are reported. +func BenchmarkFloatAdd(b *testing.B) { + x := new(Float) + y := new(Float) + z := new(Float) + + for _, prec := range []uint{10, 1e2, 1e3, 1e4, 1e5} { + x.SetPrec(prec).SetRat(NewRat(1, 3)) + y.SetPrec(prec).SetRat(NewRat(1, 6)) + z.SetPrec(prec) + + b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + z.Add(x, y) + } + }) + } +} + +// BenchmarkFloatSub measures z.Sub(x, y) with x set from the rational 1/3 +// and y from 1/6, as one sub-benchmark (named after the precision) for each +// precision in {10, 1e2, 1e3, 1e4, 1e5}. Allocations are reported. +func BenchmarkFloatSub(b *testing.B) { + x := new(Float) + y := new(Float) + z := new(Float) + + for _, prec := range []uint{10, 1e2, 1e3, 1e4, 1e5} { + x.SetPrec(prec).SetRat(NewRat(1, 3)) + y.SetPrec(prec).SetRat(NewRat(1, 6)) + z.SetPrec(prec) + + b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + z.Sub(x, y) + } + }) + } +} diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go new file mode 100644 index 0000000..57b7df3 --- /dev/null +++ b/src/math/big/floatconv.go @@ -0,0 +1,304 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements string-to-Float conversion functions. 
+ +package big + +import ( + "fmt" + "io" + "strings" +) + +var floatZero Float + +// SetString sets z to the value of s and returns z and a boolean indicating +// success. s must be a floating-point number of the same format as accepted +// by Parse, with base argument 0. The entire string (not just a prefix) must +// be valid for success. If the operation failed, the value of z is undefined +// but the returned value is nil. +func (z *Float) SetString(s string) (*Float, bool) { + if f, _, err := z.Parse(s, 0); err == nil { + return f, true + } + return nil, false +} + +// scan is like Parse but reads the longest possible prefix representing a valid +// floating point number from an io.ByteScanner rather than a string. It serves +// as the implementation of Parse. It does not recognize ±Inf and does not expect +// EOF at the end. +func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) { + prec := z.prec + if prec == 0 { + prec = 64 + } + + // A reasonable value in case of an error. + z.form = zero + + // sign + z.neg, err = scanSign(r) + if err != nil { + return + } + + // mantissa + var fcount int // fractional digit count; valid if <= 0 + z.mant, b, fcount, err = z.mant.scan(r, base, true) + if err != nil { + return + } + + // exponent + var exp int64 + var ebase int + exp, ebase, err = scanExponent(r, true, base == 0) + if err != nil { + return + } + + // special-case 0 + if len(z.mant) == 0 { + z.prec = prec + z.acc = Exact + z.form = zero + f = z + return + } + // len(z.mant) > 0 + + // The mantissa may have a radix point (fcount <= 0) and there + // may be a nonzero exponent exp. The radix point amounts to a + // division by b**(-fcount). An exponent means multiplication by + // ebase**exp. Finally, mantissa normalization (shift left) requires + // a correcting multiplication by 2**(-shiftcount). Multiplications + // are commutative, so we can apply them in any order as long as there + // is no loss of precision. 
We only have powers of 2 and 10, and + // we split powers of 10 into the product of the same powers of + // 2 and 5. This reduces the size of the multiplication factor + // needed for base-10 exponents. + + // normalize mantissa and determine initial exponent contributions + exp2 := int64(len(z.mant))*_W - fnorm(z.mant) + exp5 := int64(0) + + // determine binary or decimal exponent contribution of radix point + if fcount < 0 { + // The mantissa has a radix point ddd.dddd; and + // -fcount is the number of digits to the right + // of '.'. Adjust relevant exponent accordingly. + d := int64(fcount) + switch b { + case 10: + exp5 = d + fallthrough // 10**e == 5**e * 2**e + case 2: + exp2 += d + case 8: + exp2 += d * 3 // octal digits are 3 bits each + case 16: + exp2 += d * 4 // hexadecimal digits are 4 bits each + default: + panic("unexpected mantissa base") + } + // fcount consumed - not needed anymore + } + + // take actual exponent into account + switch ebase { + case 10: + exp5 += exp + fallthrough // see fallthrough above + case 2: + exp2 += exp + default: + panic("unexpected exponent base") + } + // exp consumed - not needed anymore + + // apply 2**exp2 + if MinExp <= exp2 && exp2 <= MaxExp { + z.prec = prec + z.form = finite + z.exp = int32(exp2) + f = z + } else { + err = fmt.Errorf("exponent overflow") + return + } + + if exp5 == 0 { + // no decimal exponent contribution + z.round(0) + return + } + // exp5 != 0 + + // apply 5**exp5 + p := new(Float).SetPrec(z.Prec() + 64) // use more bits for p -- TODO(gri) what is the right number? + if exp5 < 0 { + z.Quo(z, p.pow5(uint64(-exp5))) + } else { + z.Mul(z, p.pow5(uint64(exp5))) + } + + return +} + +// These powers of 5 fit into a uint64. 
+// +// for p, q := uint64(0), uint64(1); p < q; p, q = q, q*5 { +// fmt.Println(q) +// } +// +var pow5tab = [...]uint64{ + 1, + 5, + 25, + 125, + 625, + 3125, + 15625, + 78125, + 390625, + 1953125, + 9765625, + 48828125, + 244140625, + 1220703125, + 6103515625, + 30517578125, + 152587890625, + 762939453125, + 3814697265625, + 19073486328125, + 95367431640625, + 476837158203125, + 2384185791015625, + 11920928955078125, + 59604644775390625, + 298023223876953125, + 1490116119384765625, + 7450580596923828125, +} + +// pow5 sets z to 5**n and returns z. +// n must not be negative. +func (z *Float) pow5(n uint64) *Float { + const m = uint64(len(pow5tab) - 1) + if n <= m { + return z.SetUint64(pow5tab[n]) + } + // n > m + + z.SetUint64(pow5tab[m]) + n -= m + + // use more bits for f than for z + // TODO(gri) what is the right number? + f := new(Float).SetPrec(z.Prec() + 64).SetUint64(5) + + for n > 0 { + if n&1 != 0 { + z.Mul(z, f) + } + f.Mul(f, f) + n >>= 1 + } + + return z +} + +// Parse parses s which must contain a text representation of a floating- +// point number with a mantissa in the given conversion base (the exponent +// is always a decimal number), or a string representing an infinite value. +// +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// +// It sets z to the (possibly rounded) value of the corresponding floating- +// point value, and returns z, the actual base b, and an error err, if any. +// The entire string (not just a prefix) must be consumed for success. +// If z's precision is 0, it is changed to 64 before rounding takes effect. 
+// The number must be of the form: +// +// number = [ sign ] ( float | "inf" | "Inf" ) . +// sign = "+" | "-" . +// float = ( mantissa | prefix pmantissa ) [ exponent ] . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . +// +// The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base +// argument will lead to a run-time panic. +// +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0o'' or ``0O'' selects base 8, and +// ``0x'' or ``0X'' selects base 16. Otherwise, the actual base is 10 and +// no prefix is accepted. The octal prefix "0" is not supported (a leading +// "0" is simply considered a "0"). +// +// A "p" or "P" exponent indicates a base 2 (rather than base 10) exponent; +// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the +// maximum float64 value. For hexadecimal mantissae, the exponent character +// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator +// cannot be distinguished from a mantissa digit). +// +// The returned *Float f is nil and the value of z is valid but not +// defined if an error is reported. 
+// +func (z *Float) Parse(s string, base int) (f *Float, b int, err error) { + // scan doesn't handle ±Inf + if len(s) == 3 && (s == "Inf" || s == "inf") { + f = z.SetInf(false) + return + } + if len(s) == 4 && (s[0] == '+' || s[0] == '-') && (s[1:] == "Inf" || s[1:] == "inf") { + f = z.SetInf(s[0] == '-') + return + } + + r := strings.NewReader(s) + if f, b, err = z.scan(r, base); err != nil { + return + } + + // entire string must have been consumed + if ch, err2 := r.ReadByte(); err2 == nil { + err = fmt.Errorf("expected end of string, found %q", ch) + } else if err2 != io.EOF { + err = err2 + } + + return +} + +// ParseFloat is like f.Parse(s, base) with f set to the given precision +// and rounding mode. +func ParseFloat(s string, base int, prec uint, mode RoundingMode) (f *Float, b int, err error) { + return new(Float).SetPrec(prec).SetMode(mode).Parse(s, base) +} + +var _ fmt.Scanner = (*Float)(nil) // *Float must implement fmt.Scanner + +// Scan is a support routine for fmt.Scanner; it sets z to the value of +// the scanned number. It accepts formats whose verbs are supported by +// fmt.Scan for floating point values, which are: +// 'b' (binary), 'e', 'E', 'f', 'F', 'g' and 'G'. +// Scan doesn't handle ±Inf. +func (z *Float) Scan(s fmt.ScanState, ch rune) error { + s.SkipSpace() + _, _, err := z.scan(byteReader{s}, 0) + return err +} diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go new file mode 100644 index 0000000..a1cc38a --- /dev/null +++ b/src/math/big/floatconv_test.go @@ -0,0 +1,825 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "bytes" + "fmt" + "math" + "math/bits" + "strconv" + "testing" +) + +var zero_ float64 + +func TestFloatSetFloat64String(t *testing.T) { + inf := math.Inf(0) + nan := math.NaN() + + for _, test := range []struct { + s string + x float64 // NaNs represent invalid inputs + }{ + // basics + {"0", 0}, + {"-0", -zero_}, + {"+0", 0}, + {"1", 1}, + {"-1", -1}, + {"+1", 1}, + {"1.234", 1.234}, + {"-1.234", -1.234}, + {"+1.234", 1.234}, + {".1", 0.1}, + {"1.", 1}, + {"+1.", 1}, + + // various zeros + {"0e100", 0}, + {"-0e+100", -zero_}, + {"+0e-100", 0}, + {"0E100", 0}, + {"-0E+100", -zero_}, + {"+0E-100", 0}, + + // various decimal exponent formats + {"1.e10", 1e10}, + {"1e+10", 1e10}, + {"+1e-10", 1e-10}, + {"1E10", 1e10}, + {"1.E+10", 1e10}, + {"+1E-10", 1e-10}, + + // infinities + {"Inf", inf}, + {"+Inf", inf}, + {"-Inf", -inf}, + {"inf", inf}, + {"+inf", inf}, + {"-inf", -inf}, + + // invalid numbers + {"", nan}, + {"-", nan}, + {"0x", nan}, + {"0e", nan}, + {"1.2ef", nan}, + {"2..3", nan}, + {"123..", nan}, + {"infinity", nan}, + {"foobar", nan}, + + // invalid underscores + {"_", nan}, + {"0_", nan}, + {"1__0", nan}, + {"123_.", nan}, + {"123._", nan}, + {"123._4", nan}, + {"1_2.3_4_", nan}, + {"_.123", nan}, + {"_123.456", nan}, + {"10._0", nan}, + {"10.0e_0", nan}, + {"10.0e0_", nan}, + {"0P-0__0", nan}, + + // misc decimal values + {"3.14159265", 3.14159265}, + {"-687436.79457e-245", -687436.79457e-245}, + {"-687436.79457E245", -687436.79457e245}, + {".0000000000000000000000000000000000000001", 1e-40}, + {"+10000000000000000000000000000000000000000e-0", 1e40}, + + // decimal mantissa, binary exponent + {"0p0", 0}, + {"-0p0", -zero_}, + {"1p10", 1 << 10}, + {"1p+10", 1 << 10}, + {"+1p-10", 1.0 / (1 << 10)}, + {"1024p-12", 0.25}, + {"-1p10", -1024}, + {"1.5p1", 3}, + + // binary mantissa, decimal exponent + {"0b0", 0}, + {"-0b0", -zero_}, + {"0b0e+10", 0}, + {"-0b0e-10", -zero_}, + {"0b1010", 10}, + {"0B1010E2", 1000}, + {"0b.1", 0.5}, + 
{"0b.001", 0.125}, + {"0b.001e3", 125}, + + // binary mantissa, binary exponent + {"0b0p+10", 0}, + {"-0b0p-10", -zero_}, + {"0b.1010p4", 10}, + {"0b1p-1", 0.5}, + {"0b001p-3", 0.125}, + {"0b.001p3", 1}, + {"0b0.01p2", 1}, + {"0b0.01P+2", 1}, + + // octal mantissa, decimal exponent + {"0o0", 0}, + {"-0o0", -zero_}, + {"0o0e+10", 0}, + {"-0o0e-10", -zero_}, + {"0o12", 10}, + {"0O12E2", 1000}, + {"0o.4", 0.5}, + {"0o.01", 0.015625}, + {"0o.01e3", 15.625}, + + // octal mantissa, binary exponent + {"0o0p+10", 0}, + {"-0o0p-10", -zero_}, + {"0o.12p6", 10}, + {"0o4p-3", 0.5}, + {"0o0014p-6", 0.1875}, + {"0o.001p9", 1}, + {"0o0.01p7", 2}, + {"0O0.01P+2", 0.0625}, + + // hexadecimal mantissa and exponent + {"0x0", 0}, + {"-0x0", -zero_}, + {"0x0p+10", 0}, + {"-0x0p-10", -zero_}, + {"0xff", 255}, + {"0X.8p1", 1}, + {"-0X0.00008p16", -0.5}, + {"-0X0.00008P+16", -0.5}, + {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64}, + {"0x1.fffffffffffffp1023", math.MaxFloat64}, + + // underscores + {"0_0", 0}, + {"1_000.", 1000}, + {"1_2_3.4_5_6", 123.456}, + {"1.0e0_0", 1}, + {"1p+1_0", 1024}, + {"0b_1000", 0x8}, + {"0b_1011_1101", 0xbd}, + {"0x_f0_0d_1eP+0_8", 0xf00d1e00}, + } { + var x Float + x.SetPrec(53) + _, ok := x.SetString(test.s) + if math.IsNaN(test.x) { + // test.s is invalid + if ok { + t.Errorf("%s: want parse error", test.s) + } + continue + } + // test.s is valid + if !ok { + t.Errorf("%s: got parse error", test.s) + continue + } + f, _ := x.Float64() + want := new(Float).SetFloat64(test.x) + if x.Cmp(want) != 0 || x.Signbit() != want.Signbit() { + t.Errorf("%s: got %v (%v); want %v", test.s, &x, f, test.x) + } + } +} + +func fdiv(a, b float64) float64 { return a / b } + +const ( + below1e23 = 99999999999999974834176 + above1e23 = 100000000000000008388608 +) + +func TestFloat64Text(t *testing.T) { + for _, test := range []struct { + x float64 + format byte + prec int + want string + }{ + {0, 'f', 0, "0"}, + {math.Copysign(0, -1), 'f', 0, "-0"}, + {1, 'f', 0, 
"1"}, + {-1, 'f', 0, "-1"}, + + {0.001, 'e', 0, "1e-03"}, + {0.459, 'e', 0, "5e-01"}, + {1.459, 'e', 0, "1e+00"}, + {2.459, 'e', 1, "2.5e+00"}, + {3.459, 'e', 2, "3.46e+00"}, + {4.459, 'e', 3, "4.459e+00"}, + {5.459, 'e', 4, "5.4590e+00"}, + + {0.001, 'f', 0, "0"}, + {0.459, 'f', 0, "0"}, + {1.459, 'f', 0, "1"}, + {2.459, 'f', 1, "2.5"}, + {3.459, 'f', 2, "3.46"}, + {4.459, 'f', 3, "4.459"}, + {5.459, 'f', 4, "5.4590"}, + + {0, 'b', 0, "0"}, + {math.Copysign(0, -1), 'b', 0, "-0"}, + {1.0, 'b', 0, "4503599627370496p-52"}, + {-1.0, 'b', 0, "-4503599627370496p-52"}, + {4503599627370496, 'b', 0, "4503599627370496p+0"}, + + {0, 'p', 0, "0"}, + {math.Copysign(0, -1), 'p', 0, "-0"}, + {1024.0, 'p', 0, "0x.8p+11"}, + {-1024.0, 'p', 0, "-0x.8p+11"}, + + // all test cases below from strconv/ftoa_test.go + {1, 'e', 5, "1.00000e+00"}, + {1, 'f', 5, "1.00000"}, + {1, 'g', 5, "1"}, + {1, 'g', -1, "1"}, + {20, 'g', -1, "20"}, + {1234567.8, 'g', -1, "1.2345678e+06"}, + {200000, 'g', -1, "200000"}, + {2000000, 'g', -1, "2e+06"}, + + // g conversion and zero suppression + {400, 'g', 2, "4e+02"}, + {40, 'g', 2, "40"}, + {4, 'g', 2, "4"}, + {.4, 'g', 2, "0.4"}, + {.04, 'g', 2, "0.04"}, + {.004, 'g', 2, "0.004"}, + {.0004, 'g', 2, "0.0004"}, + {.00004, 'g', 2, "4e-05"}, + {.000004, 'g', 2, "4e-06"}, + + {0, 'e', 5, "0.00000e+00"}, + {0, 'f', 5, "0.00000"}, + {0, 'g', 5, "0"}, + {0, 'g', -1, "0"}, + + {-1, 'e', 5, "-1.00000e+00"}, + {-1, 'f', 5, "-1.00000"}, + {-1, 'g', 5, "-1"}, + {-1, 'g', -1, "-1"}, + + {12, 'e', 5, "1.20000e+01"}, + {12, 'f', 5, "12.00000"}, + {12, 'g', 5, "12"}, + {12, 'g', -1, "12"}, + + {123456700, 'e', 5, "1.23457e+08"}, + {123456700, 'f', 5, "123456700.00000"}, + {123456700, 'g', 5, "1.2346e+08"}, + {123456700, 'g', -1, "1.234567e+08"}, + + {1.2345e6, 'e', 5, "1.23450e+06"}, + {1.2345e6, 'f', 5, "1234500.00000"}, + {1.2345e6, 'g', 5, "1.2345e+06"}, + + {1e23, 'e', 17, "9.99999999999999916e+22"}, + {1e23, 'f', 17, "99999999999999991611392.00000000000000000"}, + 
{1e23, 'g', 17, "9.9999999999999992e+22"}, + + {1e23, 'e', -1, "1e+23"}, + {1e23, 'f', -1, "100000000000000000000000"}, + {1e23, 'g', -1, "1e+23"}, + + {below1e23, 'e', 17, "9.99999999999999748e+22"}, + {below1e23, 'f', 17, "99999999999999974834176.00000000000000000"}, + {below1e23, 'g', 17, "9.9999999999999975e+22"}, + + {below1e23, 'e', -1, "9.999999999999997e+22"}, + {below1e23, 'f', -1, "99999999999999970000000"}, + {below1e23, 'g', -1, "9.999999999999997e+22"}, + + {above1e23, 'e', 17, "1.00000000000000008e+23"}, + {above1e23, 'f', 17, "100000000000000008388608.00000000000000000"}, + {above1e23, 'g', 17, "1.0000000000000001e+23"}, + + {above1e23, 'e', -1, "1.0000000000000001e+23"}, + {above1e23, 'f', -1, "100000000000000010000000"}, + {above1e23, 'g', -1, "1.0000000000000001e+23"}, + + {5e-304 / 1e20, 'g', -1, "5e-324"}, + {-5e-304 / 1e20, 'g', -1, "-5e-324"}, + {fdiv(5e-304, 1e20), 'g', -1, "5e-324"}, // avoid constant arithmetic + {fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"}, // avoid constant arithmetic + + {32, 'g', -1, "32"}, + {32, 'g', 0, "3e+01"}, + + {100, 'x', -1, "0x1.9p+06"}, + + // {math.NaN(), 'g', -1, "NaN"}, // Float doesn't support NaNs + // {-math.NaN(), 'g', -1, "NaN"}, // Float doesn't support NaNs + {math.Inf(0), 'g', -1, "+Inf"}, + {math.Inf(-1), 'g', -1, "-Inf"}, + {-math.Inf(0), 'g', -1, "-Inf"}, + + {-1, 'b', -1, "-4503599627370496p-52"}, + + // fixed bugs + {0.9, 'f', 1, "0.9"}, + {0.09, 'f', 1, "0.1"}, + {0.0999, 'f', 1, "0.1"}, + {0.05, 'f', 1, "0.1"}, + {0.05, 'f', 0, "0"}, + {0.5, 'f', 1, "0.5"}, + {0.5, 'f', 0, "0"}, + {1.5, 'f', 0, "2"}, + + // https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/ + {2.2250738585072012e-308, 'g', -1, "2.2250738585072014e-308"}, + // https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ + {2.2250738585072011e-308, 'g', -1, "2.225073858507201e-308"}, + + // Issue 2625. 
+ {383260575764816448, 'f', 0, "383260575764816448"}, + {383260575764816448, 'g', -1, "3.8326057576481645e+17"}, + + // Issue 15918. + {1, 'f', -10, "1"}, + {1, 'f', -11, "1"}, + {1, 'f', -12, "1"}, + } { + // The test cases are from the strconv package which tests float64 values. + // When formatting values with prec = -1 (shortest representation), + // the actually available mantissa precision matters. + // For denormalized values, that precision is < 53 (SetFloat64 default). + // Compute and set the actual precision explicitly. + f := new(Float).SetPrec(actualPrec(test.x)).SetFloat64(test.x) + got := f.Text(test.format, test.prec) + if got != test.want { + t.Errorf("%v: got %s; want %s", test, got, test.want) + continue + } + + if test.format == 'b' && test.x == 0 { + continue // 'b' format in strconv.Float requires knowledge of bias for 0.0 + } + if test.format == 'p' { + continue // 'p' format not supported in strconv.Format + } + + // verify that Float format matches strconv format + want := strconv.FormatFloat(test.x, test.format, test.prec, 64) + if got != want { + t.Errorf("%v: got %s; want %s (strconv)", test, got, want) + } + } +} + +// actualPrec returns the number of actually used mantissa bits. 
+func actualPrec(x float64) uint { + if mant := math.Float64bits(x); x != 0 && mant&(0x7ff<<52) == 0 { + // x is denormalized + return 64 - uint(bits.LeadingZeros64(mant&(1<<52-1))) + } + return 53 +} + +func TestFloatText(t *testing.T) { + const defaultRound = ^RoundingMode(0) + + for _, test := range []struct { + x string + round RoundingMode + prec uint + format byte + digits int + want string + }{ + {"0", defaultRound, 10, 'f', 0, "0"}, + {"-0", defaultRound, 10, 'f', 0, "-0"}, + {"1", defaultRound, 10, 'f', 0, "1"}, + {"-1", defaultRound, 10, 'f', 0, "-1"}, + + {"1.459", defaultRound, 100, 'e', 0, "1e+00"}, + {"2.459", defaultRound, 100, 'e', 1, "2.5e+00"}, + {"3.459", defaultRound, 100, 'e', 2, "3.46e+00"}, + {"4.459", defaultRound, 100, 'e', 3, "4.459e+00"}, + {"5.459", defaultRound, 100, 'e', 4, "5.4590e+00"}, + + {"1.459", defaultRound, 100, 'E', 0, "1E+00"}, + {"2.459", defaultRound, 100, 'E', 1, "2.5E+00"}, + {"3.459", defaultRound, 100, 'E', 2, "3.46E+00"}, + {"4.459", defaultRound, 100, 'E', 3, "4.459E+00"}, + {"5.459", defaultRound, 100, 'E', 4, "5.4590E+00"}, + + {"1.459", defaultRound, 100, 'f', 0, "1"}, + {"2.459", defaultRound, 100, 'f', 1, "2.5"}, + {"3.459", defaultRound, 100, 'f', 2, "3.46"}, + {"4.459", defaultRound, 100, 'f', 3, "4.459"}, + {"5.459", defaultRound, 100, 'f', 4, "5.4590"}, + + {"1.459", defaultRound, 100, 'g', 0, "1"}, + {"2.459", defaultRound, 100, 'g', 1, "2"}, + {"3.459", defaultRound, 100, 'g', 2, "3.5"}, + {"4.459", defaultRound, 100, 'g', 3, "4.46"}, + {"5.459", defaultRound, 100, 'g', 4, "5.459"}, + + {"1459", defaultRound, 53, 'g', 0, "1e+03"}, + {"2459", defaultRound, 53, 'g', 1, "2e+03"}, + {"3459", defaultRound, 53, 'g', 2, "3.5e+03"}, + {"4459", defaultRound, 53, 'g', 3, "4.46e+03"}, + {"5459", defaultRound, 53, 'g', 4, "5459"}, + + {"1459", defaultRound, 53, 'G', 0, "1E+03"}, + {"2459", defaultRound, 53, 'G', 1, "2E+03"}, + {"3459", defaultRound, 53, 'G', 2, "3.5E+03"}, + {"4459", defaultRound, 53, 'G', 3, 
"4.46E+03"}, + {"5459", defaultRound, 53, 'G', 4, "5459"}, + + {"3", defaultRound, 10, 'e', 40, "3.0000000000000000000000000000000000000000e+00"}, + {"3", defaultRound, 10, 'f', 40, "3.0000000000000000000000000000000000000000"}, + {"3", defaultRound, 10, 'g', 40, "3"}, + + {"3e40", defaultRound, 100, 'e', 40, "3.0000000000000000000000000000000000000000e+40"}, + {"3e40", defaultRound, 100, 'f', 4, "30000000000000000000000000000000000000000.0000"}, + {"3e40", defaultRound, 100, 'g', 40, "3e+40"}, + + // make sure "stupid" exponents don't stall the machine + {"1e1000000", defaultRound, 64, 'p', 0, "0x.88b3a28a05eade3ap+3321929"}, + {"1e646456992", defaultRound, 64, 'p', 0, "0x.e883a0c5c8c7c42ap+2147483644"}, + {"1e646456993", defaultRound, 64, 'p', 0, "+Inf"}, + {"1e1000000000", defaultRound, 64, 'p', 0, "+Inf"}, + {"1e-1000000", defaultRound, 64, 'p', 0, "0x.efb4542cc8ca418ap-3321928"}, + {"1e-646456993", defaultRound, 64, 'p', 0, "0x.e17c8956983d9d59p-2147483647"}, + {"1e-646456994", defaultRound, 64, 'p', 0, "0"}, + {"1e-1000000000", defaultRound, 64, 'p', 0, "0"}, + + // minimum and maximum values + {"1p2147483646", defaultRound, 64, 'p', 0, "0x.8p+2147483647"}, + {"0x.8p2147483647", defaultRound, 64, 'p', 0, "0x.8p+2147483647"}, + {"0x.8p-2147483647", defaultRound, 64, 'p', 0, "0x.8p-2147483647"}, + {"1p-2147483649", defaultRound, 64, 'p', 0, "0x.8p-2147483648"}, + + // TODO(gri) need tests for actual large Floats + + {"0", defaultRound, 53, 'b', 0, "0"}, + {"-0", defaultRound, 53, 'b', 0, "-0"}, + {"1.0", defaultRound, 53, 'b', 0, "4503599627370496p-52"}, + {"-1.0", defaultRound, 53, 'b', 0, "-4503599627370496p-52"}, + {"4503599627370496", defaultRound, 53, 'b', 0, "4503599627370496p+0"}, + + // issue 9939 + {"3", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"}, + {"03", defaultRound, 350, 'b', 0, 
"1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"}, + {"3.", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"}, + {"3.0", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"}, + {"3.00", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"}, + {"3.000", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"}, + + {"3", defaultRound, 350, 'p', 0, "0x.cp+2"}, + {"03", defaultRound, 350, 'p', 0, "0x.cp+2"}, + {"3.", defaultRound, 350, 'p', 0, "0x.cp+2"}, + {"3.0", defaultRound, 350, 'p', 0, "0x.cp+2"}, + {"3.00", defaultRound, 350, 'p', 0, "0x.cp+2"}, + {"3.000", defaultRound, 350, 'p', 0, "0x.cp+2"}, + + {"0", defaultRound, 64, 'p', 0, "0"}, + {"-0", defaultRound, 64, 'p', 0, "-0"}, + {"1024.0", defaultRound, 64, 'p', 0, "0x.8p+11"}, + {"-1024.0", defaultRound, 64, 'p', 0, "-0x.8p+11"}, + + {"0", defaultRound, 64, 'x', -1, "0x0p+00"}, + {"0", defaultRound, 64, 'x', 0, "0x0p+00"}, + {"0", defaultRound, 64, 'x', 1, "0x0.0p+00"}, + {"0", defaultRound, 64, 'x', 5, "0x0.00000p+00"}, + {"3.25", defaultRound, 64, 'x', 0, "0x1p+02"}, + {"-3.25", defaultRound, 64, 'x', 0, "-0x1p+02"}, + {"3.25", defaultRound, 64, 'x', 1, "0x1.ap+01"}, + {"-3.25", defaultRound, 64, 'x', 1, "-0x1.ap+01"}, + {"3.25", defaultRound, 64, 'x', -1, "0x1.ap+01"}, + {"-3.25", defaultRound, 64, 'x', -1, "-0x1.ap+01"}, + {"1024.0", defaultRound, 64, 'x', 0, "0x1p+10"}, + {"-1024.0", defaultRound, 64, 'x', 0, "-0x1p+10"}, + {"1024.0", defaultRound, 64, 'x', 5, "0x1.00000p+10"}, + {"8191.0", defaultRound, 53, 'x', -1, "0x1.fffp+12"}, + {"8191.5", defaultRound, 53, 'x', -1, "0x1.fff8p+12"}, + 
{"8191.53125", defaultRound, 53, 'x', -1, "0x1.fff88p+12"}, + {"8191.53125", defaultRound, 53, 'x', 4, "0x1.fff8p+12"}, + {"8191.53125", defaultRound, 53, 'x', 3, "0x1.000p+13"}, + {"8191.53125", defaultRound, 53, 'x', 0, "0x1p+13"}, + {"8191.533203125", defaultRound, 53, 'x', -1, "0x1.fff888p+12"}, + {"8191.533203125", defaultRound, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.533203125", defaultRound, 53, 'x', 4, "0x1.fff9p+12"}, + + {"8191.53125", defaultRound, 53, 'x', -1, "0x1.fff88p+12"}, + {"8191.53125", ToNearestEven, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.53125", ToNearestAway, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.53125", ToZero, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.53125", AwayFromZero, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.53125", ToNegativeInf, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.53125", ToPositiveInf, 53, 'x', 5, "0x1.fff88p+12"}, + + {"8191.53125", defaultRound, 53, 'x', 4, "0x1.fff8p+12"}, + {"8191.53125", defaultRound, 53, 'x', 3, "0x1.000p+13"}, + {"8191.53125", defaultRound, 53, 'x', 0, "0x1p+13"}, + {"8191.533203125", defaultRound, 53, 'x', -1, "0x1.fff888p+12"}, + {"8191.533203125", defaultRound, 53, 'x', 6, "0x1.fff888p+12"}, + {"8191.533203125", defaultRound, 53, 'x', 5, "0x1.fff88p+12"}, + {"8191.533203125", defaultRound, 53, 'x', 4, "0x1.fff9p+12"}, + + {"8191.53125", ToNearestEven, 53, 'x', 4, "0x1.fff8p+12"}, + {"8191.53125", ToNearestAway, 53, 'x', 4, "0x1.fff9p+12"}, + {"8191.53125", ToZero, 53, 'x', 4, "0x1.fff8p+12"}, + {"8191.53125", ToZero, 53, 'x', 2, "0x1.ffp+12"}, + {"8191.53125", AwayFromZero, 53, 'x', 4, "0x1.fff9p+12"}, + {"8191.53125", ToNegativeInf, 53, 'x', 4, "0x1.fff8p+12"}, + {"-8191.53125", ToNegativeInf, 53, 'x', 4, "-0x1.fff9p+12"}, + {"8191.53125", ToPositiveInf, 53, 'x', 4, "0x1.fff9p+12"}, + {"-8191.53125", ToPositiveInf, 53, 'x', 4, "-0x1.fff8p+12"}, + + // issue 34343 + {"0x.8p-2147483648", ToNearestEven, 4, 'p', -1, "0x.8p-2147483648"}, + {"0x.8p-2147483648", ToNearestEven, 4, 'x', -1, "0x1p-2147483649"}, + } { 
+ f, _, err := ParseFloat(test.x, 0, test.prec, ToNearestEven) + if err != nil { + t.Errorf("%v: %s", test, err) + continue + } + if test.round != defaultRound { + f.SetMode(test.round) + } + + got := f.Text(test.format, test.digits) + if got != test.want { + t.Errorf("%v: got %s; want %s", test, got, test.want) + } + + // compare with strconv.FormatFloat output if possible + // ('p' format is not supported by strconv.FormatFloat, + // and its output for 0.0 prints a biased exponent value + // as in 0p-1074 which makes no sense to emulate here) + if test.prec == 53 && test.format != 'p' && f.Sign() != 0 && (test.round == ToNearestEven || test.round == defaultRound) { + f64, acc := f.Float64() + if acc != Exact { + t.Errorf("%v: expected exact conversion to float64", test) + continue + } + got := strconv.FormatFloat(f64, test.format, test.digits, 64) + if got != test.want { + t.Errorf("%v: got %s; want %s", test, got, test.want) + } + } + } +} + +func TestFloatFormat(t *testing.T) { + for _, test := range []struct { + format string + value any // float32, float64, or string (== 512bit *Float) + want string + }{ + // from fmt/fmt_test.go + {"%+.3e", 0.0, "+0.000e+00"}, + {"%+.3e", 1.0, "+1.000e+00"}, + {"%+.3f", -1.0, "-1.000"}, + {"%+.3F", -1.0, "-1.000"}, + {"%+.3F", float32(-1.0), "-1.000"}, + {"%+07.2f", 1.0, "+001.00"}, + {"%+07.2f", -1.0, "-001.00"}, + {"%+10.2f", +1.0, " +1.00"}, + {"%+10.2f", -1.0, " -1.00"}, + {"% .3E", -1.0, "-1.000E+00"}, + {"% .3e", 1.0, " 1.000e+00"}, + {"%+.3g", 0.0, "+0"}, + {"%+.3g", 1.0, "+1"}, + {"%+.3g", -1.0, "-1"}, + {"% .3g", -1.0, "-1"}, + {"% .3g", 1.0, " 1"}, + {"%b", float32(1.0), "8388608p-23"}, + {"%b", 1.0, "4503599627370496p-52"}, + + // from fmt/fmt_test.go: old test/fmt_test.go + {"%e", 1.0, "1.000000e+00"}, + {"%e", 1234.5678e3, "1.234568e+06"}, + {"%e", 1234.5678e-8, "1.234568e-05"}, + {"%e", -7.0, "-7.000000e+00"}, + {"%e", -1e-9, "-1.000000e-09"}, + {"%f", 1234.5678e3, "1234567.800000"}, + {"%f", 1234.5678e-8, 
"0.000012"}, + {"%f", -7.0, "-7.000000"}, + {"%f", -1e-9, "-0.000000"}, + {"%g", 1234.5678e3, "1.2345678e+06"}, + {"%g", float32(1234.5678e3), "1.2345678e+06"}, + {"%g", 1234.5678e-8, "1.2345678e-05"}, + {"%g", -7.0, "-7"}, + {"%g", -1e-9, "-1e-09"}, + {"%g", float32(-1e-9), "-1e-09"}, + {"%E", 1.0, "1.000000E+00"}, + {"%E", 1234.5678e3, "1.234568E+06"}, + {"%E", 1234.5678e-8, "1.234568E-05"}, + {"%E", -7.0, "-7.000000E+00"}, + {"%E", -1e-9, "-1.000000E-09"}, + {"%G", 1234.5678e3, "1.2345678E+06"}, + {"%G", float32(1234.5678e3), "1.2345678E+06"}, + {"%G", 1234.5678e-8, "1.2345678E-05"}, + {"%G", -7.0, "-7"}, + {"%G", -1e-9, "-1E-09"}, + {"%G", float32(-1e-9), "-1E-09"}, + + {"%20.6e", 1.2345e3, " 1.234500e+03"}, + {"%20.6e", 1.2345e-3, " 1.234500e-03"}, + {"%20e", 1.2345e3, " 1.234500e+03"}, + {"%20e", 1.2345e-3, " 1.234500e-03"}, + {"%20.8e", 1.2345e3, " 1.23450000e+03"}, + {"%20f", 1.23456789e3, " 1234.567890"}, + {"%20f", 1.23456789e-3, " 0.001235"}, + {"%20f", 12345678901.23456789, " 12345678901.234568"}, + {"%-20f", 1.23456789e3, "1234.567890 "}, + {"%20.8f", 1.23456789e3, " 1234.56789000"}, + {"%20.8f", 1.23456789e-3, " 0.00123457"}, + {"%g", 1.23456789e3, "1234.56789"}, + {"%g", 1.23456789e-3, "0.00123456789"}, + {"%g", 1.23456789e20, "1.23456789e+20"}, + {"%20e", math.Inf(1), " +Inf"}, + {"%-20f", math.Inf(-1), "-Inf "}, + + // from fmt/fmt_test.go: comparison of padding rules with C printf + {"%.2f", 1.0, "1.00"}, + {"%.2f", -1.0, "-1.00"}, + {"% .2f", 1.0, " 1.00"}, + {"% .2f", -1.0, "-1.00"}, + {"%+.2f", 1.0, "+1.00"}, + {"%+.2f", -1.0, "-1.00"}, + {"%7.2f", 1.0, " 1.00"}, + {"%7.2f", -1.0, " -1.00"}, + {"% 7.2f", 1.0, " 1.00"}, + {"% 7.2f", -1.0, " -1.00"}, + {"%+7.2f", 1.0, " +1.00"}, + {"%+7.2f", -1.0, " -1.00"}, + {"%07.2f", 1.0, "0001.00"}, + {"%07.2f", -1.0, "-001.00"}, + {"% 07.2f", 1.0, " 001.00"}, + {"% 07.2f", -1.0, "-001.00"}, + {"%+07.2f", 1.0, "+001.00"}, + {"%+07.2f", -1.0, "-001.00"}, + + // from fmt/fmt_test.go: zero padding does not 
apply to infinities + {"%020f", math.Inf(-1), " -Inf"}, + {"%020f", math.Inf(+1), " +Inf"}, + {"% 020f", math.Inf(-1), " -Inf"}, + {"% 020f", math.Inf(+1), " Inf"}, + {"%+020f", math.Inf(-1), " -Inf"}, + {"%+020f", math.Inf(+1), " +Inf"}, + {"%20f", -1.0, " -1.000000"}, + + // handle %v like %g + {"%v", 0.0, "0"}, + {"%v", -7.0, "-7"}, + {"%v", -1e-9, "-1e-09"}, + {"%v", float32(-1e-9), "-1e-09"}, + {"%010v", 0.0, "0000000000"}, + + // *Float cases + {"%.20f", "1e-20", "0.00000000000000000001"}, + {"%.20f", "-1e-20", "-0.00000000000000000001"}, + {"%30.20f", "-1e-20", " -0.00000000000000000001"}, + {"%030.20f", "-1e-20", "-00000000.00000000000000000001"}, + {"%030.20f", "+1e-20", "000000000.00000000000000000001"}, + {"% 030.20f", "+1e-20", " 00000000.00000000000000000001"}, + + // erroneous formats + {"%s", 1.0, "%!s(*big.Float=1)"}, + } { + value := new(Float) + switch v := test.value.(type) { + case float32: + value.SetPrec(24).SetFloat64(float64(v)) + case float64: + value.SetPrec(53).SetFloat64(v) + case string: + value.SetPrec(512).Parse(v, 0) + default: + t.Fatalf("unsupported test value: %v (%T)", v, v) + } + + if got := fmt.Sprintf(test.format, value); got != test.want { + t.Errorf("%v: got %q; want %q", test, got, test.want) + } + } +} + +func BenchmarkParseFloatSmallExp(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range []string{ + "1e0", + "1e-1", + "1e-2", + "1e-3", + "1e-4", + "1e-5", + "1e-10", + "1e-20", + "1e-50", + "1e1", + "1e2", + "1e3", + "1e4", + "1e5", + "1e10", + "1e20", + "1e50", + } { + var x Float + _, _, err := x.Parse(s, 0) + if err != nil { + b.Fatalf("%s: %v", s, err) + } + } + } +} + +func BenchmarkParseFloatLargeExp(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range []string{ + "1e0", + "1e-10", + "1e-20", + "1e-30", + "1e-40", + "1e-50", + "1e-100", + "1e-500", + "1e-1000", + "1e-5000", + "1e-10000", + "1e10", + "1e20", + "1e30", + "1e40", + "1e50", + "1e100", + "1e500", + "1e1000", + "1e5000", + 
"1e10000", + } { + var x Float + _, _, err := x.Parse(s, 0) + if err != nil { + b.Fatalf("%s: %v", s, err) + } + } + } +} + +func TestFloatScan(t *testing.T) { + var floatScanTests = []struct { + input string + format string + output string + remaining int + wantErr bool + }{ + 0: {"10.0", "%f", "10", 0, false}, + 1: {"23.98+2.0", "%v", "23.98", 4, false}, + 2: {"-1+1", "%v", "-1", 2, false}, + 3: {" 00000", "%v", "0", 0, false}, + 4: {"-123456p-78", "%b", "-4.084816388e-19", 0, false}, + 5: {"+123", "%b", "123", 0, false}, + 6: {"-1.234e+56", "%e", "-1.234e+56", 0, false}, + 7: {"-1.234E-56", "%E", "-1.234e-56", 0, false}, + 8: {"-1.234e+567", "%g", "-1.234e+567", 0, false}, + 9: {"+1234567891011.234", "%G", "1.234567891e+12", 0, false}, + + // Scan doesn't handle ±Inf. + 10: {"Inf", "%v", "", 3, true}, + 11: {"-Inf", "%v", "", 3, true}, + 12: {"-Inf", "%v", "", 3, true}, + } + + var buf bytes.Buffer + for i, test := range floatScanTests { + x := new(Float) + buf.Reset() + buf.WriteString(test.input) + _, err := fmt.Fscanf(&buf, test.format, x) + if test.wantErr { + if err == nil { + t.Errorf("#%d want non-nil err", i) + } + continue + } + + if err != nil { + t.Errorf("#%d error: %s", i, err) + } + + if x.String() != test.output { + t.Errorf("#%d got %s; want %s", i, x.String(), test.output) + } + if buf.Len() != test.remaining { + t.Errorf("#%d got %d bytes remaining; want %d", i, buf.Len(), test.remaining) + } + } +} diff --git a/src/math/big/floatexample_test.go b/src/math/big/floatexample_test.go new file mode 100644 index 0000000..0c6668c --- /dev/null +++ b/src/math/big/floatexample_test.go @@ -0,0 +1,141 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big_test + +import ( + "fmt" + "math" + "math/big" +) + +func ExampleFloat_Add() { + // Operate on numbers of different precision. 
+ var x, y, z big.Float + x.SetInt64(1000) // x is automatically set to 64bit precision + y.SetFloat64(2.718281828) // y is automatically set to 53bit precision + z.SetPrec(32) + z.Add(&x, &y) + fmt.Printf("x = %.10g (%s, prec = %d, acc = %s)\n", &x, x.Text('p', 0), x.Prec(), x.Acc()) + fmt.Printf("y = %.10g (%s, prec = %d, acc = %s)\n", &y, y.Text('p', 0), y.Prec(), y.Acc()) + fmt.Printf("z = %.10g (%s, prec = %d, acc = %s)\n", &z, z.Text('p', 0), z.Prec(), z.Acc()) + // Output: + // x = 1000 (0x.fap+10, prec = 64, acc = Exact) + // y = 2.718281828 (0x.adf85458248cd8p+2, prec = 53, acc = Exact) + // z = 1002.718282 (0x.faadf854p+10, prec = 32, acc = Below) +} + +func ExampleFloat_shift() { + // Implement Float "shift" by modifying the (binary) exponents directly. + for s := -5; s <= 5; s++ { + x := big.NewFloat(0.5) + x.SetMantExp(x, x.MantExp(nil)+s) // shift x by s + fmt.Println(x) + } + // Output: + // 0.015625 + // 0.03125 + // 0.0625 + // 0.125 + // 0.25 + // 0.5 + // 1 + // 2 + // 4 + // 8 + // 16 +} + +func ExampleFloat_Cmp() { + inf := math.Inf(1) + zero := 0.0 + + operands := []float64{-inf, -1.2, -zero, 0, +1.2, +inf} + + fmt.Println(" x y cmp") + fmt.Println("---------------") + for _, x64 := range operands { + x := big.NewFloat(x64) + for _, y64 := range operands { + y := big.NewFloat(y64) + fmt.Printf("%4g %4g %3d\n", x, y, x.Cmp(y)) + } + fmt.Println() + } + + // Output: + // x y cmp + // --------------- + // -Inf -Inf 0 + // -Inf -1.2 -1 + // -Inf -0 -1 + // -Inf 0 -1 + // -Inf 1.2 -1 + // -Inf +Inf -1 + // + // -1.2 -Inf 1 + // -1.2 -1.2 0 + // -1.2 -0 -1 + // -1.2 0 -1 + // -1.2 1.2 -1 + // -1.2 +Inf -1 + // + // -0 -Inf 1 + // -0 -1.2 1 + // -0 -0 0 + // -0 0 0 + // -0 1.2 -1 + // -0 +Inf -1 + // + // 0 -Inf 1 + // 0 -1.2 1 + // 0 -0 0 + // 0 0 0 + // 0 1.2 -1 + // 0 +Inf -1 + // + // 1.2 -Inf 1 + // 1.2 -1.2 1 + // 1.2 -0 1 + // 1.2 0 1 + // 1.2 1.2 0 + // 1.2 +Inf -1 + // + // +Inf -Inf 1 + // +Inf -1.2 1 + // +Inf -0 1 + // +Inf 0 1 + // +Inf 
1.2 1 + // +Inf +Inf 0 +} + +func ExampleRoundingMode() { + operands := []float64{2.6, 2.5, 2.1, -2.1, -2.5, -2.6} + + fmt.Print(" x") + for mode := big.ToNearestEven; mode <= big.ToPositiveInf; mode++ { + fmt.Printf(" %s", mode) + } + fmt.Println() + + for _, f64 := range operands { + fmt.Printf("%4g", f64) + for mode := big.ToNearestEven; mode <= big.ToPositiveInf; mode++ { + // sample operands above require 2 bits to represent mantissa + // set binary precision to 2 to round them to integer values + f := new(big.Float).SetPrec(2).SetMode(mode).SetFloat64(f64) + fmt.Printf(" %*g", len(mode.String()), f) + } + fmt.Println() + } + + // Output: + // x ToNearestEven ToNearestAway ToZero AwayFromZero ToNegativeInf ToPositiveInf + // 2.6 3 3 2 3 2 3 + // 2.5 2 3 2 3 2 3 + // 2.1 2 2 2 3 2 3 + // -2.1 -2 -2 -2 -3 -3 -2 + // -2.5 -2 -3 -2 -3 -3 -2 + // -2.6 -3 -3 -2 -3 -3 -2 +} diff --git a/src/math/big/floatmarsh.go b/src/math/big/floatmarsh.go new file mode 100644 index 0000000..990e085 --- /dev/null +++ b/src/math/big/floatmarsh.go @@ -0,0 +1,127 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements encoding/decoding of Floats. + +package big + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// Gob codec version. Permits backward-compatible changes to the encoding. +const floatGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +// The Float value and all its attributes (precision, +// rounding mode, accuracy) are marshaled. +func (x *Float) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + + // determine max. 
space (bytes) required for encoding + sz := 1 + 1 + 4 // version + mode|acc|form|neg (3+2+2+1bit) + prec + n := 0 // number of mantissa words + if x.form == finite { + // add space for mantissa and exponent + n = int((x.prec + (_W - 1)) / _W) // required mantissa length in words for given precision + // actual mantissa slice could be shorter (trailing 0's) or longer (unused bits): + // - if shorter, only encode the words present + // - if longer, cut off unused words when encoding in bytes + // (in practice, this should never happen since rounding + // takes care of it, but be safe and do it always) + if len(x.mant) < n { + n = len(x.mant) + } + // len(x.mant) >= n + sz += 4 + n*_S // exp + mant + } + buf := make([]byte, sz) + + buf[0] = floatGobVersion + b := byte(x.mode&7)<<5 | byte((x.acc+1)&3)<<3 | byte(x.form&3)<<1 + if x.neg { + b |= 1 + } + buf[1] = b + binary.BigEndian.PutUint32(buf[2:], x.prec) + + if x.form == finite { + binary.BigEndian.PutUint32(buf[6:], uint32(x.exp)) + x.mant[len(x.mant)-n:].bytes(buf[10:]) // cut off unused trailing words + } + + return buf, nil +} + +// GobDecode implements the gob.GobDecoder interface. +// The result is rounded per the precision and rounding mode of +// z unless z's precision is 0, in which case z is set exactly +// to the decoded value. +func (z *Float) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. 
+ *z = Float{} + return nil + } + if len(buf) < 6 { + return errors.New("Float.GobDecode: buffer too small") + } + + if buf[0] != floatGobVersion { + return fmt.Errorf("Float.GobDecode: encoding version %d not supported", buf[0]) + } + + oldPrec := z.prec + oldMode := z.mode + + b := buf[1] + z.mode = RoundingMode((b >> 5) & 7) + z.acc = Accuracy((b>>3)&3) - 1 + z.form = form((b >> 1) & 3) + z.neg = b&1 != 0 + z.prec = binary.BigEndian.Uint32(buf[2:]) + + if z.form == finite { + if len(buf) < 10 { + return errors.New("Float.GobDecode: buffer too small for finite form float") + } + z.exp = int32(binary.BigEndian.Uint32(buf[6:])) + z.mant = z.mant.setBytes(buf[10:]) + } + + if oldPrec != 0 { + z.mode = oldMode + z.SetPrec(uint(oldPrec)) + } + + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +// Only the Float value is marshaled (in full precision), other +// attributes such as precision or accuracy are ignored. +func (x *Float) MarshalText() (text []byte, err error) { + if x == nil { + return []byte("<nil>"), nil + } + var buf []byte + return x.Append(buf, 'g', -1), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. +// The result is rounded per the precision and rounding mode of z. +// If z's precision is 0, it is changed to 64 before rounding takes +// effect. +func (z *Float) UnmarshalText(text []byte) error { + // TODO(gri): get rid of the []byte/string conversion + _, _, err := z.Parse(string(text), 0) + if err != nil { + err = fmt.Errorf("math/big: cannot unmarshal %q into a *big.Float (%v)", text, err) + } + return err +} diff --git a/src/math/big/floatmarsh_test.go b/src/math/big/floatmarsh_test.go new file mode 100644 index 0000000..401f45a --- /dev/null +++ b/src/math/big/floatmarsh_test.go @@ -0,0 +1,151 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// floatVals lists the number literals shared by the Float gob and JSON
// round-trip tests in this file. The tests prepend "", "+" and "-" to each
// entry, so the table itself only carries unsigned spellings: zero, small and
// large magnitudes, mantissas longer than a float64 can hold, and two
// spellings of infinity.
var floatVals = []string{
	"0",
	"1",
	"0.1",
	"2.71828",
	"1234567890",
	"3.14e1234",
	"3.14e-1234",
	"0.738957395793475734757349579759957975985497e100",
	"0.73895739579347546656564656573475734957975995797598589749859834759476745986795497e100",
	"inf",
	"Inf",
}
+ if prec == 0 { + tx.SetPrec(0) + } + + if err := enc.Encode(&tx); err != nil { + t.Errorf("encoding of %v (%dbits, %v) failed: %v", &tx, prec, mode, err) + continue + } + + var rx Float + if err := dec.Decode(&rx); err != nil { + t.Errorf("decoding of %v (%dbits, %v) failed: %v", &tx, prec, mode, err) + continue + } + + if rx.Cmp(&tx) != 0 { + t.Errorf("transmission of %s failed: got %s want %s", x, rx.String(), tx.String()) + continue + } + + if rx.Prec() != prec { + t.Errorf("transmission of %s's prec failed: got %d want %d", x, rx.Prec(), prec) + } + + if rx.Mode() != mode { + t.Errorf("transmission of %s's mode failed: got %s want %s", x, rx.Mode(), mode) + } + + if rx.Acc() != tx.Acc() { + t.Errorf("transmission of %s's accuracy failed: got %s want %s", x, rx.Acc(), tx.Acc()) + } + } + } + } + } +} + +func TestFloatCorruptGob(t *testing.T) { + var buf bytes.Buffer + tx := NewFloat(4 / 3).SetPrec(1000).SetMode(ToPositiveInf) + if err := gob.NewEncoder(&buf).Encode(tx); err != nil { + t.Fatal(err) + } + b := buf.Bytes() + + var rx Float + if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err != nil { + t.Fatal(err) + } + + if err := gob.NewDecoder(bytes.NewReader(b[:10])).Decode(&rx); err != io.ErrUnexpectedEOF { + t.Errorf("got %v want EOF", err) + } + + b[1] = 0 + if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err == nil { + t.Fatal("got nil want version error") + } +} + +func TestFloatJSONEncoding(t *testing.T) { + for _, test := range floatVals { + for _, sign := range []string{"", "+", "-"} { + for _, prec := range []uint{0, 1, 2, 10, 53, 64, 100, 1000} { + if prec > 53 && testing.Short() { + continue + } + x := sign + test + var tx Float + _, _, err := tx.SetPrec(prec).Parse(x, 0) + if err != nil { + t.Errorf("parsing of %s (prec = %d) failed (invalid test case): %v", x, prec, err) + continue + } + b, err := json.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %v (prec = %d) failed: %v", &tx, prec, err) + continue + } + var rx 
Float + rx.SetPrec(prec) + if err := json.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %v (prec = %d) failed: %v", &tx, prec, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("JSON encoding of %v (prec = %d) failed: got %v want %v", &tx, prec, &rx, &tx) + } + } + } + } +} + +func TestFloatGobDecodeShortBuffer(t *testing.T) { + for _, tc := range [][]byte{ + []byte{0x1, 0x0, 0x0, 0x0}, + []byte{0x1, 0xfa, 0x0, 0x0, 0x0, 0x0}, + } { + err := NewFloat(0).GobDecode(tc) + if err == nil { + t.Error("expected GobDecode to return error for malformed input") + } + } +} diff --git a/src/math/big/ftoa.go b/src/math/big/ftoa.go new file mode 100644 index 0000000..5506e6e --- /dev/null +++ b/src/math/big/ftoa.go @@ -0,0 +1,536 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements Float-to-string conversion functions. +// It is closely following the corresponding implementation +// in strconv/ftoa.go, but modified and simplified for Float. + +package big + +import ( + "bytes" + "fmt" + "strconv" +) + +// Text converts the floating-point number x to a string according +// to the given format and precision prec. 
The format is one of: +// +// 'e' -d.dddde±dd, decimal exponent, at least two (possibly 0) exponent digits +// 'E' -d.ddddE±dd, decimal exponent, at least two (possibly 0) exponent digits +// 'f' -ddddd.dddd, no exponent +// 'g' like 'e' for large exponents, like 'f' otherwise +// 'G' like 'E' for large exponents, like 'f' otherwise +// 'x' -0xd.dddddp±dd, hexadecimal mantissa, decimal power of two exponent +// 'p' -0x.dddp±dd, hexadecimal mantissa, decimal power of two exponent (non-standard) +// 'b' -ddddddp±dd, decimal mantissa, decimal power of two exponent (non-standard) +// +// For the power-of-two exponent formats, the mantissa is printed in normalized form: +// +// 'x' hexadecimal mantissa in [1, 2), or 0 +// 'p' hexadecimal mantissa in [½, 1), or 0 +// 'b' decimal integer mantissa using x.Prec() bits, or 0 +// +// Note that the 'x' form is the one used by most other languages and libraries. +// +// If format is a different character, Text returns a "%" followed by the +// unrecognized format character. +// +// The precision prec controls the number of digits (excluding the exponent) +// printed by the 'e', 'E', 'f', 'g', 'G', and 'x' formats. +// For 'e', 'E', 'f', and 'x', it is the number of digits after the decimal point. +// For 'g' and 'G' it is the total number of digits. A negative precision selects +// the smallest number of decimal digits necessary to identify the value x uniquely +// using x.Prec() mantissa bits. +// The prec value is ignored for the 'b' and 'p' formats. +func (x *Float) Text(format byte, prec int) string { + cap := 10 // TODO(gri) determine a good/better value here + if prec > 0 { + cap += prec + } + return string(x.Append(make([]byte, 0, cap), format, prec)) +} + +// String formats x like x.Text('g', 10). +// (String must be called explicitly, Float.Format does not support %s verb.) 
// Append appends to buf the string form of the floating-point number x,
// as generated by x.Text, and returns the extended buffer.
//
// The 'b', 'p' and 'x' formats are delegated to fmtB, fmtP and fmtX.
// The decimal formats ('e', 'E', 'f', 'g', 'G') go through a decimal
// conversion, a rounding step and a digit formatting step. Any other
// format byte yields "%" followed by that byte, without a sign.
//
// Note that the parameter named fmt shadows the fmt package inside
// this function.
func (x *Float) Append(buf []byte, fmt byte, prec int) []byte {
	// sign
	if x.neg {
		buf = append(buf, '-')
	}

	// Inf
	if x.form == inf {
		if !x.neg {
			// positive infinity always carries an explicit sign
			buf = append(buf, '+')
		}
		return append(buf, "Inf"...)
	}

	// pick off easy formats
	switch fmt {
	case 'b':
		return x.fmtB(buf)
	case 'p':
		return x.fmtP(buf)
	case 'x':
		return x.fmtX(buf, prec)
	}

	// Algorithm:
	//   1) convert Float to multiprecision decimal
	//   2) round to desired precision
	//   3) read digits out and format

	// 1) convert Float to multiprecision decimal
	var d decimal // == 0.0
	if x.form == finite {
		// x != 0
		d.init(x.mant, int(x.exp)-x.mant.bitLen())
	}

	// 2) round to desired precision
	shortest := false
	if prec < 0 {
		// A negative precision asks for the shortest digit string;
		// prec is then derived from the digits that remain.
		shortest = true
		roundShortest(&d, x)
		// Precision for shortest representation mode.
		switch fmt {
		case 'e', 'E':
			prec = len(d.mant) - 1
		case 'f':
			prec = max(len(d.mant)-d.exp, 0)
		case 'g', 'G':
			prec = len(d.mant)
		}
	} else {
		// round appropriately
		switch fmt {
		case 'e', 'E':
			// one digit before and number of digits after decimal point
			d.round(1 + prec)
		case 'f':
			// number of digits before and after decimal point
			d.round(d.exp + prec)
		case 'g', 'G':
			if prec == 0 {
				prec = 1
			}
			d.round(prec)
		}
	}

	// 3) read digits out and format
	switch fmt {
	case 'e', 'E':
		return fmtE(buf, fmt, prec, d)
	case 'f':
		return fmtF(buf, prec, d)
	case 'g', 'G':
		// trim trailing fractional zeros in %e format
		eprec := prec
		if eprec > len(d.mant) && len(d.mant) >= d.exp {
			eprec = len(d.mant)
		}
		// %e is used if the exponent from the conversion
		// is less than -4 or greater than or equal to the precision.
		// If precision was the shortest possible, use eprec = 6 for
		// this decision.
		if shortest {
			eprec = 6
		}
		exp := d.exp - 1
		if exp < -4 || exp >= eprec {
			if prec > len(d.mant) {
				prec = len(d.mant)
			}
			// fmt+'e'-'g' maps 'g' to 'e' and 'G' to 'E'
			return fmtE(buf, fmt+'e'-'g', prec-1, d)
		}
		if prec > d.exp {
			prec = len(d.mant)
		}
		return fmtF(buf, max(prec-d.exp, 0), d)
	}

	// unknown format
	if x.neg {
		buf = buf[:len(buf)-1] // sign was added prematurely - remove it again
	}
	return append(buf, '%', fmt)
}
+ inclusive := mant[0]&2 == 0 // test bit 1 since original mantissa was shifted by 1 + + // Now we can figure out the minimum number of digits required. + // Walk along until d has distinguished itself from upper and lower. + for i, m := range d.mant { + l := lower.at(i) + u := upper.at(i) + + // Okay to round down (truncate) if lower has a different digit + // or if lower is inclusive and is exactly the result of rounding + // down (i.e., and we have reached the final digit of lower). + okdown := l != m || inclusive && i+1 == len(lower.mant) + + // Okay to round up if upper has a different digit and either upper + // is inclusive or upper is bigger than the result of rounding up. + okup := m != u && (inclusive || m+1 < u || i+1 < len(upper.mant)) + + // If it's okay to do either, then round to the nearest one. + // If it's okay to do only one, do it. + switch { + case okdown && okup: + d.round(i + 1) + return + case okdown: + d.roundDown(i + 1) + return + case okup: + d.roundUp(i + 1) + return + } + } +} + +// %e: d.ddddde±dd +func fmtE(buf []byte, fmt byte, prec int, d decimal) []byte { + // first digit + ch := byte('0') + if len(d.mant) > 0 { + ch = d.mant[0] + } + buf = append(buf, ch) + + // .moredigits + if prec > 0 { + buf = append(buf, '.') + i := 1 + m := min(len(d.mant), prec+1) + if i < m { + buf = append(buf, d.mant[i:m]...) + i = m + } + for ; i <= prec; i++ { + buf = append(buf, '0') + } + } + + // e± + buf = append(buf, fmt) + var exp int64 + if len(d.mant) > 0 { + exp = int64(d.exp) - 1 // -1 because first digit was printed before '.' 
+ } + if exp < 0 { + ch = '-' + exp = -exp + } else { + ch = '+' + } + buf = append(buf, ch) + + // dd...d + if exp < 10 { + buf = append(buf, '0') // at least 2 exponent digits + } + return strconv.AppendInt(buf, exp, 10) +} + +// %f: ddddddd.ddddd +func fmtF(buf []byte, prec int, d decimal) []byte { + // integer, padded with zeros as needed + if d.exp > 0 { + m := min(len(d.mant), d.exp) + buf = append(buf, d.mant[:m]...) + for ; m < d.exp; m++ { + buf = append(buf, '0') + } + } else { + buf = append(buf, '0') + } + + // fraction + if prec > 0 { + buf = append(buf, '.') + for i := 0; i < prec; i++ { + buf = append(buf, d.at(d.exp+i)) + } + } + + return buf +} + +// fmtB appends the string of x in the format mantissa "p" exponent +// with a decimal mantissa and a binary exponent, or 0" if x is zero, +// and returns the extended buffer. +// The mantissa is normalized such that is uses x.Prec() bits in binary +// representation. +// The sign of x is ignored, and x must not be an Inf. +// (The caller handles Inf before invoking fmtB.) +func (x *Float) fmtB(buf []byte) []byte { + if x.form == zero { + return append(buf, '0') + } + + if debugFloat && x.form != finite { + panic("non-finite float") + } + // x != 0 + + // adjust mantissa to use exactly x.prec bits + m := x.mant + switch w := uint32(len(x.mant)) * _W; { + case w < x.prec: + m = nat(nil).shl(m, uint(x.prec-w)) + case w > x.prec: + m = nat(nil).shr(m, uint(w-x.prec)) + } + + buf = append(buf, m.utoa(10)...) + buf = append(buf, 'p') + e := int64(x.exp) - int64(x.prec) + if e >= 0 { + buf = append(buf, '+') + } + return strconv.AppendInt(buf, e, 10) +} + +// fmtX appends the string of x in the format "0x1." mantissa "p" exponent +// with a hexadecimal mantissa and a binary exponent, or "0x0p0" if x is zero, +// and returns the extended buffer. +// A non-zero mantissa is normalized such that 1.0 <= mantissa < 2.0. +// The sign of x is ignored, and x must not be an Inf. 
+// (The caller handles Inf before invoking fmtX.) +func (x *Float) fmtX(buf []byte, prec int) []byte { + if x.form == zero { + buf = append(buf, "0x0"...) + if prec > 0 { + buf = append(buf, '.') + for i := 0; i < prec; i++ { + buf = append(buf, '0') + } + } + buf = append(buf, "p+00"...) + return buf + } + + if debugFloat && x.form != finite { + panic("non-finite float") + } + + // round mantissa to n bits + var n uint + if prec < 0 { + n = 1 + (x.MinPrec()-1+3)/4*4 // round MinPrec up to 1 mod 4 + } else { + n = 1 + 4*uint(prec) + } + // n%4 == 1 + x = new(Float).SetPrec(n).SetMode(x.mode).Set(x) + + // adjust mantissa to use exactly n bits + m := x.mant + switch w := uint(len(x.mant)) * _W; { + case w < n: + m = nat(nil).shl(m, n-w) + case w > n: + m = nat(nil).shr(m, w-n) + } + exp64 := int64(x.exp) - 1 // avoid wrap-around + + hm := m.utoa(16) + if debugFloat && hm[0] != '1' { + panic("incorrect mantissa: " + string(hm)) + } + buf = append(buf, "0x1"...) + if len(hm) > 1 { + buf = append(buf, '.') + buf = append(buf, hm[1:]...) + } + + buf = append(buf, 'p') + if exp64 >= 0 { + buf = append(buf, '+') + } else { + exp64 = -exp64 + buf = append(buf, '-') + } + // Force at least two exponent digits, to match fmt. + if exp64 < 10 { + buf = append(buf, '0') + } + return strconv.AppendInt(buf, exp64, 10) +} + +// fmtP appends the string of x in the format "0x." mantissa "p" exponent +// with a hexadecimal mantissa and a binary exponent, or "0" if x is zero, +// and returns the extended buffer. +// The mantissa is normalized such that 0.5 <= 0.mantissa < 1.0. +// The sign of x is ignored, and x must not be an Inf. +// (The caller handles Inf before invoking fmtP.) 
+func (x *Float) fmtP(buf []byte) []byte { + if x.form == zero { + return append(buf, '0') + } + + if debugFloat && x.form != finite { + panic("non-finite float") + } + // x != 0 + + // remove trailing 0 words early + // (no need to convert to hex 0's and trim later) + m := x.mant + i := 0 + for i < len(m) && m[i] == 0 { + i++ + } + m = m[i:] + + buf = append(buf, "0x."...) + buf = append(buf, bytes.TrimRight(m.utoa(16), "0")...) + buf = append(buf, 'p') + if x.exp >= 0 { + buf = append(buf, '+') + } + return strconv.AppendInt(buf, int64(x.exp), 10) +} + +func min(x, y int) int { + if x < y { + return x + } + return y +} + +var _ fmt.Formatter = &floatZero // *Float must implement fmt.Formatter + +// Format implements fmt.Formatter. It accepts all the regular +// formats for floating-point numbers ('b', 'e', 'E', 'f', 'F', +// 'g', 'G', 'x') as well as 'p' and 'v'. See (*Float).Text for the +// interpretation of 'p'. The 'v' format is handled like 'g'. +// Format also supports specification of the minimum precision +// in digits, the output field width, as well as the format flags +// '+' and ' ' for sign control, '0' for space or zero padding, +// and '-' for left or right justification. See the fmt package +// for details. 
+func (x *Float) Format(s fmt.State, format rune) { + prec, hasPrec := s.Precision() + if !hasPrec { + prec = 6 // default precision for 'e', 'f' + } + + switch format { + case 'e', 'E', 'f', 'b', 'p', 'x': + // nothing to do + case 'F': + // (*Float).Text doesn't support 'F'; handle like 'f' + format = 'f' + case 'v': + // handle like 'g' + format = 'g' + fallthrough + case 'g', 'G': + if !hasPrec { + prec = -1 // default precision for 'g', 'G' + } + default: + fmt.Fprintf(s, "%%!%c(*big.Float=%s)", format, x.String()) + return + } + var buf []byte + buf = x.Append(buf, byte(format), prec) + if len(buf) == 0 { + buf = []byte("?") // should never happen, but don't crash + } + // len(buf) > 0 + + var sign string + switch { + case buf[0] == '-': + sign = "-" + buf = buf[1:] + case buf[0] == '+': + // +Inf + sign = "+" + if s.Flag(' ') { + sign = " " + } + buf = buf[1:] + case s.Flag('+'): + sign = "+" + case s.Flag(' '): + sign = " " + } + + var padding int + if width, hasWidth := s.Width(); hasWidth && width > len(sign)+len(buf) { + padding = width - len(sign) - len(buf) + } + + switch { + case s.Flag('0') && !x.IsInf(): + // 0-padding on left + writeMultiple(s, sign, 1) + writeMultiple(s, "0", padding) + s.Write(buf) + case s.Flag('-'): + // padding on right + writeMultiple(s, sign, 1) + s.Write(buf) + writeMultiple(s, " ", padding) + default: + // padding on left + writeMultiple(s, " ", padding) + writeMultiple(s, sign, 1) + s.Write(buf) + } +} diff --git a/src/math/big/gcd_test.go b/src/math/big/gcd_test.go new file mode 100644 index 0000000..3cca2ec --- /dev/null +++ b/src/math/big/gcd_test.go @@ -0,0 +1,64 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a GCD benchmark. 
+// Usage: go test math/big -test.bench GCD + +package big + +import ( + "math/rand" + "testing" +) + +// randInt returns a pseudo-random Int in the range [1<<(size-1), (1<<size) - 1] +func randInt(r *rand.Rand, size uint) *Int { + n := new(Int).Lsh(intOne, size-1) + x := new(Int).Rand(r, n) + return x.Add(x, n) // make sure result > 1<<(size-1) +} + +func runGCD(b *testing.B, aSize, bSize uint) { + if isRaceBuilder && (aSize > 1000 || bSize > 1000) { + b.Skip("skipping on race builder") + } + b.Run("WithoutXY", func(b *testing.B) { + runGCDExt(b, aSize, bSize, false) + }) + b.Run("WithXY", func(b *testing.B) { + runGCDExt(b, aSize, bSize, true) + }) +} + +func runGCDExt(b *testing.B, aSize, bSize uint, calcXY bool) { + b.StopTimer() + var r = rand.New(rand.NewSource(1234)) + aa := randInt(r, aSize) + bb := randInt(r, bSize) + var x, y *Int + if calcXY { + x = new(Int) + y = new(Int) + } + b.StartTimer() + for i := 0; i < b.N; i++ { + new(Int).GCD(x, y, aa, bb) + } +} + +func BenchmarkGCD10x10(b *testing.B) { runGCD(b, 10, 10) } +func BenchmarkGCD10x100(b *testing.B) { runGCD(b, 10, 100) } +func BenchmarkGCD10x1000(b *testing.B) { runGCD(b, 10, 1000) } +func BenchmarkGCD10x10000(b *testing.B) { runGCD(b, 10, 10000) } +func BenchmarkGCD10x100000(b *testing.B) { runGCD(b, 10, 100000) } +func BenchmarkGCD100x100(b *testing.B) { runGCD(b, 100, 100) } +func BenchmarkGCD100x1000(b *testing.B) { runGCD(b, 100, 1000) } +func BenchmarkGCD100x10000(b *testing.B) { runGCD(b, 100, 10000) } +func BenchmarkGCD100x100000(b *testing.B) { runGCD(b, 100, 100000) } +func BenchmarkGCD1000x1000(b *testing.B) { runGCD(b, 1000, 1000) } +func BenchmarkGCD1000x10000(b *testing.B) { runGCD(b, 1000, 10000) } +func BenchmarkGCD1000x100000(b *testing.B) { runGCD(b, 1000, 100000) } +func BenchmarkGCD10000x10000(b *testing.B) { runGCD(b, 10000, 10000) } +func BenchmarkGCD10000x100000(b *testing.B) { runGCD(b, 10000, 100000) } +func BenchmarkGCD100000x100000(b *testing.B) { runGCD(b, 100000, 
100000) } diff --git a/src/math/big/hilbert_test.go b/src/math/big/hilbert_test.go new file mode 100644 index 0000000..1a84341 --- /dev/null +++ b/src/math/big/hilbert_test.go @@ -0,0 +1,160 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A little test program and benchmark for rational arithmetics. +// Computes a Hilbert matrix, its inverse, multiplies them +// and verifies that the product is the identity matrix. + +package big + +import ( + "fmt" + "testing" +) + +type matrix struct { + n, m int + a []*Rat +} + +func (a *matrix) at(i, j int) *Rat { + if !(0 <= i && i < a.n && 0 <= j && j < a.m) { + panic("index out of range") + } + return a.a[i*a.m+j] +} + +func (a *matrix) set(i, j int, x *Rat) { + if !(0 <= i && i < a.n && 0 <= j && j < a.m) { + panic("index out of range") + } + a.a[i*a.m+j] = x +} + +func newMatrix(n, m int) *matrix { + if !(0 <= n && 0 <= m) { + panic("illegal matrix") + } + a := new(matrix) + a.n = n + a.m = m + a.a = make([]*Rat, n*m) + return a +} + +func newUnit(n int) *matrix { + a := newMatrix(n, n) + for i := 0; i < n; i++ { + for j := 0; j < n; j++ { + x := NewRat(0, 1) + if i == j { + x.SetInt64(1) + } + a.set(i, j, x) + } + } + return a +} + +func newHilbert(n int) *matrix { + a := newMatrix(n, n) + for i := 0; i < n; i++ { + for j := 0; j < n; j++ { + a.set(i, j, NewRat(1, int64(i+j+1))) + } + } + return a +} + +func newInverseHilbert(n int) *matrix { + a := newMatrix(n, n) + for i := 0; i < n; i++ { + for j := 0; j < n; j++ { + x1 := new(Rat).SetInt64(int64(i + j + 1)) + x2 := new(Rat).SetInt(new(Int).Binomial(int64(n+i), int64(n-j-1))) + x3 := new(Rat).SetInt(new(Int).Binomial(int64(n+j), int64(n-i-1))) + x4 := new(Rat).SetInt(new(Int).Binomial(int64(i+j), int64(i))) + + x1.Mul(x1, x2) + x1.Mul(x1, x3) + x1.Mul(x1, x4) + x1.Mul(x1, x4) + + if (i+j)&1 != 0 { + x1.Neg(x1) + } + + a.set(i, j, x1) + } + } + 
return a +} + +func (a *matrix) mul(b *matrix) *matrix { + if a.m != b.n { + panic("illegal matrix multiply") + } + c := newMatrix(a.n, b.m) + for i := 0; i < c.n; i++ { + for j := 0; j < c.m; j++ { + x := NewRat(0, 1) + for k := 0; k < a.m; k++ { + x.Add(x, new(Rat).Mul(a.at(i, k), b.at(k, j))) + } + c.set(i, j, x) + } + } + return c +} + +func (a *matrix) eql(b *matrix) bool { + if a.n != b.n || a.m != b.m { + return false + } + for i := 0; i < a.n; i++ { + for j := 0; j < a.m; j++ { + if a.at(i, j).Cmp(b.at(i, j)) != 0 { + return false + } + } + } + return true +} + +func (a *matrix) String() string { + s := "" + for i := 0; i < a.n; i++ { + for j := 0; j < a.m; j++ { + s += fmt.Sprintf("\t%s", a.at(i, j)) + } + s += "\n" + } + return s +} + +func doHilbert(t *testing.T, n int) { + a := newHilbert(n) + b := newInverseHilbert(n) + I := newUnit(n) + ab := a.mul(b) + if !ab.eql(I) { + if t == nil { + panic("Hilbert failed") + } + t.Errorf("a = %s\n", a) + t.Errorf("b = %s\n", b) + t.Errorf("a*b = %s\n", ab) + t.Errorf("I = %s\n", I) + } +} + +func TestHilbert(t *testing.T) { + doHilbert(t, 10) +} + +func BenchmarkHilbert(b *testing.B) { + for i := 0; i < b.N; i++ { + doHilbert(nil, 10) + } +} diff --git a/src/math/big/int.go b/src/math/big/int.go new file mode 100644 index 0000000..7647346 --- /dev/null +++ b/src/math/big/int.go @@ -0,0 +1,1218 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements signed multi-precision integers. + +package big + +import ( + "fmt" + "io" + "math/rand" + "strings" +) + +// An Int represents a signed multi-precision integer. +// The zero value for an Int represents the value 0. +// +// Operations always take pointer arguments (*Int) rather +// than Int values, and each unique Int value requires +// its own unique *Int pointer. 
To "copy" an Int value, +// an existing (or newly allocated) Int must be set to +// a new value using the Int.Set method; shallow copies +// of Ints are not supported and may lead to errors. +type Int struct { + neg bool // sign + abs nat // absolute value of the integer +} + +var intOne = &Int{false, natOne} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x == 0 +// +1 if x > 0 +// +func (x *Int) Sign() int { + if len(x.abs) == 0 { + return 0 + } + if x.neg { + return -1 + } + return 1 +} + +// SetInt64 sets z to x and returns z. +func (z *Int) SetInt64(x int64) *Int { + neg := false + if x < 0 { + neg = true + x = -x + } + z.abs = z.abs.setUint64(uint64(x)) + z.neg = neg + return z +} + +// SetUint64 sets z to x and returns z. +func (z *Int) SetUint64(x uint64) *Int { + z.abs = z.abs.setUint64(x) + z.neg = false + return z +} + +// NewInt allocates and returns a new Int set to x. +func NewInt(x int64) *Int { + return new(Int).SetInt64(x) +} + +// Set sets z to x and returns z. +func (z *Int) Set(x *Int) *Int { + if z != x { + z.abs = z.abs.set(x.abs) + z.neg = x.neg + } + return z +} + +// Bits provides raw (unchecked but fast) access to x by returning its +// absolute value as a little-endian Word slice. The result and x share +// the same underlying array. +// Bits is intended to support implementation of missing low-level Int +// functionality outside this package; it should be avoided otherwise. +func (x *Int) Bits() []Word { + return x.abs +} + +// SetBits provides raw (unchecked but fast) access to z by setting its +// value to abs, interpreted as a little-endian Word slice, and returning +// z. The result and abs share the same underlying array. +// SetBits is intended to support implementation of missing low-level Int +// functionality outside this package; it should be avoided otherwise. +func (z *Int) SetBits(abs []Word) *Int { + z.abs = nat(abs).norm() + z.neg = false + return z +} + +// Abs sets z to |x| (the absolute value of x) and returns z. 
+func (z *Int) Abs(x *Int) *Int { + z.Set(x) + z.neg = false + return z +} + +// Neg sets z to -x and returns z. +func (z *Int) Neg(x *Int) *Int { + z.Set(x) + z.neg = len(z.abs) > 0 && !z.neg // 0 has no sign + return z +} + +// Add sets z to the sum x+y and returns z. +func (z *Int) Add(x, y *Int) *Int { + neg := x.neg + if x.neg == y.neg { + // x + y == x + y + // (-x) + (-y) == -(x + y) + z.abs = z.abs.add(x.abs, y.abs) + } else { + // x + (-y) == x - y == -(y - x) + // (-x) + y == y - x == -(x - y) + if x.abs.cmp(y.abs) >= 0 { + z.abs = z.abs.sub(x.abs, y.abs) + } else { + neg = !neg + z.abs = z.abs.sub(y.abs, x.abs) + } + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + return z +} + +// Sub sets z to the difference x-y and returns z. +func (z *Int) Sub(x, y *Int) *Int { + neg := x.neg + if x.neg != y.neg { + // x - (-y) == x + y + // (-x) - y == -(x + y) + z.abs = z.abs.add(x.abs, y.abs) + } else { + // x - y == x - y == -(y - x) + // (-x) - (-y) == y - x == -(x - y) + if x.abs.cmp(y.abs) >= 0 { + z.abs = z.abs.sub(x.abs, y.abs) + } else { + neg = !neg + z.abs = z.abs.sub(y.abs, x.abs) + } + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + return z +} + +// Mul sets z to the product x*y and returns z. +func (z *Int) Mul(x, y *Int) *Int { + // x * y == x * y + // x * (-y) == -(x * y) + // (-x) * y == -(x * y) + // (-x) * (-y) == x * y + if x == y { + z.abs = z.abs.sqr(x.abs) + z.neg = false + return z + } + z.abs = z.abs.mul(x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg != y.neg // 0 has no sign + return z +} + +// MulRange sets z to the product of all integers +// in the range [a, b] inclusively and returns z. +// If a > b (empty range), the result is 1. 
+func (z *Int) MulRange(a, b int64) *Int { + switch { + case a > b: + return z.SetInt64(1) // empty range + case a <= 0 && b >= 0: + return z.SetInt64(0) // range includes 0 + } + // a <= b && (b < 0 || a > 0) + + neg := false + if a < 0 { + neg = (b-a)&1 == 0 + a, b = -b, -a + } + + z.abs = z.abs.mulRange(uint64(a), uint64(b)) + z.neg = neg + return z +} + +// Binomial sets z to the binomial coefficient of (n, k) and returns z. +func (z *Int) Binomial(n, k int64) *Int { + // reduce the number of multiplications by reducing k + if n/2 < k && k <= n { + k = n - k // Binomial(n, k) == Binomial(n, n-k) + } + var a, b Int + a.MulRange(n-k+1, n) + b.MulRange(1, k) + return z.Quo(&a, &b) +} + +// Quo sets z to the quotient x/y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Quo implements truncated division (like Go); see QuoRem for more details. +func (z *Int) Quo(x, y *Int) *Int { + z.abs, _ = z.abs.div(nil, x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg != y.neg // 0 has no sign + return z +} + +// Rem sets z to the remainder x%y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Rem implements truncated modulus (like Go); see QuoRem for more details. +func (z *Int) Rem(x, y *Int) *Int { + _, z.abs = nat(nil).div(z.abs, x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg // 0 has no sign + return z +} + +// QuoRem sets z to the quotient x/y and r to the remainder x%y +// and returns the pair (z, r) for y != 0. +// If y == 0, a division-by-zero run-time panic occurs. +// +// QuoRem implements T-division and modulus (like Go): +// +// q = x/y with the result truncated to zero +// r = x - y*q +// +// (See Daan Leijen, ``Division and Modulus for Computer Scientists''.) +// See DivMod for Euclidean division and modulus (unlike Go). 
+// +func (z *Int) QuoRem(x, y, r *Int) (*Int, *Int) { + z.abs, r.abs = z.abs.div(r.abs, x.abs, y.abs) + z.neg, r.neg = len(z.abs) > 0 && x.neg != y.neg, len(r.abs) > 0 && x.neg // 0 has no sign + return z, r +} + +// Div sets z to the quotient x/y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Div implements Euclidean division (unlike Go); see DivMod for more details. +func (z *Int) Div(x, y *Int) *Int { + y_neg := y.neg // z may be an alias for y + var r Int + z.QuoRem(x, y, &r) + if r.neg { + if y_neg { + z.Add(z, intOne) + } else { + z.Sub(z, intOne) + } + } + return z +} + +// Mod sets z to the modulus x%y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Mod implements Euclidean modulus (unlike Go); see DivMod for more details. +func (z *Int) Mod(x, y *Int) *Int { + y0 := y // save y + if z == y || alias(z.abs, y.abs) { + y0 = new(Int).Set(y) + } + var q Int + q.QuoRem(x, y, z) + if z.neg { + if y0.neg { + z.Sub(z, y0) + } else { + z.Add(z, y0) + } + } + return z +} + +// DivMod sets z to the quotient x div y and m to the modulus x mod y +// and returns the pair (z, m) for y != 0. +// If y == 0, a division-by-zero run-time panic occurs. +// +// DivMod implements Euclidean division and modulus (unlike Go): +// +// q = x div y such that +// m = x - y*q with 0 <= m < |y| +// +// (See Raymond T. Boute, ``The Euclidean definition of the functions +// div and mod''. ACM Transactions on Programming Languages and +// Systems (TOPLAS), 14(2):127-144, New York, NY, USA, 4/1992. +// ACM press.) +// See QuoRem for T-division and modulus (like Go). 
+// +func (z *Int) DivMod(x, y, m *Int) (*Int, *Int) { + y0 := y // save y + if z == y || alias(z.abs, y.abs) { + y0 = new(Int).Set(y) + } + z.QuoRem(x, y, m) + if m.neg { + if y0.neg { + z.Add(z, intOne) + m.Sub(m, y0) + } else { + z.Sub(z, intOne) + m.Add(m, y0) + } + } + return z, m +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func (x *Int) Cmp(y *Int) (r int) { + // x cmp y == x cmp y + // x cmp (-y) == x + // (-x) cmp y == y + // (-x) cmp (-y) == -(x cmp y) + switch { + case x == y: + // nothing to do + case x.neg == y.neg: + r = x.abs.cmp(y.abs) + if x.neg { + r = -r + } + case x.neg: + r = -1 + default: + r = 1 + } + return +} + +// CmpAbs compares the absolute values of x and y and returns: +// +// -1 if |x| < |y| +// 0 if |x| == |y| +// +1 if |x| > |y| +// +func (x *Int) CmpAbs(y *Int) int { + return x.abs.cmp(y.abs) +} + +// low32 returns the least significant 32 bits of x. +func low32(x nat) uint32 { + if len(x) == 0 { + return 0 + } + return uint32(x[0]) +} + +// low64 returns the least significant 64 bits of x. +func low64(x nat) uint64 { + if len(x) == 0 { + return 0 + } + v := uint64(x[0]) + if _W == 32 && len(x) > 1 { + return uint64(x[1])<<32 | v + } + return v +} + +// Int64 returns the int64 representation of x. +// If x cannot be represented in an int64, the result is undefined. +func (x *Int) Int64() int64 { + v := int64(low64(x.abs)) + if x.neg { + v = -v + } + return v +} + +// Uint64 returns the uint64 representation of x. +// If x cannot be represented in a uint64, the result is undefined. +func (x *Int) Uint64() uint64 { + return low64(x.abs) +} + +// IsInt64 reports whether x can be represented as an int64. +func (x *Int) IsInt64() bool { + if len(x.abs) <= 64/_W { + w := int64(low64(x.abs)) + return w >= 0 || x.neg && w == -w + } + return false +} + +// IsUint64 reports whether x can be represented as a uint64. 
+func (x *Int) IsUint64() bool { + return !x.neg && len(x.abs) <= 64/_W +} + +// SetString sets z to the value of s, interpreted in the given base, +// and returns z and a boolean indicating success. The entire string +// (not just a prefix) must be valid for success. If SetString fails, +// the value of z is undefined but the returned value is nil. +// +// The base argument must be 0 or a value between 2 and MaxBase. +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8, +// and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For bases <= 36, lower and upper case letters are considered the same: +// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35. +// For bases > 36, the upper case letters 'A' to 'Z' represent the digit +// values 36 to 61. +// +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as an error if there +// are no other errors. If base != 0, underscores are not recognized +// and act like any other character that is not a valid digit. +// +func (z *Int) SetString(s string, base int) (*Int, bool) { + return z.setFromScanner(strings.NewReader(s), base) +} + +// setFromScanner implements SetString given an io.ByteScanner. +// For documentation see comments of SetString. 
+func (z *Int) setFromScanner(r io.ByteScanner, base int) (*Int, bool) { + if _, _, err := z.scan(r, base); err != nil { + return nil, false + } + // entire content must have been consumed + if _, err := r.ReadByte(); err != io.EOF { + return nil, false + } + return z, true // err == io.EOF => scan consumed all content of r +} + +// SetBytes interprets buf as the bytes of a big-endian unsigned +// integer, sets z to that value, and returns z. +func (z *Int) SetBytes(buf []byte) *Int { + z.abs = z.abs.setBytes(buf) + z.neg = false + return z +} + +// Bytes returns the absolute value of x as a big-endian byte slice. +// +// To use a fixed length slice, or a preallocated one, use FillBytes. +func (x *Int) Bytes() []byte { + buf := make([]byte, len(x.abs)*_S) + return buf[x.abs.bytes(buf):] +} + +// FillBytes sets buf to the absolute value of x, storing it as a zero-extended +// big-endian byte slice, and returns buf. +// +// If the absolute value of x doesn't fit in buf, FillBytes will panic. +func (x *Int) FillBytes(buf []byte) []byte { + // Clear whole buffer. (This gets optimized into a memclr.) + for i := range buf { + buf[i] = 0 + } + x.abs.bytes(buf) + return buf +} + +// BitLen returns the length of the absolute value of x in bits. +// The bit length of 0 is 0. +func (x *Int) BitLen() int { + return x.abs.bitLen() +} + +// TrailingZeroBits returns the number of consecutive least significant zero +// bits of |x|. +func (x *Int) TrailingZeroBits() uint { + return x.abs.trailingZeroBits() +} + +// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z. +// If m == nil or m == 0, z = x**y unless y <= 0 then z = 1. If m != 0, y < 0, +// and x and m are not relatively prime, z is unchanged and nil is returned. +// +// Modular exponentiation of inputs of a particular size is not a +// cryptographically constant-time operation. +func (z *Int) Exp(x, y, m *Int) *Int { + // See Knuth, volume 2, section 4.6.3. 
+ xWords := x.abs + if y.neg { + if m == nil || len(m.abs) == 0 { + return z.SetInt64(1) + } + // for y < 0: x**y mod m == (x**(-1))**|y| mod m + inverse := new(Int).ModInverse(x, m) + if inverse == nil { + return nil + } + xWords = inverse.abs + } + yWords := y.abs + + var mWords nat + if m != nil { + mWords = m.abs // m.abs may be nil for m == 0 + } + + z.abs = z.abs.expNN(xWords, yWords, mWords) + z.neg = len(z.abs) > 0 && x.neg && len(yWords) > 0 && yWords[0]&1 == 1 // 0 has no sign + if z.neg && len(mWords) > 0 { + // make modulus result positive + z.abs = z.abs.sub(mWords, z.abs) // z == x**y mod |m| && 0 <= z < |m| + z.neg = false + } + + return z +} + +// GCD sets z to the greatest common divisor of a and b and returns z. +// If x or y are not nil, GCD sets their value such that z = a*x + b*y. +// +// a and b may be positive, zero or negative. (Before Go 1.14 both had +// to be > 0.) Regardless of the signs of a and b, z is always >= 0. +// +// If a == b == 0, GCD sets z = x = y = 0. +// +// If a == 0 and b != 0, GCD sets z = |b|, x = 0, y = sign(b) * 1. +// +// If a != 0 and b == 0, GCD sets z = |a|, x = sign(a) * 1, y = 0. +func (z *Int) GCD(x, y, a, b *Int) *Int { + if len(a.abs) == 0 || len(b.abs) == 0 { + lenA, lenB, negA, negB := len(a.abs), len(b.abs), a.neg, b.neg + if lenA == 0 { + z.Set(b) + } else { + z.Set(a) + } + z.neg = false + if x != nil { + if lenA == 0 { + x.SetUint64(0) + } else { + x.SetUint64(1) + x.neg = negA + } + } + if y != nil { + if lenB == 0 { + y.SetUint64(0) + } else { + y.SetUint64(1) + y.neg = negB + } + } + return z + } + + return z.lehmerGCD(x, y, a, b) +} + +// lehmerSimulate attempts to simulate several Euclidean update steps +// using the leading digits of A and B. 
It returns u0, u1, v0, v1 +// such that A and B can be updated as: +// A = u0*A + v0*B +// B = u1*A + v1*B +// Requirements: A >= B and len(B.abs) >= 2 +// Since we are calculating with full words to avoid overflow, +// we use 'even' to track the sign of the cosequences. +// For even iterations: u0, v1 >= 0 && u1, v0 <= 0 +// For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 +func lehmerSimulate(A, B *Int) (u0, u1, v0, v1 Word, even bool) { + // initialize the digits + var a1, a2, u2, v2 Word + + m := len(B.abs) // m >= 2 + n := len(A.abs) // n >= m >= 2 + + // extract the top Word of bits from A and B + h := nlz(A.abs[n-1]) + a1 = A.abs[n-1]<<h | A.abs[n-2]>>(_W-h) + // B may have implicit zero words in the high bits if the lengths differ + switch { + case n == m: + a2 = B.abs[n-1]<<h | B.abs[n-2]>>(_W-h) + case n == m+1: + a2 = B.abs[n-2] >> (_W - h) + default: + a2 = 0 + } + + // Since we are calculating with full words to avoid overflow, + // we use 'even' to track the sign of the cosequences. + // For even iterations: u0, v1 >= 0 && u1, v0 <= 0 + // For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 + // The first iteration starts with k=1 (odd). + even = false + // variables to track the cosequences + u0, u1, u2 = 0, 1, 0 + v0, v1, v2 = 0, 0, 1 + + // Calculate the quotient and cosequences using Collins' stopping condition. + // Note that overflow of a Word is not possible when computing the remainder + // sequence and cosequences since the cosequence size is bounded by the input size. + // See section 4.2 of Jebelean for details. 
+ for a2 >= v2 && a1-a2 >= v1+v2 { + q, r := a1/a2, a1%a2 + a1, a2 = a2, r + u0, u1, u2 = u1, u2, u1+q*u2 + v0, v1, v2 = v1, v2, v1+q*v2 + even = !even + } + return +} + +// lehmerUpdate updates the inputs A and B such that: +// A = u0*A + v0*B +// B = u1*A + v1*B +// where the signs of u0, u1, v0, v1 are given by even +// For even == true: u0, v1 >= 0 && u1, v0 <= 0 +// For even == false: u0, v1 <= 0 && u1, v0 >= 0 +// q, r, s, t are temporary variables to avoid allocations in the multiplication +func lehmerUpdate(A, B, q, r, s, t *Int, u0, u1, v0, v1 Word, even bool) { + + t.abs = t.abs.setWord(u0) + s.abs = s.abs.setWord(v0) + t.neg = !even + s.neg = even + + t.Mul(A, t) + s.Mul(B, s) + + r.abs = r.abs.setWord(u1) + q.abs = q.abs.setWord(v1) + r.neg = even + q.neg = !even + + r.Mul(A, r) + q.Mul(B, q) + + A.Add(t, s) + B.Add(r, q) +} + +// euclidUpdate performs a single step of the Euclidean GCD algorithm +// if extended is true, it also updates the cosequence Ua, Ub +func euclidUpdate(A, B, Ua, Ub, q, r, s, t *Int, extended bool) { + q, r = q.QuoRem(A, B, r) + + *A, *B, *r = *B, *r, *A + + if extended { + // Ua, Ub = Ub, Ua - q*Ub + t.Set(Ub) + s.Mul(Ub, q) + Ub.Sub(Ua, s) + Ua.Set(t) + } +} + +// lehmerGCD sets z to the greatest common divisor of a and b, +// which both must be != 0, and returns z. +// If x or y are not nil, their values are set such that z = a*x + b*y. +// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L. +// This implementation uses the improved condition by Collins requiring only one +// quotient and avoiding the possibility of single Word overflow. +// See Jebelean, "Improving the multiprecision Euclidean algorithm", +// Design and Implementation of Symbolic Computation Systems, pp 45-58. +// The cosequences are updated according to Algorithm 10.45 from +// Cohen et al. "Handbook of Elliptic and Hyperelliptic Curve Cryptography" pp 192. 
+func (z *Int) lehmerGCD(x, y, a, b *Int) *Int { + var A, B, Ua, Ub *Int + + A = new(Int).Abs(a) + B = new(Int).Abs(b) + + extended := x != nil || y != nil + + if extended { + // Ua (Ub) tracks how many times input a has been accumulated into A (B). + Ua = new(Int).SetInt64(1) + Ub = new(Int) + } + + // temp variables for multiprecision update + q := new(Int) + r := new(Int) + s := new(Int) + t := new(Int) + + // ensure A >= B + if A.abs.cmp(B.abs) < 0 { + A, B = B, A + Ub, Ua = Ua, Ub + } + + // loop invariant A >= B + for len(B.abs) > 1 { + // Attempt to calculate in single-precision using leading words of A and B. + u0, u1, v0, v1, even := lehmerSimulate(A, B) + + // multiprecision Step + if v0 != 0 { + // Simulate the effect of the single-precision steps using the cosequences. + // A = u0*A + v0*B + // B = u1*A + v1*B + lehmerUpdate(A, B, q, r, s, t, u0, u1, v0, v1, even) + + if extended { + // Ua = u0*Ua + v0*Ub + // Ub = u1*Ua + v1*Ub + lehmerUpdate(Ua, Ub, q, r, s, t, u0, u1, v0, v1, even) + } + + } else { + // Single-digit calculations failed to simulate any quotients. + // Do a standard Euclidean step. + euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended) + } + } + + if len(B.abs) > 0 { + // extended Euclidean algorithm base case if B is a single Word + if len(A.abs) > 1 { + // A is longer than a single Word, so one update is needed. + euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended) + } + if len(B.abs) > 0 { + // A and B are both a single Word. 
+ aWord, bWord := A.abs[0], B.abs[0] + if extended { + var ua, ub, va, vb Word + ua, ub = 1, 0 + va, vb = 0, 1 + even := true + for bWord != 0 { + q, r := aWord/bWord, aWord%bWord + aWord, bWord = bWord, r + ua, ub = ub, ua+q*ub + va, vb = vb, va+q*vb + even = !even + } + + t.abs = t.abs.setWord(ua) + s.abs = s.abs.setWord(va) + t.neg = !even + s.neg = even + + t.Mul(Ua, t) + s.Mul(Ub, s) + + Ua.Add(t, s) + } else { + for bWord != 0 { + aWord, bWord = bWord, aWord%bWord + } + } + A.abs[0] = aWord + } + } + negA := a.neg + if y != nil { + // avoid aliasing b needed in the division below + if y == b { + B.Set(b) + } else { + B = b + } + // y = (z - a*x)/b + y.Mul(a, Ua) // y can safely alias a + if negA { + y.neg = !y.neg + } + y.Sub(A, y) + y.Div(y, B) + } + + if x != nil { + *x = *Ua + if negA { + x.neg = !x.neg + } + } + + *z = *A + + return z +} + +// Rand sets z to a pseudo-random number in [0, n) and returns z. +// +// As this uses the math/rand package, it must not be used for +// security-sensitive work. Use crypto/rand.Int instead. +func (z *Int) Rand(rnd *rand.Rand, n *Int) *Int { + z.neg = false + if n.neg || len(n.abs) == 0 { + z.abs = nil + return z + } + z.abs = z.abs.random(rnd, n.abs, n.abs.bitLen()) + return z +} + +// ModInverse sets z to the multiplicative inverse of g in the ring ℤ/nℤ +// and returns z. If g and n are not relatively prime, g has no multiplicative +// inverse in the ring ℤ/nℤ. In this case, z is unchanged and the return value +// is nil. +func (z *Int) ModInverse(g, n *Int) *Int { + // GCD expects parameters a and b to be > 0. 
+ if n.neg { + var n2 Int + n = n2.Neg(n) + } + if g.neg { + var g2 Int + g = g2.Mod(g, n) + } + var d, x Int + d.GCD(&x, nil, g, n) + + // if and only if d==1, g and n are relatively prime + if d.Cmp(intOne) != 0 { + return nil + } + + // x and y are such that g*x + n*y = 1, therefore x is the inverse element, + // but it may be negative, so convert to the range 0 <= z < |n| + if x.neg { + z.Add(&x, n) + } else { + z.Set(&x) + } + return z +} + +// Jacobi returns the Jacobi symbol (x/y), either +1, -1, or 0. +// The y argument must be an odd integer. +func Jacobi(x, y *Int) int { + if len(y.abs) == 0 || y.abs[0]&1 == 0 { + panic(fmt.Sprintf("big: invalid 2nd argument to Int.Jacobi: need odd integer but got %s", y)) + } + + // We use the formulation described in chapter 2, section 2.4, + // "The Yacas Book of Algorithms": + // http://yacas.sourceforge.net/Algo.book.pdf + + var a, b, c Int + a.Set(x) + b.Set(y) + j := 1 + + if b.neg { + if a.neg { + j = -1 + } + b.neg = false + } + + for { + if b.Cmp(intOne) == 0 { + return j + } + if len(a.abs) == 0 { + return 0 + } + a.Mod(&a, &b) + if len(a.abs) == 0 { + return 0 + } + // a > 0 + + // handle factors of 2 in 'a' + s := a.abs.trailingZeroBits() + if s&1 != 0 { + bmod8 := b.abs[0] & 7 + if bmod8 == 3 || bmod8 == 5 { + j = -j + } + } + c.Rsh(&a, s) // a = 2^s*c + + // swap numerator and denominator + if b.abs[0]&3 == 3 && c.abs[0]&3 == 3 { + j = -j + } + a.Set(&b) + b.Set(&c) + } +} + +// modSqrt3Mod4 uses the identity +// (a^((p+1)/4))^2 mod p +// == u^(p+1) mod p +// == u^2 mod p +// to calculate the square root of any quadratic residue mod p quickly for 3 +// mod 4 primes. 
+func (z *Int) modSqrt3Mod4Prime(x, p *Int) *Int { + e := new(Int).Add(p, intOne) // e = p + 1 + e.Rsh(e, 2) // e = (p + 1) / 4 + z.Exp(x, e, p) // z = x^e mod p + return z +} + +// modSqrt5Mod8 uses Atkin's observation that 2 is not a square mod p +// alpha == (2*a)^((p-5)/8) mod p +// beta == 2*a*alpha^2 mod p is a square root of -1 +// b == a*alpha*(beta-1) mod p is a square root of a +// to calculate the square root of any quadratic residue mod p quickly for 5 +// mod 8 primes. +func (z *Int) modSqrt5Mod8Prime(x, p *Int) *Int { + // p == 5 mod 8 implies p = e*8 + 5 + // e is the quotient and 5 the remainder on division by 8 + e := new(Int).Rsh(p, 3) // e = (p - 5) / 8 + tx := new(Int).Lsh(x, 1) // tx = 2*x + alpha := new(Int).Exp(tx, e, p) + beta := new(Int).Mul(alpha, alpha) + beta.Mod(beta, p) + beta.Mul(beta, tx) + beta.Mod(beta, p) + beta.Sub(beta, intOne) + beta.Mul(beta, x) + beta.Mod(beta, p) + beta.Mul(beta, alpha) + z.Mod(beta, p) + return z +} + +// modSqrtTonelliShanks uses the Tonelli-Shanks algorithm to find the square +// root of a quadratic residue modulo any prime. +func (z *Int) modSqrtTonelliShanks(x, p *Int) *Int { + // Break p-1 into s*2^e such that s is odd. + var s Int + s.Sub(p, intOne) + e := s.abs.trailingZeroBits() + s.Rsh(&s, e) + + // find some non-square n + var n Int + n.SetInt64(2) + for Jacobi(&n, p) != -1 { + n.Add(&n, intOne) + } + + // Core of the Tonelli-Shanks algorithm. 
Follows the description in + // section 6 of "Square roots from 1; 24, 51, 10 to Dan Shanks" by Ezra + // Brown: + // https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + var y, b, g, t Int + y.Add(&s, intOne) + y.Rsh(&y, 1) + y.Exp(x, &y, p) // y = x^((s+1)/2) + b.Exp(x, &s, p) // b = x^s + g.Exp(&n, &s, p) // g = n^s + r := e + for { + // find the least m such that ord_p(b) = 2^m + var m uint + t.Set(&b) + for t.Cmp(intOne) != 0 { + t.Mul(&t, &t).Mod(&t, p) + m++ + } + + if m == 0 { + return z.Set(&y) + } + + t.SetInt64(0).SetBit(&t, int(r-m-1), 1).Exp(&g, &t, p) + // t = g^(2^(r-m-1)) mod p + g.Mul(&t, &t).Mod(&g, p) // g = g^(2^(r-m)) mod p + y.Mul(&y, &t).Mod(&y, p) + b.Mul(&b, &g).Mod(&b, p) + r = m + } +} + +// ModSqrt sets z to a square root of x mod p if such a square root exists, and +// returns z. The modulus p must be an odd prime. If x is not a square mod p, +// ModSqrt leaves z unchanged and returns nil. This function panics if p is +// not an odd integer. +func (z *Int) ModSqrt(x, p *Int) *Int { + switch Jacobi(x, p) { + case -1: + return nil // x is not a square mod p + case 0: + return z.SetInt64(0) // sqrt(0) mod p = 0 + case 1: + break + } + if x.neg || x.Cmp(p) >= 0 { // ensure 0 <= x < p + x = new(Int).Mod(x, p) + } + + switch { + case p.abs[0]%4 == 3: + // Check whether p is 3 mod 4, and if so, use the faster algorithm. + return z.modSqrt3Mod4Prime(x, p) + case p.abs[0]%8 == 5: + // Check whether p is 5 mod 8, use Atkin's algorithm. + return z.modSqrt5Mod8Prime(x, p) + default: + // Otherwise, use Tonelli-Shanks. + return z.modSqrtTonelliShanks(x, p) + } +} + +// Lsh sets z = x << n and returns z. +func (z *Int) Lsh(x *Int, n uint) *Int { + z.abs = z.abs.shl(x.abs, n) + z.neg = x.neg + return z +} + +// Rsh sets z = x >> n and returns z. 
+func (z *Int) Rsh(x *Int, n uint) *Int { + if x.neg { + // (-x) >> s == ^(x-1) >> s == ^((x-1) >> s) == -(((x-1) >> s) + 1) + t := z.abs.sub(x.abs, natOne) // no underflow because |x| > 0 + t = t.shr(t, n) + z.abs = t.add(t, natOne) + z.neg = true // z cannot be zero if x is negative + return z + } + + z.abs = z.abs.shr(x.abs, n) + z.neg = false + return z +} + +// Bit returns the value of the i'th bit of x. That is, it +// returns (x>>i)&1. The bit index i must be >= 0. +func (x *Int) Bit(i int) uint { + if i == 0 { + // optimization for common case: odd/even test of x + if len(x.abs) > 0 { + return uint(x.abs[0] & 1) // bit 0 is same for -x + } + return 0 + } + if i < 0 { + panic("negative bit index") + } + if x.neg { + t := nat(nil).sub(x.abs, natOne) + return t.bit(uint(i)) ^ 1 + } + + return x.abs.bit(uint(i)) +} + +// SetBit sets z to x, with x's i'th bit set to b (0 or 1). +// That is, if b is 1 SetBit sets z = x | (1 << i); +// if b is 0 SetBit sets z = x &^ (1 << i). If b is not 0 or 1, +// SetBit will panic. +func (z *Int) SetBit(x *Int, i int, b uint) *Int { + if i < 0 { + panic("negative bit index") + } + if x.neg { + t := z.abs.sub(x.abs, natOne) + t = t.setBit(t, uint(i), b^1) + z.abs = t.add(t, natOne) + z.neg = len(z.abs) > 0 + return z + } + z.abs = z.abs.setBit(x.abs, uint(i), b) + z.neg = false + return z +} + +// And sets z = x & y and returns z. 
+func (z *Int) And(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) & (-y) == ^(x-1) & ^(y-1) == ^((x-1) | (y-1)) == -(((x-1) | (y-1)) + 1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.or(x1, y1), natOne) + z.neg = true // z cannot be zero if x and y are negative + return z + } + + // x & y == x & y + z.abs = z.abs.and(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // & is symmetric + } + + // x & (-y) == x & ^(y-1) == x &^ (y-1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.andNot(x.abs, y1) + z.neg = false + return z +} + +// AndNot sets z = x &^ y and returns z. +func (z *Int) AndNot(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) &^ (-y) == ^(x-1) &^ ^(y-1) == ^(x-1) & (y-1) == (y-1) &^ (x-1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.andNot(y1, x1) + z.neg = false + return z + } + + // x &^ y == x &^ y + z.abs = z.abs.andNot(x.abs, y.abs) + z.neg = false + return z + } + + if x.neg { + // (-x) &^ y == ^(x-1) &^ y == ^(x-1) & ^y == ^((x-1) | y) == -(((x-1) | y) + 1) + x1 := nat(nil).sub(x.abs, natOne) + z.abs = z.abs.add(z.abs.or(x1, y.abs), natOne) + z.neg = true // z cannot be zero if x is negative and y is positive + return z + } + + // x &^ (-y) == x &^ ^(y-1) == x & (y-1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.and(x.abs, y1) + z.neg = false + return z +} + +// Or sets z = x | y and returns z. 
+func (z *Int) Or(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) | (-y) == ^(x-1) | ^(y-1) == ^((x-1) & (y-1)) == -(((x-1) & (y-1)) + 1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.and(x1, y1), natOne) + z.neg = true // z cannot be zero if x and y are negative + return z + } + + // x | y == x | y + z.abs = z.abs.or(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // | is symmetric + } + + // x | (-y) == x | ^(y-1) == ^((y-1) &^ x) == -(^((y-1) &^ x) + 1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.andNot(y1, x.abs), natOne) + z.neg = true // z cannot be zero if one of x or y is negative + return z +} + +// Xor sets z = x ^ y and returns z. +func (z *Int) Xor(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) ^ (-y) == ^(x-1) ^ ^(y-1) == (x-1) ^ (y-1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.xor(x1, y1) + z.neg = false + return z + } + + // x ^ y == x ^ y + z.abs = z.abs.xor(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // ^ is symmetric + } + + // x ^ (-y) == x ^ ^(y-1) == ^(x ^ (y-1)) == -((x ^ (y-1)) + 1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.xor(x.abs, y1), natOne) + z.neg = true // z cannot be zero if only one of x or y is negative + return z +} + +// Not sets z = ^x and returns z. +func (z *Int) Not(x *Int) *Int { + if x.neg { + // ^(-x) == ^(^(x-1)) == x-1 + z.abs = z.abs.sub(x.abs, natOne) + z.neg = false + return z + } + + // ^x == -x-1 == -(x+1) + z.abs = z.abs.add(x.abs, natOne) + z.neg = true // z cannot be zero if x is positive + return z +} + +// Sqrt sets z to ⌊√x⌋, the largest integer such that z² ≤ x, and returns z. +// It panics if x is negative. 
+func (z *Int) Sqrt(x *Int) *Int { + if x.neg { + panic("square root of negative number") + } + z.neg = false + z.abs = z.abs.sqrt(x.abs) + return z +} diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go new file mode 100644 index 0000000..3c85573 --- /dev/null +++ b/src/math/big/int_test.go @@ -0,0 +1,1896 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "bytes" + "encoding/hex" + "fmt" + "math/rand" + "strconv" + "strings" + "testing" + "testing/quick" +) + +func isNormalized(x *Int) bool { + if len(x.abs) == 0 { + return !x.neg + } + // len(x.abs) > 0 + return x.abs[len(x.abs)-1] != 0 +} + +type funZZ func(z, x, y *Int) *Int +type argZZ struct { + z, x, y *Int +} + +var sumZZ = []argZZ{ + {NewInt(0), NewInt(0), NewInt(0)}, + {NewInt(1), NewInt(1), NewInt(0)}, + {NewInt(1111111110), NewInt(123456789), NewInt(987654321)}, + {NewInt(-1), NewInt(-1), NewInt(0)}, + {NewInt(864197532), NewInt(-123456789), NewInt(987654321)}, + {NewInt(-1111111110), NewInt(-123456789), NewInt(-987654321)}, +} + +var prodZZ = []argZZ{ + {NewInt(0), NewInt(0), NewInt(0)}, + {NewInt(0), NewInt(1), NewInt(0)}, + {NewInt(1), NewInt(1), NewInt(1)}, + {NewInt(-991 * 991), NewInt(991), NewInt(-991)}, + // TODO(gri) add larger products +} + +func TestSignZ(t *testing.T) { + var zero Int + for _, a := range sumZZ { + s := a.z.Sign() + e := a.z.Cmp(&zero) + if s != e { + t.Errorf("got %d; want %d for z = %v", s, e, a.z) + } + } +} + +func TestSetZ(t *testing.T) { + for _, a := range sumZZ { + var z Int + z.Set(a.z) + if !isNormalized(&z) { + t.Errorf("%v is not normalized", z) + } + if (&z).Cmp(a.z) != 0 { + t.Errorf("got z = %v; want %v", z, a.z) + } + } +} + +func TestAbsZ(t *testing.T) { + var zero Int + for _, a := range sumZZ { + var z Int + z.Abs(a.z) + var e Int + e.Set(a.z) + if e.Cmp(&zero) < 0 { + e.Sub(&zero, &e) + } + if 
z.Cmp(&e) != 0 { + t.Errorf("got z = %v; want %v", z, e) + } + } +} + +func testFunZZ(t *testing.T, msg string, f funZZ, a argZZ) { + var z Int + f(&z, a.x, a.y) + if !isNormalized(&z) { + t.Errorf("%s%v is not normalized", msg, z) + } + if (&z).Cmp(a.z) != 0 { + t.Errorf("%s%+v\n\tgot z = %v; want %v", msg, a, &z, a.z) + } +} + +func TestSumZZ(t *testing.T) { + AddZZ := func(z, x, y *Int) *Int { return z.Add(x, y) } + SubZZ := func(z, x, y *Int) *Int { return z.Sub(x, y) } + for _, a := range sumZZ { + arg := a + testFunZZ(t, "AddZZ", AddZZ, arg) + + arg = argZZ{a.z, a.y, a.x} + testFunZZ(t, "AddZZ symmetric", AddZZ, arg) + + arg = argZZ{a.x, a.z, a.y} + testFunZZ(t, "SubZZ", SubZZ, arg) + + arg = argZZ{a.y, a.z, a.x} + testFunZZ(t, "SubZZ symmetric", SubZZ, arg) + } +} + +func TestProdZZ(t *testing.T) { + MulZZ := func(z, x, y *Int) *Int { return z.Mul(x, y) } + for _, a := range prodZZ { + arg := a + testFunZZ(t, "MulZZ", MulZZ, arg) + + arg = argZZ{a.z, a.y, a.x} + testFunZZ(t, "MulZZ symmetric", MulZZ, arg) + } +} + +// mulBytes returns x*y via grade school multiplication. Both inputs +// and the result are assumed to be in big-endian representation (to +// match the semantics of Int.Bytes and Int.SetBytes). 
+func mulBytes(x, y []byte) []byte { + z := make([]byte, len(x)+len(y)) + + // multiply + k0 := len(z) - 1 + for j := len(y) - 1; j >= 0; j-- { + d := int(y[j]) + if d != 0 { + k := k0 + carry := 0 + for i := len(x) - 1; i >= 0; i-- { + t := int(z[k]) + int(x[i])*d + carry + z[k], carry = byte(t), t>>8 + k-- + } + z[k] = byte(carry) + } + k0-- + } + + // normalize (remove leading 0's) + i := 0 + for i < len(z) && z[i] == 0 { + i++ + } + + return z[i:] +} + +func checkMul(a, b []byte) bool { + var x, y, z1 Int + x.SetBytes(a) + y.SetBytes(b) + z1.Mul(&x, &y) + + var z2 Int + z2.SetBytes(mulBytes(a, b)) + + return z1.Cmp(&z2) == 0 +} + +func TestMul(t *testing.T) { + if err := quick.Check(checkMul, nil); err != nil { + t.Error(err) + } +} + +var mulRangesZ = []struct { + a, b int64 + prod string +}{ + // entirely positive ranges are covered by mulRangesN + {-1, 1, "0"}, + {-2, -1, "2"}, + {-3, -2, "6"}, + {-3, -1, "-6"}, + {1, 3, "6"}, + {-10, -10, "-10"}, + {0, -1, "1"}, // empty range + {-1, -100, "1"}, // empty range + {-1, 1, "0"}, // range includes 0 + {-1e9, 0, "0"}, // range includes 0 + {-1e9, 1e9, "0"}, // range includes 0 + {-10, -1, "3628800"}, // 10! + {-20, -2, "-2432902008176640000"}, // -20! + {-99, -1, + "-933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "511852109168640000000000000000000000", // -99! 
+ }, +} + +func TestMulRangeZ(t *testing.T) { + var tmp Int + // test entirely positive ranges + for i, r := range mulRangesN { + prod := tmp.MulRange(int64(r.a), int64(r.b)).String() + if prod != r.prod { + t.Errorf("#%da: got %s; want %s", i, prod, r.prod) + } + } + // test other ranges + for i, r := range mulRangesZ { + prod := tmp.MulRange(r.a, r.b).String() + if prod != r.prod { + t.Errorf("#%db: got %s; want %s", i, prod, r.prod) + } + } +} + +func TestBinomial(t *testing.T) { + var z Int + for _, test := range []struct { + n, k int64 + want string + }{ + {0, 0, "1"}, + {0, 1, "0"}, + {1, 0, "1"}, + {1, 1, "1"}, + {1, 10, "0"}, + {4, 0, "1"}, + {4, 1, "4"}, + {4, 2, "6"}, + {4, 3, "4"}, + {4, 4, "1"}, + {10, 1, "10"}, + {10, 9, "10"}, + {10, 5, "252"}, + {11, 5, "462"}, + {11, 6, "462"}, + {100, 10, "17310309456440"}, + {100, 90, "17310309456440"}, + {1000, 10, "263409560461970212832400"}, + {1000, 990, "263409560461970212832400"}, + } { + if got := z.Binomial(test.n, test.k).String(); got != test.want { + t.Errorf("Binomial(%d, %d) = %s; want %s", test.n, test.k, got, test.want) + } + } +} + +func BenchmarkBinomial(b *testing.B) { + var z Int + for i := b.N - 1; i >= 0; i-- { + z.Binomial(1000, 990) + } +} + +// Examples from the Go Language Spec, section "Arithmetic operators" +var divisionSignsTests = []struct { + x, y int64 + q, r int64 // T-division + d, m int64 // Euclidean division +}{ + {5, 3, 1, 2, 1, 2}, + {-5, 3, -1, -2, -2, 1}, + {5, -3, -1, 2, -1, 2}, + {-5, -3, 1, -2, 2, 1}, + {1, 2, 0, 1, 0, 1}, + {8, 4, 2, 0, 2, 0}, +} + +func TestDivisionSigns(t *testing.T) { + for i, test := range divisionSignsTests { + x := NewInt(test.x) + y := NewInt(test.y) + q := NewInt(test.q) + r := NewInt(test.r) + d := NewInt(test.d) + m := NewInt(test.m) + + q1 := new(Int).Quo(x, y) + r1 := new(Int).Rem(x, y) + if !isNormalized(q1) { + t.Errorf("#%d Quo: %v is not normalized", i, *q1) + } + if !isNormalized(r1) { + t.Errorf("#%d Rem: %v is not normalized", i, *r1) 
+ } + if q1.Cmp(q) != 0 || r1.Cmp(r) != 0 { + t.Errorf("#%d QuoRem: got (%s, %s), want (%s, %s)", i, q1, r1, q, r) + } + + q2, r2 := new(Int).QuoRem(x, y, new(Int)) + if !isNormalized(q2) { + t.Errorf("#%d Quo: %v is not normalized", i, *q2) + } + if !isNormalized(r2) { + t.Errorf("#%d Rem: %v is not normalized", i, *r2) + } + if q2.Cmp(q) != 0 || r2.Cmp(r) != 0 { + t.Errorf("#%d QuoRem: got (%s, %s), want (%s, %s)", i, q2, r2, q, r) + } + + d1 := new(Int).Div(x, y) + m1 := new(Int).Mod(x, y) + if !isNormalized(d1) { + t.Errorf("#%d Div: %v is not normalized", i, *d1) + } + if !isNormalized(m1) { + t.Errorf("#%d Mod: %v is not normalized", i, *m1) + } + if d1.Cmp(d) != 0 || m1.Cmp(m) != 0 { + t.Errorf("#%d DivMod: got (%s, %s), want (%s, %s)", i, d1, m1, d, m) + } + + d2, m2 := new(Int).DivMod(x, y, new(Int)) + if !isNormalized(d2) { + t.Errorf("#%d Div: %v is not normalized", i, *d2) + } + if !isNormalized(m2) { + t.Errorf("#%d Mod: %v is not normalized", i, *m2) + } + if d2.Cmp(d) != 0 || m2.Cmp(m) != 0 { + t.Errorf("#%d DivMod: got (%s, %s), want (%s, %s)", i, d2, m2, d, m) + } + } +} + +func norm(x nat) nat { + i := len(x) + for i > 0 && x[i-1] == 0 { + i-- + } + return x[:i] +} + +func TestBits(t *testing.T) { + for _, test := range []nat{ + nil, + {0}, + {1}, + {0, 1, 2, 3, 4}, + {4, 3, 2, 1, 0}, + {4, 3, 2, 1, 0, 0, 0, 0}, + } { + var z Int + z.neg = true + got := z.SetBits(test) + want := norm(test) + if got.abs.cmp(want) != 0 { + t.Errorf("SetBits(%v) = %v; want %v", test, got.abs, want) + } + + if got.neg { + t.Errorf("SetBits(%v): got negative result", test) + } + + bits := nat(z.Bits()) + if bits.cmp(want) != 0 { + t.Errorf("%v.Bits() = %v; want %v", z.abs, bits, want) + } + } +} + +func checkSetBytes(b []byte) bool { + hex1 := hex.EncodeToString(new(Int).SetBytes(b).Bytes()) + hex2 := hex.EncodeToString(b) + + for len(hex1) < len(hex2) { + hex1 = "0" + hex1 + } + + for len(hex1) > len(hex2) { + hex2 = "0" + hex2 + } + + return hex1 == hex2 +} + +func 
TestSetBytes(t *testing.T) { + if err := quick.Check(checkSetBytes, nil); err != nil { + t.Error(err) + } +} + +func checkBytes(b []byte) bool { + // trim leading zero bytes since Bytes() won't return them + // (was issue 12231) + for len(b) > 0 && b[0] == 0 { + b = b[1:] + } + b2 := new(Int).SetBytes(b).Bytes() + return bytes.Equal(b, b2) +} + +func TestBytes(t *testing.T) { + if err := quick.Check(checkBytes, nil); err != nil { + t.Error(err) + } +} + +func checkQuo(x, y []byte) bool { + u := new(Int).SetBytes(x) + v := new(Int).SetBytes(y) + + if len(v.abs) == 0 { + return true + } + + r := new(Int) + q, r := new(Int).QuoRem(u, v, r) + + if r.Cmp(v) >= 0 { + return false + } + + uprime := new(Int).Set(q) + uprime.Mul(uprime, v) + uprime.Add(uprime, r) + + return uprime.Cmp(u) == 0 +} + +var quoTests = []struct { + x, y string + q, r string +}{ + { + "476217953993950760840509444250624797097991362735329973741718102894495832294430498335824897858659711275234906400899559094370964723884706254265559534144986498357", + "9353930466774385905609975137998169297361893554149986716853295022578535724979483772383667534691121982974895531435241089241440253066816724367338287092081996", + "50911", + "1", + }, + { + "11510768301994997771168", + "1328165573307167369775", + "8", + "885443715537658812968", + }, +} + +func TestQuo(t *testing.T) { + if err := quick.Check(checkQuo, nil); err != nil { + t.Error(err) + } + + for i, test := range quoTests { + x, _ := new(Int).SetString(test.x, 10) + y, _ := new(Int).SetString(test.y, 10) + expectedQ, _ := new(Int).SetString(test.q, 10) + expectedR, _ := new(Int).SetString(test.r, 10) + + r := new(Int) + q, r := new(Int).QuoRem(x, y, r) + + if q.Cmp(expectedQ) != 0 || r.Cmp(expectedR) != 0 { + t.Errorf("#%d got (%s, %s) want (%s, %s)", i, q, r, expectedQ, expectedR) + } + } +} + +func TestQuoStepD6(t *testing.T) { + // See Knuth, Volume 2, section 4.3.1, exercise 21. This code exercises + // a code path which only triggers 1 in 10^{-19} cases. 
+ + u := &Int{false, nat{0, 0, 1 + 1<<(_W-1), _M ^ (1 << (_W - 1))}} + v := &Int{false, nat{5, 2 + 1<<(_W-1), 1 << (_W - 1)}} + + r := new(Int) + q, r := new(Int).QuoRem(u, v, r) + const expectedQ64 = "18446744073709551613" + const expectedR64 = "3138550867693340382088035895064302439801311770021610913807" + const expectedQ32 = "4294967293" + const expectedR32 = "39614081266355540837921718287" + if q.String() != expectedQ64 && q.String() != expectedQ32 || + r.String() != expectedR64 && r.String() != expectedR32 { + t.Errorf("got (%s, %s) want (%s, %s) or (%s, %s)", q, r, expectedQ64, expectedR64, expectedQ32, expectedR32) + } +} + +func BenchmarkQuoRem(b *testing.B) { + x, _ := new(Int).SetString("153980389784927331788354528594524332344709972855165340650588877572729725338415474372475094155672066328274535240275856844648695200875763869073572078279316458648124537905600131008790701752441155668003033945258023841165089852359980273279085783159654751552359397986180318708491098942831252291841441726305535546071", 0) + y, _ := new(Int).SetString("7746362281539803897849273317883545285945243323447099728551653406505888775727297253384154743724750941556720663282745352402758568446486952008757638690735720782793164586481245379056001310087907017524411556680030339452580238411650898523599802732790857831596547515523593979861803187084910989428312522918414417263055355460715745539358014631136245887418412633787074173796862711588221766398229333338511838891484974940633857861775630560092874987828057333663969469797013996401149696897591265769095952887917296740109742927689053276850469671231961384715398038978492733178835452859452433234470997285516534065058887757272972533841547437247509415567206632827453524027585684464869520087576386907357207827931645864812453790560013100879070175244115566800303394525802384116508985235998027327908578315965475155235939798618031870849109894283125229184144172630553554607112725169432413343763989564437170644270643461665184965150423819594083121075825", 0) + q := new(Int) + 
r := new(Int) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + q.QuoRem(y, x, r) + } +} + +var bitLenTests = []struct { + in string + out int +}{ + {"-1", 1}, + {"0", 0}, + {"1", 1}, + {"2", 2}, + {"4", 3}, + {"0xabc", 12}, + {"0x8000", 16}, + {"0x80000000", 32}, + {"0x800000000000", 48}, + {"0x8000000000000000", 64}, + {"0x80000000000000000000", 80}, + {"-0x4000000000000000000000", 87}, +} + +func TestBitLen(t *testing.T) { + for i, test := range bitLenTests { + x, ok := new(Int).SetString(test.in, 0) + if !ok { + t.Errorf("#%d test input invalid: %s", i, test.in) + continue + } + + if n := x.BitLen(); n != test.out { + t.Errorf("#%d got %d want %d", i, n, test.out) + } + } +} + +var expTests = []struct { + x, y, m string + out string +}{ + // y <= 0 + {"0", "0", "", "1"}, + {"1", "0", "", "1"}, + {"-10", "0", "", "1"}, + {"1234", "-1", "", "1"}, + {"1234", "-1", "0", "1"}, + {"17", "-100", "1234", "865"}, + {"2", "-100", "1234", ""}, + + // m == 1 + {"0", "0", "1", "0"}, + {"1", "0", "1", "0"}, + {"-10", "0", "1", "0"}, + {"1234", "-1", "1", "0"}, + + // misc + {"5", "1", "3", "2"}, + {"5", "-7", "", "1"}, + {"-5", "-7", "", "1"}, + {"5", "0", "", "1"}, + {"-5", "0", "", "1"}, + {"5", "1", "", "5"}, + {"-5", "1", "", "-5"}, + {"-5", "1", "7", "2"}, + {"-2", "3", "2", "0"}, + {"5", "2", "", "25"}, + {"1", "65537", "2", "1"}, + {"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"}, + {"0x8000000000000000", "2", "6719", "4944"}, + {"0x8000000000000000", "3", "6719", "5447"}, + {"0x8000000000000000", "1000", "6719", "1603"}, + {"0x8000000000000000", "1000000", "6719", "3199"}, + {"0x8000000000000000", "-1000000", "6719", "3663"}, // 3663 = ModInverse(3199, 6719) Issue #25865 + + {"0xffffffffffffffffffffffffffffffff", "0x12345678123456781234567812345678123456789", "0x01112222333344445555666677778889", "0x36168FA1DB3AAE6C8CE647E137F97A"}, + + { + 
"2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347", + "298472983472983471903246121093472394872319615612417471234712061", + "29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464", + "23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291", + }, + // test case for issue 8822 + { + "11001289118363089646017359372117963499250546375269047542777928006103246876688756735760905680604646624353196869572752623285140408755420374049317646428185270079555372763503115646054602867593662923894140940837479507194934267532831694565516466765025434902348314525627418515646588160955862839022051353653052947073136084780742729727874803457643848197499548297570026926927502505634297079527299004267769780768565695459945235586892627059178884998772989397505061206395455591503771677500931269477503508150175717121828518985901959919560700853226255420793148986854391552859459511723547532575574664944815966793196961286234040892865", + "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD", + "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", + 
"21484252197776302499639938883777710321993113097987201050501182909581359357618579566746556372589385361683610524730509041328855066514963385522570894839035884713051640171474186548713546686476761306436434146475140156284389181808675016576845833340494848283681088886584219750554408060556769486628029028720727393293111678826356480455433909233520504112074401376133077150471237549474149190242010469539006449596611576612573955754349042329130631128234637924786466585703488460540228477440853493392086251021228087076124706778899179648655221663765993962724699135217212118535057766739392069738618682722216712319320435674779146070442", + }, + { + "-0x1BCE04427D8032319A89E5C4136456671AC620883F2C4139E57F91307C485AD2D6204F4F87A58262652DB5DBBAC72B0613E51B835E7153BEC6068F5C8D696B74DBD18FEC316AEF73985CF0475663208EB46B4F17DD9DA55367B03323E5491A70997B90C059FB34809E6EE55BCFBD5F2F52233BFE62E6AA9E4E26A1D4C2439883D14F2633D55D8AA66A1ACD5595E778AC3A280517F1157989E70C1A437B849F1877B779CC3CDDEDE2DAA6594A6C66D181A00A5F777EE60596D8773998F6E988DEAE4CCA60E4DDCF9590543C89F74F603259FCAD71660D30294FBBE6490300F78A9D63FA660DC9417B8B9DDA28BEB3977B621B988E23D4D954F322C3540541BC649ABD504C50FADFD9F0987D58A2BF689313A285E773FF02899A6EF887D1D4A0D2", + "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD", + "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", + 
"21484252197776302499639938883777710321993113097987201050501182909581359357618579566746556372589385361683610524730509041328855066514963385522570894839035884713051640171474186548713546686476761306436434146475140156284389181808675016576845833340494848283681088886584219750554408060556769486628029028720727393293111678826356480455433909233520504112074401376133077150471237549474149190242010469539006449596611576612573955754349042329130631128234637924786466585703488460540228477440853493392086251021228087076124706778899179648655221663765993962724699135217212118535057766739392069738618682722216712319320435674779146070442", + }, + + // test cases for issue 13907 + {"0xffffffff00000001", "0xffffffff00000001", "0xffffffff00000001", "0"}, + {"0xffffffffffffffff00000001", "0xffffffffffffffff00000001", "0xffffffffffffffff00000001", "0"}, + {"0xffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffff00000001", "0"}, + {"0xffffffffffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffffffffffff00000001", "0"}, + + { + "2", + "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD", + "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", // odd + 
"0x6AADD3E3E424D5B713FCAA8D8945B1E055166132038C57BBD2D51C833F0C5EA2007A2324CE514F8E8C2F008A2F36F44005A4039CB55830986F734C93DAF0EB4BAB54A6A8C7081864F44346E9BC6F0A3EB9F2C0146A00C6A05187D0C101E1F2D038CDB70CB5E9E05A2D188AB6CBB46286624D4415E7D4DBFAD3BCC6009D915C406EED38F468B940F41E6BEDC0430DD78E6F19A7DA3A27498A4181E24D738B0072D8F6ADB8C9809A5B033A09785814FD9919F6EF9F83EEA519BEC593855C4C10CBEEC582D4AE0792158823B0275E6AEC35242740468FAF3D5C60FD1E376362B6322F78B7ED0CA1C5BBCD2B49734A56C0967A1D01A100932C837B91D592CE08ABFF", + }, + { + "2", + "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD", + "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", // even + "0x7858794B5897C29F4ED0B40913416AB6C48588484E6A45F2ED3E26C941D878E923575AAC434EE2750E6439A6976F9BB4D64CEDB2A53CE8D04DD48CADCDF8E46F22747C6B81C6CEA86C0D873FBF7CEF262BAAC43A522BD7F32F3CDAC52B9337C77B3DCFB3DB3EDD80476331E82F4B1DF8EFDC1220C92656DFC9197BDC1877804E28D928A2A284B8DED506CBA304435C9D0133C246C98A7D890D1DE60CBC53A024361DA83A9B8775019083D22AC6820ED7C3C68F8E801DD4EC779EE0A05C6EB682EF9840D285B838369BA7E148FA27691D524FAEAF7C6ECE2A4B99A294B9F2C241857B5B90CC8BFFCFCF18DFA7D676131D5CD3855A5A3E8EBFA0CDFADB4D198B4A", + }, +} + +func TestExp(t *testing.T) { + for i, test := range expTests { + x, ok1 := new(Int).SetString(test.x, 0) + y, ok2 := new(Int).SetString(test.y, 0) + + var ok3, ok4 bool + var out, m *Int + + if len(test.out) == 0 { + out, ok3 = nil, true + } else { + out, ok3 = 
new(Int).SetString(test.out, 0) + } + + if len(test.m) == 0 { + m, ok4 = nil, true + } else { + m, ok4 = new(Int).SetString(test.m, 0) + } + + if !ok1 || !ok2 || !ok3 || !ok4 { + t.Errorf("#%d: error in input", i) + continue + } + + z1 := new(Int).Exp(x, y, m) + if z1 != nil && !isNormalized(z1) { + t.Errorf("#%d: %v is not normalized", i, *z1) + } + if !(z1 == nil && out == nil || z1.Cmp(out) == 0) { + t.Errorf("#%d: got %x want %x", i, z1, out) + } + + if m == nil { + // The result should be the same as for m == 0; + // specifically, there should be no div-zero panic. + m = &Int{abs: nat{}} // m != nil && len(m.abs) == 0 + z2 := new(Int).Exp(x, y, m) + if z2.Cmp(z1) != 0 { + t.Errorf("#%d: got %x want %x", i, z2, z1) + } + } + } +} + +func BenchmarkExp(b *testing.B) { + x, _ := new(Int).SetString("11001289118363089646017359372117963499250546375269047542777928006103246876688756735760905680604646624353196869572752623285140408755420374049317646428185270079555372763503115646054602867593662923894140940837479507194934267532831694565516466765025434902348314525627418515646588160955862839022051353653052947073136084780742729727874803457643848197499548297570026926927502505634297079527299004267769780768565695459945235586892627059178884998772989397505061206395455591503771677500931269477503508150175717121828518985901959919560700853226255420793148986854391552859459511723547532575574664944815966793196961286234040892865", 0) + y, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", 0) + n, _ := 
new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", 0) + out := new(Int) + for i := 0; i < b.N; i++ { + out.Exp(x, y, n) + } +} + +func BenchmarkExp2(b *testing.B) { + x, _ := new(Int).SetString("2", 0) + y, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", 0) + n, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", 0) + out := new(Int) + for i := 0; i < b.N; i++ { + out.Exp(x, y, n) + } +} + +func checkGcd(aBytes, bBytes []byte) bool { + x := new(Int) + y := new(Int) + a := new(Int).SetBytes(aBytes) + b := 
new(Int).SetBytes(bBytes)

	d := new(Int).GCD(x, y, a, b)
	x.Mul(x, a)
	y.Mul(y, b)
	x.Add(x, y)

	return x.Cmp(d) == 0
}

// euclidExtGCD is a reference implementation of Euclid's
// extended GCD algorithm for testing against optimized algorithms.
// It returns g together with cofactors x, y such that g = x*a + y*b.
// Requirements: a, b > 0
func euclidExtGCD(a, b *Int) (g, x, y *Int) {
	// Invariants maintained by the loop:
	//   cur  = curX*a  + curY*b
	//   next = nextX*a + nextY*b
	cur, next := new(Int).Set(a), new(Int).Set(b)
	curX, curY := new(Int).SetInt64(1), new(Int)
	nextX, nextY := new(Int), new(Int).SetInt64(1)

	quo, rem, saved := new(Int), new(Int), new(Int)
	for len(next.abs) > 0 {
		quo, rem = quo.QuoRem(cur, next, rem)

		// Rotate: the divisor becomes the dividend, the remainder becomes
		// the divisor, and the old dividend is recycled as remainder storage.
		cur, next, rem = next, rem, cur

		// curX, nextX = nextX, curX - quo*nextX
		saved.Set(nextX)
		nextX.Mul(nextX, quo)
		nextX.Sub(curX, nextX)
		curX.Set(saved)

		// curY, nextY = nextY, curY - quo*nextY
		saved.Set(nextY)
		nextY.Mul(nextY, quo)
		nextY.Sub(curY, nextY)
		curY.Set(saved)
	}
	return cur, curX, curY
}

// checkLehmerGcd compares the GCD computed by lehmerGCD (without cofactors)
// with the one from euclidExtGCD. Non-positive operands are accepted
// untested.
func checkLehmerGcd(aBytes, bBytes []byte) bool {
	a := new(Int).SetBytes(aBytes)
	b := new(Int).SetBytes(bBytes)
	if !(a.Sign() > 0 && b.Sign() > 0) {
		return true // can only test positive arguments
	}

	got := new(Int).lehmerGCD(nil, nil, a, b)
	want, _, _ := euclidExtGCD(a, b)
	return got.Cmp(want) == 0
}

// checkLehmerExtGcd compares the GCD and both cofactors computed by
// lehmerGCD with those from euclidExtGCD. Non-positive operands are accepted
// untested.
func checkLehmerExtGcd(aBytes, bBytes []byte) bool {
	a := new(Int).SetBytes(aBytes)
	b := new(Int).SetBytes(bBytes)
	gotX := new(Int)
	gotY := new(Int)
	if !(a.Sign() > 0 && b.Sign() > 0) {
		return true // can only test positive arguments
	}

	gotD := new(Int).lehmerGCD(gotX, gotY, a, b)
	wantD, wantX, wantY := euclidExtGCD(a, b)
	if gotD.Cmp(wantD) != 0 {
		return false
	}
	if gotX.Cmp(wantX) != 0 {
		return false
	}
	return gotY.Cmp(wantY) == 0
}

// gcdTests lists operands a, b with the expected GCD d and the expected
// cofactors x, y.
var gcdTests = []struct {
	d, x, y, a, b string
}{
	// a <= 0 || b <= 0
	{"0", "0", "0", "0", "0"},
	{"7", "0", "1", "0", "7"},
	{"7", "0", "-1", "0", "-7"},
	{"11", "1", "0", "11", "0"},
	{"7", "-1", "-2", "-77", "35"},
	{"935", "-3", "8", "64515", "24310"},
	{"935", "-3", "-8", "64515", "-24310"},
	{"935", "3", "-8", "-64515", "-24310"},

	{"1", "-9", "47", "120", "23"},
	{"7", "1", "-2", "77", "35"},
	{"935", "-3", "8", "64515", "24310"},
	{"935000000000000000", "-3", "8", "64515000000000000000", "24310000000000000000"},
	{"1", "-221", "22059940471369027483332068679400581064239780177629666810348940098015901108344", "98920366548084643601728869055592650835572950932266967461790948584315647051443", "991"},
}

// testGcd runs GCD(x, y, a, b) and checks the result against d and, where
// non-nil, the cofactors x and y. It then repeats the check with the
// receiver and the cofactor outputs aliasing the operands.
func testGcd(t *testing.T, d, x, y, a, b *Int) {
	// verify reports each mismatch between the results of one GCD call and
	// the expected values; label is prepended to every failure message.
	verify := func(label string, gotD, gotX, gotY *Int) {
		if gotD.Cmp(d) != 0 {
			t.Errorf(label+"GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, gotD, d)
		}
		if x != nil && gotX.Cmp(x) != 0 {
			t.Errorf(label+"GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, gotX, x)
		}
		if y != nil && gotY.Cmp(y) != 0 {
			t.Errorf(label+"GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, gotY, y)
		}
	}

	// Cofactor outputs are only requested when an expectation was supplied.
	var X, Y *Int
	if x != nil {
		X = new(Int)
	}
	if y != nil {
		Y = new(Int)
	}
	verify("", new(Int).GCD(X, Y, a, b), X, Y)

	// check results in presence of aliasing (issue #11284)
	a2, b2 := new(Int).Set(a), new(Int).Set(b)
	a2.GCD(X, Y, a2, b2) // result is same as 1st argument
	verify("aliased z = a ", a2, X, Y)

	a2, b2 = new(Int).Set(a), new(Int).Set(b)
	b2.GCD(X, Y, a2, b2) // result is same as 2nd argument
	verify("aliased z = b ", b2, X, Y)

	a2, b2 = new(Int).Set(a), new(Int).Set(b)
	verify("aliased x = a, y = b ", new(Int).GCD(a2, b2, a2, b2), a2, b2) // x = a, y = b

	a2, b2 = new(Int).Set(a), new(Int).Set(b)
	verify("aliased x = b, y = a ", new(Int).GCD(b2, a2, a2, b2), b2, a2) // x = b, y = a
}

// TestGcd checks GCD against gcdTests with every combination of requested
// cofactors, then runs the randomized GCD checks.
func TestGcd(t *testing.T) {
	for _, tc := range gcdTests {
		d, _ := new(Int).SetString(tc.d, 0)
		x, _ := new(Int).SetString(tc.x, 0)
		y, _ := new(Int).SetString(tc.y, 0)
		a, _ := new(Int).SetString(tc.a, 0)
		b, _ := new(Int).SetString(tc.b, 0)

		testGcd(t, d, nil, nil, a, b)
		testGcd(t, d, x, nil, a, b)
		testGcd(t, d, nil, y, a, b)
		testGcd(t, d, x, y, a, b)
	}

	properties := []func(aBytes, bBytes []byte) bool{
		checkGcd,
		checkLehmerGcd,
		checkLehmerExtGcd,
	}
	for _, property := range properties {
		if err := quick.Check(property, nil); err != nil {
			t.Error(err)
		}
	}
}

// intShiftTest is one shift test case: in shifted by shift bits should
// equal out (both given as decimal strings).
type intShiftTest struct {
	in    string
	shift uint
	out   string
}

var rshTests = []intShiftTest{
	{"0", 0, "0"},
	{"-0", 0, "0"},
	{"0", 1, "0"},
	{"0", 2, "0"},
	{"1", 0, "1"},
	{"1", 1, "0"},
	{"1", 2, "0"},
	{"2", 0, "2"},
	{"2", 1, "1"},
	{"-1", 0, "-1"},
	{"-1", 1, "-1"},
	{"-1", 10, "-1"},
	{"-100", 2, "-25"},
	{"-100", 3, "-13"},
	{"-100", 100, "-1"},
	{"4294967296", 0, "4294967296"},
	{"4294967296", 1, "2147483648"},
	{"4294967296", 2, "1073741824"},
	{"18446744073709551616", 0, "18446744073709551616"},
	{"18446744073709551616", 1, "9223372036854775808"},
{"18446744073709551616", 2, "4611686018427387904"}, + {"18446744073709551616", 64, "1"}, + {"340282366920938463463374607431768211456", 64, "18446744073709551616"}, + {"340282366920938463463374607431768211456", 128, "1"}, +} + +func TestRsh(t *testing.T) { + for i, test := range rshTests { + in, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + out := new(Int).Rsh(in, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if out.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, out, expected) + } + } +} + +func TestRshSelf(t *testing.T) { + for i, test := range rshTests { + z, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + z.Rsh(z, test.shift) + + if !isNormalized(z) { + t.Errorf("#%d: %v is not normalized", i, *z) + } + if z.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, z, expected) + } + } +} + +var lshTests = []intShiftTest{ + {"0", 0, "0"}, + {"0", 1, "0"}, + {"0", 2, "0"}, + {"1", 0, "1"}, + {"1", 1, "2"}, + {"1", 2, "4"}, + {"2", 0, "2"}, + {"2", 1, "4"}, + {"2", 2, "8"}, + {"-87", 1, "-174"}, + {"4294967296", 0, "4294967296"}, + {"4294967296", 1, "8589934592"}, + {"4294967296", 2, "17179869184"}, + {"18446744073709551616", 0, "18446744073709551616"}, + {"9223372036854775808", 1, "18446744073709551616"}, + {"4611686018427387904", 2, "18446744073709551616"}, + {"1", 64, "18446744073709551616"}, + {"18446744073709551616", 64, "340282366920938463463374607431768211456"}, + {"1", 128, "340282366920938463463374607431768211456"}, +} + +func TestLsh(t *testing.T) { + for i, test := range lshTests { + in, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + out := new(Int).Lsh(in, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if out.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, out, expected) + } + } +} + +func TestLshSelf(t 
*testing.T) { + for i, test := range lshTests { + z, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + z.Lsh(z, test.shift) + + if !isNormalized(z) { + t.Errorf("#%d: %v is not normalized", i, *z) + } + if z.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, z, expected) + } + } +} + +func TestLshRsh(t *testing.T) { + for i, test := range rshTests { + in, _ := new(Int).SetString(test.in, 10) + out := new(Int).Lsh(in, test.shift) + out = out.Rsh(out, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if in.Cmp(out) != 0 { + t.Errorf("#%d: got %s want %s", i, out, in) + } + } + for i, test := range lshTests { + in, _ := new(Int).SetString(test.in, 10) + out := new(Int).Lsh(in, test.shift) + out.Rsh(out, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if in.Cmp(out) != 0 { + t.Errorf("#%d: got %s want %s", i, out, in) + } + } +} + +// Entries must be sorted by value in ascending order. 
+var cmpAbsTests = []string{ + "0", + "1", + "2", + "10", + "10000000", + "2783678367462374683678456387645876387564783686583485", + "2783678367462374683678456387645876387564783686583486", + "32957394867987420967976567076075976570670947609750670956097509670576075067076027578341538", +} + +func TestCmpAbs(t *testing.T) { + values := make([]*Int, len(cmpAbsTests)) + var prev *Int + for i, s := range cmpAbsTests { + x, ok := new(Int).SetString(s, 0) + if !ok { + t.Fatalf("SetString(%s, 0) failed", s) + } + if prev != nil && prev.Cmp(x) >= 0 { + t.Fatal("cmpAbsTests entries not sorted in ascending order") + } + values[i] = x + prev = x + } + + for i, x := range values { + for j, y := range values { + // try all combinations of signs for x, y + for k := 0; k < 4; k++ { + var a, b Int + a.Set(x) + b.Set(y) + if k&1 != 0 { + a.Neg(&a) + } + if k&2 != 0 { + b.Neg(&b) + } + + got := a.CmpAbs(&b) + want := 0 + switch { + case i > j: + want = 1 + case i < j: + want = -1 + } + if got != want { + t.Errorf("absCmp |%s|, |%s|: got %d; want %d", &a, &b, got, want) + } + } + } + } +} + +func TestIntCmpSelf(t *testing.T) { + for _, s := range cmpAbsTests { + x, ok := new(Int).SetString(s, 0) + if !ok { + t.Fatalf("SetString(%s, 0) failed", s) + } + got := x.Cmp(x) + want := 0 + if got != want { + t.Errorf("x = %s: x.Cmp(x): got %d; want %d", x, got, want) + } + } +} + +var int64Tests = []string{ + // int64 + "0", + "1", + "-1", + "4294967295", + "-4294967295", + "4294967296", + "-4294967296", + "9223372036854775807", + "-9223372036854775807", + "-9223372036854775808", + + // not int64 + "0x8000000000000000", + "-0x8000000000000001", + "38579843757496759476987459679745", + "-38579843757496759476987459679745", +} + +func TestInt64(t *testing.T) { + for _, s := range int64Tests { + var x Int + _, ok := x.SetString(s, 0) + if !ok { + t.Errorf("SetString(%s, 0) failed", s) + continue + } + + want, err := strconv.ParseInt(s, 0, 64) + if err != nil { + if err.(*strconv.NumError).Err == 
strconv.ErrRange { + if x.IsInt64() { + t.Errorf("IsInt64(%s) succeeded unexpectedly", s) + } + } else { + t.Errorf("ParseInt(%s) failed", s) + } + continue + } + + if !x.IsInt64() { + t.Errorf("IsInt64(%s) failed unexpectedly", s) + } + + got := x.Int64() + if got != want { + t.Errorf("Int64(%s) = %d; want %d", s, got, want) + } + } +} + +var uint64Tests = []string{ + // uint64 + "0", + "1", + "4294967295", + "4294967296", + "8589934591", + "8589934592", + "9223372036854775807", + "9223372036854775808", + "0x08000000000000000", + + // not uint64 + "0x10000000000000000", + "-0x08000000000000000", + "-1", +} + +func TestUint64(t *testing.T) { + for _, s := range uint64Tests { + var x Int + _, ok := x.SetString(s, 0) + if !ok { + t.Errorf("SetString(%s, 0) failed", s) + continue + } + + want, err := strconv.ParseUint(s, 0, 64) + if err != nil { + // check for sign explicitly (ErrRange doesn't cover signed input) + if s[0] == '-' || err.(*strconv.NumError).Err == strconv.ErrRange { + if x.IsUint64() { + t.Errorf("IsUint64(%s) succeeded unexpectedly", s) + } + } else { + t.Errorf("ParseUint(%s) failed", s) + } + continue + } + + if !x.IsUint64() { + t.Errorf("IsUint64(%s) failed unexpectedly", s) + } + + got := x.Uint64() + if got != want { + t.Errorf("Uint64(%s) = %d; want %d", s, got, want) + } + } +} + +var bitwiseTests = []struct { + x, y string + and, or, xor, andNot string +}{ + {"0x00", "0x00", "0x00", "0x00", "0x00", "0x00"}, + {"0x00", "0x01", "0x00", "0x01", "0x01", "0x00"}, + {"0x01", "0x00", "0x00", "0x01", "0x01", "0x01"}, + {"-0x01", "0x00", "0x00", "-0x01", "-0x01", "-0x01"}, + {"-0xaf", "-0x50", "-0xf0", "-0x0f", "0xe1", "0x41"}, + {"0x00", "-0x01", "0x00", "-0x01", "-0x01", "0x00"}, + {"0x01", "0x01", "0x01", "0x01", "0x00", "0x00"}, + {"-0x01", "-0x01", "-0x01", "-0x01", "0x00", "0x00"}, + {"0x07", "0x08", "0x00", "0x0f", "0x0f", "0x07"}, + {"0x05", "0x0f", "0x05", "0x0f", "0x0a", "0x00"}, + {"0xff", "-0x0a", "0xf6", "-0x01", "-0xf7", "0x09"}, + 
{"0x013ff6", "0x9a4e", "0x1a46", "0x01bffe", "0x01a5b8", "0x0125b0"}, + {"-0x013ff6", "0x9a4e", "0x800a", "-0x0125b2", "-0x01a5bc", "-0x01c000"}, + {"-0x013ff6", "-0x9a4e", "-0x01bffe", "-0x1a46", "0x01a5b8", "0x8008"}, + { + "0x1000009dc6e3d9822cba04129bcbe3401", + "0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd", + "0x1000001186210100001000009048c2001", + "0xb9bd7d543685789d57cb918e8bfeff7fddb2ebe87dfbbdfe35fd", + "0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fc", + "0x8c40c2d8822caa04120b8321400", + }, + { + "0x1000009dc6e3d9822cba04129bcbe3401", + "-0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd", + "0x8c40c2d8822caa04120b8321401", + "-0xb9bd7d543685789d57ca918e82229142459020483cd2014001fd", + "-0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fe", + "0x1000001186210100001000009048c2000", + }, + { + "-0x1000009dc6e3d9822cba04129bcbe3401", + "-0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd", + "-0xb9bd7d543685789d57cb918e8bfeff7fddb2ebe87dfbbdfe35fd", + "-0x1000001186210100001000009048c2001", + "0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fc", + "0xb9bd7d543685789d57ca918e82229142459020483cd2014001fc", + }, +} + +type bitFun func(z, x, y *Int) *Int + +func testBitFun(t *testing.T, msg string, f bitFun, x, y *Int, exp string) { + expected := new(Int) + expected.SetString(exp, 0) + + out := f(new(Int), x, y) + if out.Cmp(expected) != 0 { + t.Errorf("%s: got %s want %s", msg, out, expected) + } +} + +func testBitFunSelf(t *testing.T, msg string, f bitFun, x, y *Int, exp string) { + self := new(Int) + self.Set(x) + expected := new(Int) + expected.SetString(exp, 0) + + self = f(self, self, y) + if self.Cmp(expected) != 0 { + t.Errorf("%s: got %s want %s", msg, self, expected) + } +} + +func altBit(x *Int, i int) uint { + z := new(Int).Rsh(x, uint(i)) + z = z.And(z, NewInt(1)) + if z.Cmp(new(Int)) != 0 { + return 1 + } + return 0 +} + +func altSetBit(z *Int, x *Int, i int, b uint) *Int { + one := NewInt(1) + m := one.Lsh(one, 
uint(i)) + switch b { + case 1: + return z.Or(x, m) + case 0: + return z.AndNot(x, m) + } + panic("set bit is not 0 or 1") +} + +func testBitset(t *testing.T, x *Int) { + n := x.BitLen() + z := new(Int).Set(x) + z1 := new(Int).Set(x) + for i := 0; i < n+10; i++ { + old := z.Bit(i) + old1 := altBit(z1, i) + if old != old1 { + t.Errorf("bitset: inconsistent value for Bit(%s, %d), got %v want %v", z1, i, old, old1) + } + z := new(Int).SetBit(z, i, 1) + z1 := altSetBit(new(Int), z1, i, 1) + if z.Bit(i) == 0 { + t.Errorf("bitset: bit %d of %s got 0 want 1", i, x) + } + if z.Cmp(z1) != 0 { + t.Errorf("bitset: inconsistent value after SetBit 1, got %s want %s", z, z1) + } + z.SetBit(z, i, 0) + altSetBit(z1, z1, i, 0) + if z.Bit(i) != 0 { + t.Errorf("bitset: bit %d of %s got 1 want 0", i, x) + } + if z.Cmp(z1) != 0 { + t.Errorf("bitset: inconsistent value after SetBit 0, got %s want %s", z, z1) + } + altSetBit(z1, z1, i, old) + z.SetBit(z, i, old) + if z.Cmp(z1) != 0 { + t.Errorf("bitset: inconsistent value after SetBit old, got %s want %s", z, z1) + } + } + if z.Cmp(x) != 0 { + t.Errorf("bitset: got %s want %s", z, x) + } +} + +var bitsetTests = []struct { + x string + i int + b uint +}{ + {"0", 0, 0}, + {"0", 200, 0}, + {"1", 0, 1}, + {"1", 1, 0}, + {"-1", 0, 1}, + {"-1", 200, 1}, + {"0x2000000000000000000000000000", 108, 0}, + {"0x2000000000000000000000000000", 109, 1}, + {"0x2000000000000000000000000000", 110, 0}, + {"-0x2000000000000000000000000001", 108, 1}, + {"-0x2000000000000000000000000001", 109, 0}, + {"-0x2000000000000000000000000001", 110, 1}, +} + +func TestBitSet(t *testing.T) { + for _, test := range bitwiseTests { + x := new(Int) + x.SetString(test.x, 0) + testBitset(t, x) + x = new(Int) + x.SetString(test.y, 0) + testBitset(t, x) + } + for i, test := range bitsetTests { + x := new(Int) + x.SetString(test.x, 0) + b := x.Bit(test.i) + if b != test.b { + t.Errorf("#%d got %v want %v", i, b, test.b) + } + } + z := NewInt(1) + z.SetBit(NewInt(0), 2, 1) + if 
z.Cmp(NewInt(4)) != 0 { + t.Errorf("destination leaked into result; got %s want 4", z) + } +} + +var tzbTests = []struct { + in string + out uint +}{ + {"0", 0}, + {"1", 0}, + {"-1", 0}, + {"4", 2}, + {"-8", 3}, + {"0x4000000000000000000", 74}, + {"-0x8000000000000000000", 75}, +} + +func TestTrailingZeroBits(t *testing.T) { + for i, test := range tzbTests { + in, _ := new(Int).SetString(test.in, 0) + want := test.out + got := in.TrailingZeroBits() + + if got != want { + t.Errorf("#%d: got %v want %v", i, got, want) + } + } +} + +func BenchmarkBitset(b *testing.B) { + z := new(Int) + z.SetBit(z, 512, 1) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + z.SetBit(z, i&512, 1) + } +} + +func BenchmarkBitsetNeg(b *testing.B) { + z := NewInt(-1) + z.SetBit(z, 512, 0) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + z.SetBit(z, i&512, 0) + } +} + +func BenchmarkBitsetOrig(b *testing.B) { + z := new(Int) + altSetBit(z, z, 512, 1) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + altSetBit(z, z, i&512, 1) + } +} + +func BenchmarkBitsetNegOrig(b *testing.B) { + z := NewInt(-1) + altSetBit(z, z, 512, 0) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + altSetBit(z, z, i&512, 0) + } +} + +// tri generates the trinomial 2**(n*2) - 2**n - 1, which is always 3 mod 4 and +// 7 mod 8, so that 2 is always a quadratic residue. 
+func tri(n uint) *Int { + x := NewInt(1) + x.Lsh(x, n) + x2 := new(Int).Lsh(x, n) + x2.Sub(x2, x) + x2.Sub(x2, intOne) + return x2 +} + +func BenchmarkModSqrt225_Tonelli(b *testing.B) { + p := tri(225) + x := NewInt(2) + for i := 0; i < b.N; i++ { + x.SetUint64(2) + x.modSqrtTonelliShanks(x, p) + } +} + +func BenchmarkModSqrt225_3Mod4(b *testing.B) { + p := tri(225) + x := new(Int).SetUint64(2) + for i := 0; i < b.N; i++ { + x.SetUint64(2) + x.modSqrt3Mod4Prime(x, p) + } +} + +func BenchmarkModSqrt231_Tonelli(b *testing.B) { + p := tri(231) + p.Sub(p, intOne) + p.Sub(p, intOne) // tri(231) - 2 is a prime == 5 mod 8 + x := new(Int).SetUint64(7) + for i := 0; i < b.N; i++ { + x.SetUint64(7) + x.modSqrtTonelliShanks(x, p) + } +} + +func BenchmarkModSqrt231_5Mod8(b *testing.B) { + p := tri(231) + p.Sub(p, intOne) + p.Sub(p, intOne) // tri(231) - 2 is a prime == 5 mod 8 + x := new(Int).SetUint64(7) + for i := 0; i < b.N; i++ { + x.SetUint64(7) + x.modSqrt5Mod8Prime(x, p) + } +} + +func TestBitwise(t *testing.T) { + x := new(Int) + y := new(Int) + for _, test := range bitwiseTests { + x.SetString(test.x, 0) + y.SetString(test.y, 0) + + testBitFun(t, "and", (*Int).And, x, y, test.and) + testBitFunSelf(t, "and", (*Int).And, x, y, test.and) + testBitFun(t, "andNot", (*Int).AndNot, x, y, test.andNot) + testBitFunSelf(t, "andNot", (*Int).AndNot, x, y, test.andNot) + testBitFun(t, "or", (*Int).Or, x, y, test.or) + testBitFunSelf(t, "or", (*Int).Or, x, y, test.or) + testBitFun(t, "xor", (*Int).Xor, x, y, test.xor) + testBitFunSelf(t, "xor", (*Int).Xor, x, y, test.xor) + } +} + +var notTests = []struct { + in string + out string +}{ + {"0", "-1"}, + {"1", "-2"}, + {"7", "-8"}, + {"0", "-1"}, + {"-81910", "81909"}, + { + "298472983472983471903246121093472394872319615612417471234712061", + "-298472983472983471903246121093472394872319615612417471234712062", + }, +} + +func TestNot(t *testing.T) { + in := new(Int) + out := new(Int) + expected := new(Int) + for i, test := range 
notTests { + in.SetString(test.in, 10) + expected.SetString(test.out, 10) + out = out.Not(in) + if out.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, out, expected) + } + out = out.Not(out) + if out.Cmp(in) != 0 { + t.Errorf("#%d: got %s want %s", i, out, in) + } + } +} + +var modInverseTests = []struct { + element string + modulus string +}{ + {"1234567", "458948883992"}, + {"239487239847", "2410312426921032588552076022197566074856950548502459942654116941958108831682612228890093858261341614673227141477904012196503648957050582631942730706805009223062734745341073406696246014589361659774041027169249453200378729434170325843778659198143763193776859869524088940195577346119843545301547043747207749969763750084308926339295559968882457872412993810129130294592999947926365264059284647209730384947211681434464714438488520940127459844288859336526896320919633919"}, + {"-10", "13"}, // issue #16984 + {"10", "-13"}, + {"-17", "-13"}, +} + +func TestModInverse(t *testing.T) { + var element, modulus, gcd, inverse Int + one := NewInt(1) + for _, test := range modInverseTests { + (&element).SetString(test.element, 10) + (&modulus).SetString(test.modulus, 10) + (&inverse).ModInverse(&element, &modulus) + (&inverse).Mul(&inverse, &element) + (&inverse).Mod(&inverse, &modulus) + if (&inverse).Cmp(one) != 0 { + t.Errorf("ModInverse(%d,%d)*%d%%%d=%d, not 1", &element, &modulus, &element, &modulus, &inverse) + } + } + // exhaustive test for small values + for n := 2; n < 100; n++ { + (&modulus).SetInt64(int64(n)) + for x := 1; x < n; x++ { + (&element).SetInt64(int64(x)) + (&gcd).GCD(nil, nil, &element, &modulus) + if (&gcd).Cmp(one) != 0 { + continue + } + (&inverse).ModInverse(&element, &modulus) + (&inverse).Mul(&inverse, &element) + (&inverse).Mod(&inverse, &modulus) + if (&inverse).Cmp(one) != 0 { + t.Errorf("ModInverse(%d,%d)*%d%%%d=%d, not 1", &element, &modulus, &element, &modulus, &inverse) + } + } + } +} + +func BenchmarkModInverse(b *testing.B) { + p := 
// testModSqrt is a helper for TestModSqrt. It stores elt*elt mod mod in sq,
// asks ModSqrt for a root of sq (stored in sqrt), and reports whether that
// root really squares to sq modulo mod.
//
// Along the way it checks that ModSqrt returns its receiver and that the
// same root is produced when the argument lies outside [0, mod) or aliases
// the receiver.
func testModSqrt(t *testing.T, elt, mod, sq, sqrt *Int) bool {
	sq.Mod(sq.Mul(elt, elt), mod)
	if got := sqrt.ModSqrt(sq, mod); got != sqrt {
		t.Errorf("ModSqrt returned wrong value %s", got)
	}

	var arg, root Int
	// consistent reports an error unless got is &root and equals sqrt.
	consistent := func(got *Int) {
		if got != &root || got.Cmp(sqrt) != 0 {
			t.Errorf("ModSqrt returned inconsistent value %s", got)
		}
	}

	// test ModSqrt arguments outside the range [0,mod)
	consistent(root.ModSqrt(arg.Add(sq, mod), mod))
	consistent(root.ModSqrt(arg.Sub(sq, mod), mod))

	// test x aliasing z
	consistent(root.ModSqrt(root.Set(sq), mod))

	// make sure we actually got a square root
	if sqrt.Cmp(elt) == 0 {
		return true // we found the "desired" square root
	}
	var squared Int
	squared.Mul(sqrt, sqrt) // make sure we found the "other" one
	squared.Mod(&squared, mod)
	return sq.Cmp(&squared) == 0
}
// TestJacobi compares Jacobi(x, y) against a table of known symbol values
// covering zero, positive and negative arguments.
func TestJacobi(t *testing.T) {
	type jacobiCase struct {
		x, y int64
		want int
	}
	cases := []jacobiCase{
		{0, 1, 1},
		{0, -1, 1},
		{1, 1, 1},
		{1, -1, 1},
		{0, 5, 0},
		{1, 5, 1},
		{2, 5, -1},
		{-2, 5, -1},
		{2, -5, -1},
		{-2, -5, 1},
		{3, 5, -1},
		{5, 5, 0},
		{-5, 5, 0},
		{6, 5, 1},
		{6, -5, 1},
		{-6, 5, 1},
		{-6, -5, -1},
	}

	a, b := new(Int), new(Int)
	for i, c := range cases {
		a.SetInt64(c.x)
		b.SetInt64(c.y)
		if got := Jacobi(a, b); got != c.want {
			t.Errorf("#%d: Jacobi(%d, %d) = %d, but expected %d", i, c.x, c.y, got, c.want)
		}
	}
}
// TestIssue22830 checks that Exp(base, 1, mod) produces base mod mod no
// matter what value the receiver held before the call. It is kept apart
// from the other Exp tests because it needs a different receiver setup.
func TestIssue22830(t *testing.T) {
	exponent := NewInt(1)
	base, _ := new(Int).SetString("84555555300000000000", 10)
	mod, _ := new(Int).SetString("66666670001111111111", 10)
	want, _ := new(Int).SetString("17888885298888888889", 10)

	// initial receiver values to try
	for _, n := range []int64{0, 1, -1} {
		receiver := NewInt(n)
		got := receiver.Exp(base, exponent, mod)
		if got.Cmp(want) != 0 {
			t.Errorf("(%v).Exp(%s, 1, %s) = %s, want %s", n, base, mod, got, want)
		}
	}
}
// TestFillBytes exercises Int.FillBytes for a range of values with three
// buffer sizes: exactly large enough, much larger (pre-filled with 0xff to
// verify that the unused leading bytes are zeroed), and one byte too
// small, which must panic.
func TestFillBytes(t *testing.T) {
	// checkResult interprets buf as a big-endian magnitude and compares it
	// with |want|.
	checkResult := func(t *testing.T, buf []byte, want *Int) {
		t.Helper()
		got := new(Int).SetBytes(buf)
		if got.CmpAbs(want) != 0 {
			t.Errorf("got 0x%x, want 0x%x: %x", got, want, buf)
		}
	}
	// panics reports whether calling f panics.
	panics := func(f func()) (didPanic bool) {
		defer func() { didPanic = recover() != nil }()
		f()
		return
	}

	for _, n := range []string{
		"0",
		"1000",
		"0xffffffff",
		"-0xffffffff",
		"0xffffffffffffffff",
		"0x10000000000000000",
		"0xabababababababababababababababababababababababababa",
		"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
	} {
		t.Run(n, func(t *testing.T) {
			// Log, not Logf: n is data and must not be used as a format string.
			t.Log(n)
			x, ok := new(Int).SetString(n, 0)
			if !ok {
				t.Fatalf("invalid test entry %q", n)
			}

			// Perfectly sized buffer.
			byteLen := (x.BitLen() + 7) / 8
			buf := make([]byte, byteLen)
			checkResult(t, x.FillBytes(buf), x)

			// Way larger, checking all bytes get zeroed.
			buf = make([]byte, 100)
			for i := range buf {
				buf[i] = 0xff
			}
			checkResult(t, x.FillBytes(buf), x)

			// Too small.
			if byteLen > 0 {
				buf = make([]byte, byteLen-1)
				if !panics(func() { x.FillBytes(buf) }) {
					t.Errorf("expected panic for small buffer and value %x", x)
				}
			}
		})
	}
}
// writeMultiple writes text to s count times. Nothing is written when
// text is empty or count is not positive.
func writeMultiple(s fmt.State, text string, count int) {
	if text == "" {
		return
	}
	chunk := []byte(text)
	for i := 0; i < count; i++ {
		s.Write(chunk)
	}
}
// Format implements fmt.Formatter. It accepts the verbs
// 'b' (binary), 'o' (octal), 'O' (octal with 0o prefix),
// 'd', 's' and 'v' (decimal), 'x' (lowercase hexadecimal), and
// 'X' (uppercase hexadecimal). Any other verb is reported in the
// "%!verb(big.Int=value)" form.
//
// The usual package fmt flags for integral types are supported:
// '+' and ' ' for sign control ('+' wins when both are given);
// '#' for a leading "0b" with 'b', "0" with 'o', "0x" with 'x' and
// "0X" with 'X'; a precision for the minimum number of digits; a field
// width; '0' for zero padding (ignored when a precision or '-' is given);
// and '-' for left justification.
//
// A nil x formatted with a supported verb prints "<nil>". A zero value
// formatted with an explicit precision of zero prints nothing at all.
func (x *Int) Format(s fmt.State, ch rune) {
	// determine base
	var base int
	switch ch {
	case 'b':
		base = 2
	case 'o', 'O':
		base = 8
	case 'd', 's', 'v':
		base = 10
	case 'x', 'X':
		base = 16
	default:
		// unknown format
		fmt.Fprintf(s, "%%!%c(big.Int=%s)", ch, x.String())
		return
	}

	if x == nil {
		fmt.Fprint(s, "<nil>")
		return
	}

	// determine sign character
	sign := ""
	switch {
	case x.neg:
		sign = "-"
	case s.Flag('+'): // supersedes ' ' when both specified
		sign = "+"
	case s.Flag(' '):
		sign = " "
	}

	// determine prefix characters for indicating output base
	prefix := ""
	if s.Flag('#') {
		switch ch {
		case 'b': // binary
			prefix = "0b"
		case 'o': // octal
			prefix = "0"
		case 'x': // hexadecimal
			prefix = "0x"
		case 'X':
			prefix = "0X"
		}
	}
	// 'O' always carries its prefix, with or without '#'
	if ch == 'O' {
		prefix = "0o"
	}

	digits := x.abs.utoa(base)
	if ch == 'X' {
		// faster than bytes.ToUpper
		for i, d := range digits {
			if 'a' <= d && d <= 'z' {
				digits[i] = 'A' + (d - 'a')
			}
		}
	}

	// number of characters for the three classes of number padding
	var left int  // space characters to left of digits for right justification ("%8d")
	var zeros int // zero characters as left-most digits ("%.8d")
	var right int // space characters to right of digits for left justification ("%-8d")

	// determine number padding from precision: the least number of digits to output
	precision, precisionSet := s.Precision()
	if precisionSet {
		switch {
		case len(digits) < precision:
			zeros = precision - len(digits) // count of zero padding
		case len(digits) == 1 && digits[0] == '0' && precision == 0:
			return // print nothing if zero value (x == 0) and zero precision ("." or ".0")
		}
	}

	// determine field pad from width: the least number of characters to output
	length := len(sign) + len(prefix) + zeros + len(digits)
	if width, widthSet := s.Width(); widthSet && length < width { // pad as specified
		switch d := width - length; {
		case s.Flag('-'):
			// pad on the right with spaces; supersedes '0' when both specified
			right = d
		case s.Flag('0') && !precisionSet:
			// pad with zeros unless precision also specified
			zeros = d
		default:
			// pad on the left with spaces
			left = d
		}
	}

	// print number as [left pad][sign][prefix][zero pad][digits][right pad]
	writeMultiple(s, " ", left)
	writeMultiple(s, sign, 1)
	writeMultiple(s, prefix, 1)
	writeMultiple(s, "0", zeros)
	s.Write(digits)
	writeMultiple(s, " ", right)
}
// scanSign consumes an optional leading sign from r. It reports neg = true
// if the byte read is '-'. A '+' is consumed silently; any other byte is
// pushed back with UnreadByte. A read error is returned unchanged together
// with neg = false.
func scanSign(r io.ByteScanner) (neg bool, err error) {
	ch, err := r.ReadByte()
	if err != nil {
		return false, err
	}
	if ch == '-' {
		return true, nil
	}
	if ch != '+' {
		// not a sign: leave the byte for the caller
		r.UnreadByte()
	}
	return false, nil
}
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "bytes" + "fmt" + "testing" +) + +var stringTests = []struct { + in string + out string + base int + val int64 + ok bool +}{ + // invalid inputs + {in: ""}, + {in: "a"}, + {in: "z"}, + {in: "+"}, + {in: "-"}, + {in: "0b"}, + {in: "0o"}, + {in: "0x"}, + {in: "0y"}, + {in: "2", base: 2}, + {in: "0b2", base: 0}, + {in: "08"}, + {in: "8", base: 8}, + {in: "0xg", base: 0}, + {in: "g", base: 16}, + + // invalid inputs with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {in: "_"}, + {in: "0_"}, + {in: "_0"}, + {in: "-1__0"}, + {in: "0x10_"}, + {in: "1_000", base: 10}, // separators are not permitted for bases != 0 + {in: "d_e_a_d", base: 16}, + + // valid inputs + {"0", "0", 0, 0, true}, + {"0", "0", 10, 0, true}, + {"0", "0", 16, 0, true}, + {"+0", "0", 0, 0, true}, + {"-0", "0", 0, 0, true}, + {"10", "10", 0, 10, true}, + {"10", "10", 10, 10, true}, + {"10", "10", 16, 16, true}, + {"-10", "-10", 16, -16, true}, + {"+10", "10", 16, 16, true}, + {"0b10", "2", 0, 2, true}, + {"0o10", "8", 0, 8, true}, + {"0x10", "16", 0, 16, true}, + {in: "0x10", base: 16}, + {"-0x10", "-16", 0, -16, true}, + {"+0x10", "16", 0, 16, true}, + {"00", "0", 0, 0, true}, + {"0", "0", 8, 0, true}, + {"07", "7", 0, 7, true}, + {"7", "7", 8, 7, true}, + {"023", "19", 0, 19, true}, + {"23", "23", 8, 19, true}, + {"cafebabe", "cafebabe", 16, 0xcafebabe, true}, + {"0b0", "0", 0, 0, true}, + {"-111", "-111", 2, -7, true}, + {"-0b111", "-7", 0, -7, true}, + {"0b1001010111", "599", 0, 0x257, true}, + {"1001010111", "1001010111", 2, 0x257, true}, + {"A", "a", 36, 10, true}, + {"A", "A", 37, 36, true}, + {"ABCXYZ", "abcxyz", 36, 623741435, true}, + {"ABCXYZ", "ABCXYZ", 62, 33536793425, true}, + + // valid input with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {"1_000", "1000", 0, 1000, 
// format returns the fmt verb that prints an integer in the given base:
// "%b" for 2, "%o" for 8 and "%x" for 16. Every other base maps to "%d".
func format(base int) string {
	if base == 2 {
		return "%b"
	}
	if base == 8 {
		return "%o"
	}
	if base == 16 {
		return "%x"
	}
	return "%d"
}
:= NewInt(test.val) + if ok1 != test.ok || ok2 != test.ok { + t.Errorf("#%d (input '%s') ok incorrect (should be %t)", i, test.in, test.ok) + continue + } + if !ok1 { + if n1 != nil { + t.Errorf("#%d (input '%s') n1 != nil", i, test.in) + } + continue + } + if !ok2 { + if n2 != nil { + t.Errorf("#%d (input '%s') n2 != nil", i, test.in) + } + continue + } + + if ok1 && !isNormalized(n1) { + t.Errorf("#%d (input '%s'): %v is not normalized", i, test.in, *n1) + } + if ok2 && !isNormalized(n2) { + t.Errorf("#%d (input '%s'): %v is not normalized", i, test.in, *n2) + } + + if n1.Cmp(expected) != 0 { + t.Errorf("#%d (input '%s') got: %s want: %d", i, test.in, n1, test.val) + } + if n2.Cmp(expected) != 0 { + t.Errorf("#%d (input '%s') got: %s want: %d", i, test.in, n2, test.val) + } + } +} + +var formatTests = []struct { + input string + format string + output string +}{ + {"<nil>", "%x", "<nil>"}, + {"<nil>", "%#x", "<nil>"}, + {"<nil>", "%#y", "%!y(big.Int=<nil>)"}, + + {"10", "%b", "1010"}, + {"10", "%o", "12"}, + {"10", "%d", "10"}, + {"10", "%v", "10"}, + {"10", "%x", "a"}, + {"10", "%X", "A"}, + {"-10", "%X", "-A"}, + {"10", "%y", "%!y(big.Int=10)"}, + {"-10", "%y", "%!y(big.Int=-10)"}, + + {"10", "%#b", "0b1010"}, + {"10", "%#o", "012"}, + {"10", "%O", "0o12"}, + {"-10", "%#b", "-0b1010"}, + {"-10", "%#o", "-012"}, + {"-10", "%O", "-0o12"}, + {"10", "%#d", "10"}, + {"10", "%#v", "10"}, + {"10", "%#x", "0xa"}, + {"10", "%#X", "0XA"}, + {"-10", "%#X", "-0XA"}, + {"10", "%#y", "%!y(big.Int=10)"}, + {"-10", "%#y", "%!y(big.Int=-10)"}, + + {"1234", "%d", "1234"}, + {"1234", "%3d", "1234"}, + {"1234", "%4d", "1234"}, + {"-1234", "%d", "-1234"}, + {"1234", "% 5d", " 1234"}, + {"1234", "%+5d", "+1234"}, + {"1234", "%-5d", "1234 "}, + {"1234", "%x", "4d2"}, + {"1234", "%X", "4D2"}, + {"-1234", "%3x", "-4d2"}, + {"-1234", "%4x", "-4d2"}, + {"-1234", "%5x", " -4d2"}, + {"-1234", "%-5x", "-4d2 "}, + {"1234", "%03d", "1234"}, + {"1234", "%04d", "1234"}, + {"1234", "%05d", 
"01234"}, + {"1234", "%06d", "001234"}, + {"-1234", "%06d", "-01234"}, + {"1234", "%+06d", "+01234"}, + {"1234", "% 06d", " 01234"}, + {"1234", "%-6d", "1234 "}, + {"1234", "%-06d", "1234 "}, + {"-1234", "%-06d", "-1234 "}, + + {"1234", "%.3d", "1234"}, + {"1234", "%.4d", "1234"}, + {"1234", "%.5d", "01234"}, + {"1234", "%.6d", "001234"}, + {"-1234", "%.3d", "-1234"}, + {"-1234", "%.4d", "-1234"}, + {"-1234", "%.5d", "-01234"}, + {"-1234", "%.6d", "-001234"}, + + {"1234", "%8.3d", " 1234"}, + {"1234", "%8.4d", " 1234"}, + {"1234", "%8.5d", " 01234"}, + {"1234", "%8.6d", " 001234"}, + {"-1234", "%8.3d", " -1234"}, + {"-1234", "%8.4d", " -1234"}, + {"-1234", "%8.5d", " -01234"}, + {"-1234", "%8.6d", " -001234"}, + + {"1234", "%+8.3d", " +1234"}, + {"1234", "%+8.4d", " +1234"}, + {"1234", "%+8.5d", " +01234"}, + {"1234", "%+8.6d", " +001234"}, + {"-1234", "%+8.3d", " -1234"}, + {"-1234", "%+8.4d", " -1234"}, + {"-1234", "%+8.5d", " -01234"}, + {"-1234", "%+8.6d", " -001234"}, + + {"1234", "% 8.3d", " 1234"}, + {"1234", "% 8.4d", " 1234"}, + {"1234", "% 8.5d", " 01234"}, + {"1234", "% 8.6d", " 001234"}, + {"-1234", "% 8.3d", " -1234"}, + {"-1234", "% 8.4d", " -1234"}, + {"-1234", "% 8.5d", " -01234"}, + {"-1234", "% 8.6d", " -001234"}, + + {"1234", "%.3x", "4d2"}, + {"1234", "%.4x", "04d2"}, + {"1234", "%.5x", "004d2"}, + {"1234", "%.6x", "0004d2"}, + {"-1234", "%.3x", "-4d2"}, + {"-1234", "%.4x", "-04d2"}, + {"-1234", "%.5x", "-004d2"}, + {"-1234", "%.6x", "-0004d2"}, + + {"1234", "%8.3x", " 4d2"}, + {"1234", "%8.4x", " 04d2"}, + {"1234", "%8.5x", " 004d2"}, + {"1234", "%8.6x", " 0004d2"}, + {"-1234", "%8.3x", " -4d2"}, + {"-1234", "%8.4x", " -04d2"}, + {"-1234", "%8.5x", " -004d2"}, + {"-1234", "%8.6x", " -0004d2"}, + + {"1234", "%+8.3x", " +4d2"}, + {"1234", "%+8.4x", " +04d2"}, + {"1234", "%+8.5x", " +004d2"}, + {"1234", "%+8.6x", " +0004d2"}, + {"-1234", "%+8.3x", " -4d2"}, + {"-1234", "%+8.4x", " -04d2"}, + {"-1234", "%+8.5x", " -004d2"}, + {"-1234", "%+8.6x", " 
-0004d2"}, + + {"1234", "% 8.3x", " 4d2"}, + {"1234", "% 8.4x", " 04d2"}, + {"1234", "% 8.5x", " 004d2"}, + {"1234", "% 8.6x", " 0004d2"}, + {"1234", "% 8.7x", " 00004d2"}, + {"1234", "% 8.8x", " 000004d2"}, + {"-1234", "% 8.3x", " -4d2"}, + {"-1234", "% 8.4x", " -04d2"}, + {"-1234", "% 8.5x", " -004d2"}, + {"-1234", "% 8.6x", " -0004d2"}, + {"-1234", "% 8.7x", "-00004d2"}, + {"-1234", "% 8.8x", "-000004d2"}, + + {"1234", "%-8.3d", "1234 "}, + {"1234", "%-8.4d", "1234 "}, + {"1234", "%-8.5d", "01234 "}, + {"1234", "%-8.6d", "001234 "}, + {"1234", "%-8.7d", "0001234 "}, + {"1234", "%-8.8d", "00001234"}, + {"-1234", "%-8.3d", "-1234 "}, + {"-1234", "%-8.4d", "-1234 "}, + {"-1234", "%-8.5d", "-01234 "}, + {"-1234", "%-8.6d", "-001234 "}, + {"-1234", "%-8.7d", "-0001234"}, + {"-1234", "%-8.8d", "-00001234"}, + + {"16777215", "%b", "111111111111111111111111"}, // 2**24 - 1 + + {"0", "%.d", ""}, + {"0", "%.0d", ""}, + {"0", "%3.d", ""}, +} + +func TestFormat(t *testing.T) { + for i, test := range formatTests { + var x *Int + if test.input != "<nil>" { + var ok bool + x, ok = new(Int).SetString(test.input, 0) + if !ok { + t.Errorf("#%d failed reading input %s", i, test.input) + } + } + output := fmt.Sprintf(test.format, x) + if output != test.output { + t.Errorf("#%d got %q; want %q, {%q, %q, %q}", i, output, test.output, test.input, test.format, test.output) + } + } +} + +var scanTests = []struct { + input string + format string + output string + remaining int +}{ + {"1010", "%b", "10", 0}, + {"0b1010", "%v", "10", 0}, + {"12", "%o", "10", 0}, + {"012", "%v", "10", 0}, + {"10", "%d", "10", 0}, + {"10", "%v", "10", 0}, + {"a", "%x", "10", 0}, + {"0xa", "%v", "10", 0}, + {"A", "%X", "10", 0}, + {"-A", "%X", "-10", 0}, + {"+0b1011001", "%v", "89", 0}, + {"0xA", "%v", "10", 0}, + {"0 ", "%v", "0", 1}, + {"2+3", "%v", "2", 2}, + {"0XABC 12", "%v", "2748", 3}, +} + +func TestScan(t *testing.T) { + var buf bytes.Buffer + for i, test := range scanTests { + x := new(Int) + 
buf.Reset() + buf.WriteString(test.input) + if _, err := fmt.Fscanf(&buf, test.format, x); err != nil { + t.Errorf("#%d error: %s", i, err) + } + if x.String() != test.output { + t.Errorf("#%d got %s; want %s", i, x.String(), test.output) + } + if buf.Len() != test.remaining { + t.Errorf("#%d got %d bytes remaining; want %d", i, buf.Len(), test.remaining) + } + } +} diff --git a/src/math/big/intmarsh.go b/src/math/big/intmarsh.go new file mode 100644 index 0000000..c1422e2 --- /dev/null +++ b/src/math/big/intmarsh.go @@ -0,0 +1,80 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements encoding/decoding of Ints. + +package big + +import ( + "bytes" + "fmt" +) + +// Gob codec version. Permits backward-compatible changes to the encoding. +const intGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +func (x *Int) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + buf := make([]byte, 1+len(x.abs)*_S) // extra byte for version and sign bit + i := x.abs.bytes(buf) - 1 // i >= 0 + b := intGobVersion << 1 // make space for sign bit + if x.neg { + b |= 1 + } + buf[i] = b + return buf[i:], nil +} + +// GobDecode implements the gob.GobDecoder interface. +func (z *Int) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. + *z = Int{} + return nil + } + b := buf[0] + if b>>1 != intGobVersion { + return fmt.Errorf("Int.GobDecode: encoding version %d not supported", b>>1) + } + z.neg = b&1 != 0 + z.abs = z.abs.setBytes(buf[1:]) + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +func (x *Int) MarshalText() (text []byte, err error) { + if x == nil { + return []byte("<nil>"), nil + } + return x.abs.itoa(x.neg, 10), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. 
+func (z *Int) UnmarshalText(text []byte) error { + if _, ok := z.setFromScanner(bytes.NewReader(text), 0); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text) + } + return nil +} + +// The JSON marshalers are only here for API backward compatibility +// (programs that explicitly look for these two methods). JSON works +// fine with the TextMarshaler only. + +// MarshalJSON implements the json.Marshaler interface. +func (x *Int) MarshalJSON() ([]byte, error) { + return x.MarshalText() +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (z *Int) UnmarshalJSON(text []byte) error { + // Ignore null, like in the main JSON package. + if string(text) == "null" { + return nil + } + return z.UnmarshalText(text) +} diff --git a/src/math/big/intmarsh_test.go b/src/math/big/intmarsh_test.go new file mode 100644 index 0000000..f82956c --- /dev/null +++ b/src/math/big/intmarsh_test.go @@ -0,0 +1,121 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "bytes" + "encoding/gob" + "encoding/json" + "encoding/xml" + "testing" +) + +var encodingTests = []string{ + "0", + "1", + "2", + "10", + "1000", + "1234567890", + "298472983472983471903246121093472394872319615612417471234712061", +} + +func TestIntGobEncoding(t *testing.T) { + var medium bytes.Buffer + enc := gob.NewEncoder(&medium) + dec := gob.NewDecoder(&medium) + for _, test := range encodingTests { + for _, sign := range []string{"", "+", "-"} { + x := sign + test + medium.Reset() // empty buffer for each test case (in case of failures) + var tx Int + tx.SetString(x, 10) + if err := enc.Encode(&tx); err != nil { + t.Errorf("encoding of %s failed: %s", &tx, err) + continue + } + var rx Int + if err := dec.Decode(&rx); err != nil { + t.Errorf("decoding of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} + +// Sending a nil Int pointer (inside a slice) on a round trip through gob should yield a zero. +// TODO: top-level nils. 
+func TestGobEncodingNilIntInSlice(t *testing.T) { + buf := new(bytes.Buffer) + enc := gob.NewEncoder(buf) + dec := gob.NewDecoder(buf) + + var in = make([]*Int, 1) + err := enc.Encode(&in) + if err != nil { + t.Errorf("gob encode failed: %q", err) + } + var out []*Int + err = dec.Decode(&out) + if err != nil { + t.Fatalf("gob decode failed: %q", err) + } + if len(out) != 1 { + t.Fatalf("wrong len; want 1 got %d", len(out)) + } + var zero Int + if out[0].Cmp(&zero) != 0 { + t.Fatalf("transmission of (*Int)(nil) failed: got %s want 0", out) + } +} + +func TestIntJSONEncoding(t *testing.T) { + for _, test := range encodingTests { + for _, sign := range []string{"", "+", "-"} { + x := sign + test + var tx Int + tx.SetString(x, 10) + b, err := json.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Int + if err := json.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} + +func TestIntXMLEncoding(t *testing.T) { + for _, test := range encodingTests { + for _, sign := range []string{"", "+", "-"} { + x := sign + test + var tx Int + tx.SetString(x, 0) + b, err := xml.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Int + if err := xml.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} diff --git a/src/math/big/link_test.go b/src/math/big/link_test.go new file mode 100644 index 0000000..6e33aa5 --- /dev/null +++ b/src/math/big/link_test.go @@ -0,0 +1,63 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "bytes" + "internal/testenv" + "os" + "os/exec" + "path/filepath" + "testing" +) + +// Tests that the linker is able to remove references to Float, Rat, +// and Int if unused (notably, not used by init). +func TestLinkerGC(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + tmp := t.TempDir() + goBin := testenv.GoToolPath(t) + goFile := filepath.Join(tmp, "x.go") + file := []byte(`package main +import _ "math/big" +func main() {} +`) + if err := os.WriteFile(goFile, file, 0644); err != nil { + t.Fatal(err) + } + cmd := exec.Command(goBin, "build", "-o", "x.exe", "x.go") + cmd.Dir = tmp + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("compile: %v, %s", err, out) + } + + cmd = exec.Command(goBin, "tool", "nm", "x.exe") + cmd.Dir = tmp + nm, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("nm: %v, %s", err, nm) + } + const want = "runtime.main" + if !bytes.Contains(nm, []byte(want)) { + // Test the test. + t.Errorf("expected symbol %q not found", want) + } + bad := []string{ + "math/big.(*Float)", + "math/big.(*Rat)", + "math/big.(*Int)", + } + for _, sym := range bad { + if bytes.Contains(nm, []byte(sym)) { + t.Errorf("unexpected symbol %q found", sym) + } + } + if t.Failed() { + t.Logf("Got: %s", nm) + } +} diff --git a/src/math/big/nat.go b/src/math/big/nat.go new file mode 100644 index 0000000..140c619 --- /dev/null +++ b/src/math/big/nat.go @@ -0,0 +1,1244 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements unsigned multi-precision integers (natural +// numbers). They are the building blocks for the implementation +// of signed integers, rationals, and floating-point numbers. 
+// +// Caution: This implementation relies on the function "alias" +// which assumes that (nat) slice capacities are never +// changed (no 3-operand slice expressions). If that +// changes, alias needs to be updated for correctness. + +package big + +import ( + "encoding/binary" + "math/bits" + "math/rand" + "sync" +) + +// An unsigned integer x of the form +// +// x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] +// +// with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, +// with the digits x[i] as the slice elements. +// +// A number is normalized if the slice contains no leading 0 digits. +// During arithmetic operations, denormalized values may occur but are +// always normalized before returning the final result. The normalized +// representation of 0 is the empty or nil slice (length = 0). +// +type nat []Word + +var ( + natOne = nat{1} + natTwo = nat{2} + natFive = nat{5} + natTen = nat{10} +) + +func (z nat) clear() { + for i := range z { + z[i] = 0 + } +} + +func (z nat) norm() nat { + i := len(z) + for i > 0 && z[i-1] == 0 { + i-- + } + return z[0:i] +} + +func (z nat) make(n int) nat { + if n <= cap(z) { + return z[:n] // reuse z + } + if n == 1 { + // Most nats start small and stay that way; don't over-allocate. + return make(nat, 1) + } + // Choosing a good value for e has significant performance impact + // because it increases the chance that a value can be reused. 
+ const e = 4 // extra capacity + return make(nat, n, n+e) +} + +func (z nat) setWord(x Word) nat { + if x == 0 { + return z[:0] + } + z = z.make(1) + z[0] = x + return z +} + +func (z nat) setUint64(x uint64) nat { + // single-word value + if w := Word(x); uint64(w) == x { + return z.setWord(w) + } + // 2-word value + z = z.make(2) + z[1] = Word(x >> 32) + z[0] = Word(x) + return z +} + +func (z nat) set(x nat) nat { + z = z.make(len(x)) + copy(z, x) + return z +} + +func (z nat) add(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + return z.add(y, x) + case m == 0: + // n == 0 because m >= n; result is 0 + return z[:0] + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m + 1) + c := addVV(z[0:n], x, y) + if m > n { + c = addVW(z[n:m], x[n:], c) + } + z[m] = c + + return z.norm() +} + +func (z nat) sub(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + panic("underflow") + case m == 0: + // n == 0 because m >= n; result is 0 + return z[:0] + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m) + c := subVV(z[0:n], x, y) + if m > n { + c = subVW(z[n:], x[n:], c) + } + if c != 0 { + panic("underflow") + } + + return z.norm() +} + +func (x nat) cmp(y nat) (r int) { + m := len(x) + n := len(y) + if m != n || m == 0 { + switch { + case m < n: + r = -1 + case m > n: + r = 1 + } + return + } + + i := m - 1 + for i > 0 && x[i] == y[i] { + i-- + } + + switch { + case x[i] < y[i]: + r = -1 + case x[i] > y[i]: + r = 1 + } + return +} + +func (z nat) mulAddWW(x nat, y, r Word) nat { + m := len(x) + if m == 0 || y == 0 { + return z.setWord(r) // result is r + } + // m > 0 + + z = z.make(m + 1) + z[m] = mulAddVWW(z[0:m], x, y, r) + + return z.norm() +} + +// basicMul multiplies x and y and leaves the result in z. +// The (non-normalized) result is placed in z[0 : len(x) + len(y)]. 
func basicMul(z, x, y nat) {
	z[0 : len(x)+len(y)].clear() // initialize z
	// Schoolbook multiplication: for every word d of y, add x*d into z
	// at word offset i and store the returned carry just above it.
	for i, d := range y {
		if d != 0 {
			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
		}
	}
}

// montgomery computes z mod m = x*y*2**(-n*_W) mod m,
// assuming k = -1/m mod 2**_W.
// z is used for storing the result which is returned;
// z must not alias x, y or m.
// See Gueron, "Efficient Software Implementations of Modular Exponentiation".
// https://eprint.iacr.org/2011/239.pdf
// In the terminology of that paper, this is an "Almost Montgomery Multiplication":
// x and y are required to satisfy 0 <= x, y < 2**(n*_W) and then the result
// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
//
// montgomery panics if x, y and m do not all have length n.
func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
	// This code assumes x, y, m are all the same length, n.
	// (required by addMulVVW and the for loop).
	// It also assumes that x, y are already reduced mod m,
	// or else the result will not be properly reduced.
	if len(x) != n || len(y) != n || len(m) != n {
		panic("math/big: mismatched montgomery number lengths")
	}
	z = z.make(n * 2)
	z.clear()
	var c Word // carry out of the previous iteration (0 or 1)
	for i := 0; i < n; i++ {
		d := y[i]
		c2 := addMulVVW(z[i:n+i], x, d) // z += x*d at word offset i
		// With k = -1/m mod 2**_W, adding m*t makes word z[i] zero.
		t := z[i] * k
		c3 := addMulVVW(z[i:n+i], m, t)
		cx := c + c2
		cy := cx + c3
		z[n+i] = cy
		// Unsigned wraparound in either sum means a carry into the next word.
		if cx < c2 || cy < c3 {
			c = 1
		} else {
			c = 0
		}
	}
	// The result lives in the upper half z[n:]; move it to z[:n],
	// subtracting m once if the final carry is set.
	if c != 0 {
		subVV(z[:n], z[n:], m)
	} else {
		copy(z[:n], z[n:])
	}
	return z[:n]
}

// Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
// Factored out for readability - do not use outside karatsuba.
func karatsubaAdd(z, x nat, n int) {
	if c := addVV(z[0:n], z, x); c != 0 {
		// propagate the carry into the next n/2 words
		addVW(z[n:n+n>>1], z[n:], c)
	}
}

// Like karatsubaAdd, but does subtract.
func karatsubaSub(z, x nat, n int) {
	if c := subVV(z[0:n], z, x); c != 0 {
		// propagate the borrow into the next n/2 words
		subVW(z[n:n+n>>1], z[n:], c)
	}
}

// Operands that are shorter than karatsubaThreshold are multiplied using
// "grade school" multiplication; for longer operands the Karatsuba algorithm
// is used.
var karatsubaThreshold = 40 // computed by calibrate_test.go

// karatsuba multiplies x and y and leaves the result in z.
// Both x and y must have the same length n and n must be a
// power of 2. The result vector z must have len(z) >= 6*n.
// The (non-normalized) result is placed in z[0 : 2*n].
func karatsuba(z, x, y nat) {
	n := len(y)

	// Switch to basic multiplication if numbers are odd or small.
	// (n is always even if karatsubaThreshold is even, but be
	// conservative)
	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
		basicMul(z, x, y)
		return
	}
	// n&1 == 0 && n >= karatsubaThreshold && n >= 2

	// Karatsuba multiplication is based on the observation that
	// for two numbers x and y with:
	//
	//   x = x1*b + x0
	//   y = y1*b + y0
	//
	// the product x*y can be obtained with 3 products z2, z1, z0
	// instead of 4:
	//
	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
	//       =    z2*b*b +              z1*b +    z0
	//
	// with:
	//
	//   xd = x1 - x0
	//   yd = y0 - y1
	//
	//   z1 =      xd*yd                    + z2 + z0
	//      = (x1-x0)*(y0 - y1)             + z2 + z0
	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
	//      = x1*y0                 + x0*y1

	// split x, y into "digits"
	n2 := n >> 1              // n2 >= 1
	x1, x0 := x[n2:], x[0:n2] // x = x1*b + x0
	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0

	// z is used for the result and temporary storage:
	//
	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
	//
	// For each recursive call of karatsuba, an unused slice of
	// z is passed in that has (at least) half the length of the
	// caller's z.

	// compute z0 and z2 with the result "in place" in z
	karatsuba(z, x0, y0)     // z0 = x0*y0
	karatsuba(z[n:], x1, y1) // z2 = x1*y1

	// compute xd (or the negative value if underflow occurs)
	s := 1 // sign of product xd*yd
	xd := z[2*n : 2*n+n2]
	if subVV(xd, x1, x0) != 0 { // x1-x0
		s = -s
		subVV(xd, x0, x1) // x0-x1
	}

	// compute yd (or the negative value if underflow occurs)
	yd := z[2*n+n2 : 3*n]
	if subVV(yd, y0, y1) != 0 { // y0-y1
		s = -s
		subVV(yd, y1, y0) // y1-y0
	}

	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
	p := z[n*3:]
	karatsuba(p, xd, yd)

	// save original z2:z0
	// (ok to use upper half of z since we're done recursing)
	r := z[n*4:]
	copy(r, z[:n*2])

	// add up all partial products
	//
	//   2*n     n     0
	// z = [ z2  | z0  ]
	//   +    [ z0  ]
	//   +    [ z2  ]
	//   +    [  p  ]
	//
	karatsubaAdd(z[n2:], r, n)
	karatsubaAdd(z[n2:], r[n:], n)
	if s > 0 {
		karatsubaAdd(z[n2:], p, n)
	} else {
		karatsubaSub(z[n2:], p, n)
	}
}

// alias reports whether x and y share the same base array.
// Note: alias assumes that the capacity of underlying arrays
//       is never changed for nat values; i.e. that there are
//       no 3-operand slice expressions in this code (or worse,
//       reflect-based operations to the same effect).
func alias(x, y nat) bool {
	// Compare the addresses of the last element within each slice's capacity.
	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
}

// addAt implements z += x<<(_W*i); z must be long enough.
// (we don't use nat.add because we need z to stay the same
// slice, and we don't need to normalize z after each addition)
func addAt(z, x nat, i int) {
	if n := len(x); n > 0 {
		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
			// propagate the carry through the remaining words of z, if any
			j := i + n
			if j < len(z) {
				addVW(z[j:], z[j:], c)
			}
		}
	}
}

// max returns the larger of x and y.
func max(x, y int) int {
	if x > y {
		return x
	}
	return y
}

// karatsubaLen computes an approximation to the maximum k <= n such that
// k = p<<i for a number p <= threshold and an i >= 0. Thus, the
// result is the largest number that can be divided repeatedly by 2 before
// becoming about the value of threshold.
func karatsubaLen(n, threshold int) int {
	i := uint(0)
	for n > threshold {
		n >>= 1
		i++
	}
	return n << i
}

// mul sets z = x*y and returns the normalized result. z's storage is
// reused unless z aliases x or y. Operands with fewer than
// karatsubaThreshold words are multiplied with basicMul; larger ones use
// karatsuba on the low k words and add the remaining partial products.
func (z nat) mul(x, y nat) nat {
	m := len(x)
	n := len(y)

	switch {
	case m < n:
		return z.mul(y, x)
	case m == 0 || n == 0:
		return z[:0]
	case n == 1:
		return z.mulAddWW(x, y[0], 0)
	}
	// m >= n > 1

	// determine if z can be reused
	if alias(z, x) || alias(z, y) {
		z = nil // z is an alias for x or y - cannot reuse
	}

	// use basic multiplication if the numbers are small
	if n < karatsubaThreshold {
		z = z.make(m + n)
		basicMul(z, x, y)
		return z.norm()
	}
	// m >= n && n >= karatsubaThreshold && n >= 2

	// determine Karatsuba length k such that
	//
	//   x = xh*b + x0  (0 <= x0 < b)
	//   y = yh*b + y0  (0 <= y0 < b)
	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
	//
	k := karatsubaLen(n, karatsubaThreshold)
	// k <= n

	// multiply x0 and y0 via Karatsuba
	x0 := x[0:k]              // x0 is not normalized
	y0 := y[0:k]              // y0 is not normalized
	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
	karatsuba(z, x0, y0)
	z = z[0 : m+n]  // z has final length but may be incomplete
	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)

	// If xh != 0 or yh != 0, add the missing terms to z. For
	//
	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
	//   yh =                         y1*b (0 <= y1 < b)
	//
	// the missing terms are
	//
	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
	//
	// since all the yi for i > 1 are 0 by choice of k: If any of them
	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
	// be a larger valid threshold contradicting the assumption about k.
	//
	if k < n || m != n {
		// scratch nat from the pool; returned via putNat below
		tp := getNat(3 * k)
		t := *tp

		// add x0*y1*b
		x0 := x0.norm()
		y1 := y[k:]       // y1 is normalized because y is
		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
		addAt(z, t, k)

		// add xi*y0<<i, xi*y1*b<<(i+k)
		y0 := y0.norm()
		for i := k; i < len(x); i += k {
			// xi is the next chunk of at most k words of x
			xi := x[i:]
			if len(xi) > k {
				xi = xi[:k]
			}
			xi = xi.norm()
			t = t.mul(xi, y0)
			addAt(z, t, i)
			t = t.mul(xi, y1)
			addAt(z, t, i+k)
		}

		putNat(tp)
	}

	return z.norm()
}

// basicSqr sets z = x*x and is asymptotically faster than basicMul
// by about a factor of 2, but slower for small arguments due to overhead.
// Requirements: len(x) > 0, len(z) == 2*len(x)
// The (non-normalized) result is placed in z.
func basicSqr(z, x nat) {
	n := len(x)
	tp := getNat(2 * n)
	t := *tp // temporary variable to hold the products
	t.clear()
	z[1], z[0] = mulWW(x[0], x[0]) // the initial square
	for i := 1; i < n; i++ {
		d := x[i]
		// z collects the squares x[i] * x[i]
		z[2*i+1], z[2*i] = mulWW(d, d)
		// t collects the products x[i] * x[j] where j < i
		t[2*i] = addMulVVW(t[i:2*i], x[0:i], d)
	}
	t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
	addVV(z, z, t)                              // combine the result
	putNat(tp)
}

// karatsubaSqr squares x and leaves the result in z.
// len(x) must be a power of 2 and len(z) >= 6*len(x).
// The (non-normalized) result is placed in z[0 : 2*len(x)].
//
// The algorithm and the layout of z are the same as for karatsuba.
+func karatsubaSqr(z, x nat) { + n := len(x) + + if n&1 != 0 || n < karatsubaSqrThreshold || n < 2 { + basicSqr(z[:2*n], x) + return + } + + n2 := n >> 1 + x1, x0 := x[n2:], x[0:n2] + + karatsubaSqr(z, x0) + karatsubaSqr(z[n:], x1) + + // s = sign(xd*yd) == -1 for xd != 0; s == 1 for xd == 0 + xd := z[2*n : 2*n+n2] + if subVV(xd, x1, x0) != 0 { + subVV(xd, x0, x1) + } + + p := z[n*3:] + karatsubaSqr(p, xd) + + r := z[n*4:] + copy(r, z[:n*2]) + + karatsubaAdd(z[n2:], r, n) + karatsubaAdd(z[n2:], r[n:], n) + karatsubaSub(z[n2:], p, n) // s == -1 for p != 0; s == 1 for p == 0 +} + +// Operands that are shorter than basicSqrThreshold are squared using +// "grade school" multiplication; for operands longer than karatsubaSqrThreshold +// we use the Karatsuba algorithm optimized for x == y. +var basicSqrThreshold = 20 // computed by calibrate_test.go +var karatsubaSqrThreshold = 260 // computed by calibrate_test.go + +// z = x*x +func (z nat) sqr(x nat) nat { + n := len(x) + switch { + case n == 0: + return z[:0] + case n == 1: + d := x[0] + z = z.make(2) + z[1], z[0] = mulWW(d, d) + return z.norm() + } + + if alias(z, x) { + z = nil // z is an alias for x - cannot reuse + } + + if n < basicSqrThreshold { + z = z.make(2 * n) + basicMul(z, x, x) + return z.norm() + } + if n < karatsubaSqrThreshold { + z = z.make(2 * n) + basicSqr(z, x) + return z.norm() + } + + // Use Karatsuba multiplication optimized for x == y. + // The algorithm and layout of z are the same as for mul. 
+ + // z = (x1*b + x0)^2 = x1^2*b^2 + 2*x1*x0*b + x0^2 + + k := karatsubaLen(n, karatsubaSqrThreshold) + + x0 := x[0:k] + z = z.make(max(6*k, 2*n)) + karatsubaSqr(z, x0) // z = x0^2 + z = z[0 : 2*n] + z[2*k:].clear() + + if k < n { + tp := getNat(2 * k) + t := *tp + x0 := x0.norm() + x1 := x[k:] + t = t.mul(x0, x1) + addAt(z, t, k) + addAt(z, t, k) // z = 2*x1*x0*b + x0^2 + t = t.sqr(x1) + addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2 + putNat(tp) + } + + return z.norm() +} + +// mulRange computes the product of all the unsigned integers in the +// range [a, b] inclusively. If a > b (empty range), the result is 1. +func (z nat) mulRange(a, b uint64) nat { + switch { + case a == 0: + // cut long ranges short (optimization) + return z.setUint64(0) + case a > b: + return z.setUint64(1) + case a == b: + return z.setUint64(a) + case a+1 == b: + return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) + } + m := (a + b) / 2 + return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) +} + +// getNat returns a *nat of len n. The contents may not be zero. +// The pool holds *nat to avoid allocation when converting to interface{}. +func getNat(n int) *nat { + var z *nat + if v := natPool.Get(); v != nil { + z = v.(*nat) + } + if z == nil { + z = new(nat) + } + *z = z.make(n) + return z +} + +func putNat(x *nat) { + natPool.Put(x) +} + +var natPool sync.Pool + +// Length of x in bits. x must be normalized. +func (x nat) bitLen() int { + if i := len(x) - 1; i >= 0 { + return i*_W + bits.Len(uint(x[i])) + } + return 0 +} + +// trailingZeroBits returns the number of consecutive least significant zero +// bits of x. 
+func (x nat) trailingZeroBits() uint { + if len(x) == 0 { + return 0 + } + var i uint + for x[i] == 0 { + i++ + } + // x[i] != 0 + return i*_W + uint(bits.TrailingZeros(uint(x[i]))) +} + +func same(x, y nat) bool { + return len(x) == len(y) && len(x) > 0 && &x[0] == &y[0] +} + +// z = x << s +func (z nat) shl(x nat, s uint) nat { + if s == 0 { + if same(z, x) { + return z + } + if !alias(z, x) { + return z.set(x) + } + } + + m := len(x) + if m == 0 { + return z[:0] + } + // m > 0 + + n := m + int(s/_W) + z = z.make(n + 1) + z[n] = shlVU(z[n-m:n], x, s%_W) + z[0 : n-m].clear() + + return z.norm() +} + +// z = x >> s +func (z nat) shr(x nat, s uint) nat { + if s == 0 { + if same(z, x) { + return z + } + if !alias(z, x) { + return z.set(x) + } + } + + m := len(x) + n := m - int(s/_W) + if n <= 0 { + return z[:0] + } + // n > 0 + + z = z.make(n) + shrVU(z, x[m-n:], s%_W) + + return z.norm() +} + +func (z nat) setBit(x nat, i uint, b uint) nat { + j := int(i / _W) + m := Word(1) << (i % _W) + n := len(x) + switch b { + case 0: + z = z.make(n) + copy(z, x) + if j >= n { + // no need to grow + return z + } + z[j] &^= m + return z.norm() + case 1: + if j >= n { + z = z.make(j + 1) + z[n:].clear() + } else { + z = z.make(n) + } + copy(z, x) + z[j] |= m + // no need to normalize + return z + } + panic("set bit is not 0 or 1") +} + +// bit returns the value of the i'th bit, with lsb == bit 0. +func (x nat) bit(i uint) uint { + j := i / _W + if j >= uint(len(x)) { + return 0 + } + // 0 <= j < len(x) + return uint(x[j] >> (i % _W) & 1) +} + +// sticky returns 1 if there's a 1 bit within the +// i least significant bits, otherwise it returns 0. 
func (x nat) sticky(i uint) uint {
	j := i / _W
	if j >= uint(len(x)) {
		// The i low bits cover all of x: the answer is 1 unless x is empty.
		if len(x) == 0 {
			return 0
		}
		return 1
	}
	// 0 <= j < len(x)
	for _, x := range x[:j] {
		if x != 0 {
			return 1
		}
	}
	// Shift the bits at position i%_W and above out of word j and
	// test what is left.
	if x[j]<<(_W-i%_W) != 0 {
		return 1
	}
	return 0
}

// and sets z = x & y and returns the normalized result.
func (z nat) and(x, y nat) nat {
	m := len(x)
	n := len(y)
	if m > n {
		m = n
	}
	// m <= n

	z = z.make(m)
	for i := 0; i < m; i++ {
		z[i] = x[i] & y[i]
	}

	return z.norm()
}

// andNot sets z = x &^ y and returns the normalized result.
func (z nat) andNot(x, y nat) nat {
	m := len(x)
	n := len(y)
	if n > m {
		n = m
	}
	// m >= n

	z = z.make(m)
	for i := 0; i < n; i++ {
		z[i] = x[i] &^ y[i]
	}
	copy(z[n:m], x[n:m]) // words of x beyond y are unchanged

	return z.norm()
}

// or sets z = x | y and returns the normalized result.
func (z nat) or(x, y nat) nat {
	m := len(x)
	n := len(y)
	s := x // s is the longer operand
	if m < n {
		n, m = m, n
		s = y
	}
	// m >= n

	z = z.make(m)
	for i := 0; i < n; i++ {
		z[i] = x[i] | y[i]
	}
	copy(z[n:m], s[n:m]) // high words come from the longer operand

	return z.norm()
}

// xor sets z = x ^ y and returns the normalized result.
func (z nat) xor(x, y nat) nat {
	m := len(x)
	n := len(y)
	s := x // s is the longer operand
	if m < n {
		n, m = m, n
		s = y
	}
	// m >= n

	z = z.make(m)
	for i := 0; i < n; i++ {
		z[i] = x[i] ^ y[i]
	}
	copy(z[n:m], s[n:m]) // high words come from the longer operand

	return z.norm()
}

// random creates a random integer in [0..limit), using the space in z if
// possible. n is the bit length of limit.
func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
	if alias(z, limit) {
		z = nil // z is an alias for limit - cannot reuse
	}
	z = z.make(len(limit))

	// mask keeps only the bits of the most significant word that
	// a value of bit length n can use.
	bitLengthOfMSW := uint(n % _W)
	if bitLengthOfMSW == 0 {
		bitLengthOfMSW = _W
	}
	mask := Word((1 << bitLengthOfMSW) - 1)

	// Draw candidates until one falls below limit.
	for {
		switch _W {
		case 32:
			for i := range z {
				z[i] = Word(rand.Uint32())
			}
		case 64:
			for i := range z {
				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
			}
		default:
			panic("unknown word size")
		}
		z[len(limit)-1] &= mask
		if z.cmp(limit) < 0 {
			break
		}
	}

	return z.norm()
}

// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
// otherwise it sets z to x**y. The result is the value of z.
func (z nat) expNN(x, y, m nat) nat {
	if alias(z, x) || alias(z, y) {
		// We cannot allow in-place modification of x or y.
		z = nil
	}

	// x**y mod 1 == 0
	if len(m) == 1 && m[0] == 1 {
		return z.setWord(0)
	}
	// m == 0 || m > 1

	// x**0 == 1
	if len(y) == 0 {
		return z.setWord(1)
	}
	// y > 0

	// x**1 mod m == x mod m
	if len(y) == 1 && y[0] == 1 && len(m) != 0 {
		_, z = nat(nil).div(z, x, m)
		return z
	}
	// y > 1

	if len(m) != 0 {
		// We likely end up being as long as the modulus.
		z = z.make(len(m))
	}
	z = z.set(x)

	// If the base is non-trivial and the exponent is large, we use
	// 4-bit, windowed exponentiation. This involves precomputing 14 values
	// (x^2...x^15) but then reduces the number of multiply-reduces by a
	// third. Even for a 32-bit exponent, this reduces the number of
	// operations. Uses Montgomery method for odd moduli.
	if x.cmp(natOne) > 0 && len(y) > 1 && len(m) > 0 {
		if m[0]&1 == 1 {
			return z.expNNMontgomery(x, y, m)
		}
		return z.expNNWindowed(x, y, m)
	}

	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
	shift := nlz(v) + 1
	v <<= shift // drop the leading zeros and the top 1 bit (z already holds x)
	var q nat

	const mask = 1 << (_W - 1)

	// We walk through the bits of the exponent one by one. Each time we
	// see a bit, we square, thus doubling the power. If the bit is a one,
	// we also multiply by x, thus adding one to the power.

	w := _W - int(shift)
	// zz and r are used to avoid allocating in mul and div as
	// otherwise the arguments would alias.
	var zz, r nat
	for j := 0; j < w; j++ {
		zz = zz.sqr(z)
		zz, z = z, zz

		if v&mask != 0 {
			zz = zz.mul(z, x)
			zz, z = z, zz
		}

		if len(m) != 0 {
			zz, r = zz.div(r, z, m)
			// rotate the buffers so that z holds the remainder
			zz, r, q, z = q, z, zz, r
		}

		v <<= 1
	}

	// Remaining (lower) words of the exponent, all _W bits each.
	for i := len(y) - 2; i >= 0; i-- {
		v = y[i]

		for j := 0; j < _W; j++ {
			zz = zz.sqr(z)
			zz, z = z, zz

			if v&mask != 0 {
				zz = zz.mul(z, x)
				zz, z = z, zz
			}

			if len(m) != 0 {
				zz, r = zz.div(r, z, m)
				zz, r, q, z = q, z, zz, r
			}

			v <<= 1
		}
	}

	return z.norm()
}

// expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
func (z nat) expNNWindowed(x, y, m nat) nat {
	// zz and r are used to avoid allocating in mul and div as otherwise
	// the arguments would alias.
	var zz, r nat

	const n = 4
	// powers[i] contains x^i.
	var powers [1 << n]nat
	powers[0] = natOne
	powers[1] = x
	// Fill two entries per iteration: powers[i] = powers[i/2]^2 mod m
	// and powers[i+1] = powers[i]*x mod m.
	for i := 2; i < 1<<n; i += 2 {
		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
		*p = p.sqr(*p2)
		zz, r = zz.div(r, *p, m)
		*p, r = r, *p
		*p1 = p1.mul(*p, x)
		zz, r = zz.div(r, *p1, m)
		*p1, r = r, *p1
	}

	z = z.setWord(1)

	for i := len(y) - 1; i >= 0; i-- {
		yi := y[i]
		for j := 0; j < _W; j += n {
			// Skip the four squarings for the very first window.
			if i != len(y)-1 || j != 0 {
				// Unrolled loop for significant performance
				// gain. Use go test -bench=".*" in crypto/rsa
				// to check performance before making changes.
				zz = zz.sqr(z)
				zz, z = z, zz
				zz, r = zz.div(r, z, m)
				z, r = r, z

				zz = zz.sqr(z)
				zz, z = z, zz
				zz, r = zz.div(r, z, m)
				z, r = r, z

				zz = zz.sqr(z)
				zz, z = z, zz
				zz, r = zz.div(r, z, m)
				z, r = r, z

				zz = zz.sqr(z)
				zz, z = z, zz
				zz, r = zz.div(r, z, m)
				z, r = r, z
			}

			// multiply by the power selected by the top n bits of yi
			zz = zz.mul(z, powers[yi>>(_W-n)])
			zz, z = z, zz
			zz, r = zz.div(r, z, m)
			z, r = r, z

			yi <<= n
		}
	}

	return z.norm()
}

// expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
// Uses Montgomery representation.
func (z nat) expNNMontgomery(x, y, m nat) nat {
	numWords := len(m)

	// We want the lengths of x and m to be equal.
	// It is OK if x >= m as long as len(x) == len(m).
	if len(x) > numWords {
		_, x = nat(nil).div(nil, x, m)
		// Note: now len(x) <= numWords, not guaranteed ==.
	}
	if len(x) < numWords {
		// zero-extend x to numWords words in a fresh slice
		rr := make(nat, numWords)
		copy(rr, x)
		x = rr
	}

	// Ideally the precomputations would be performed outside, and reused
	// k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson
	// Iteration for Multiplicative Inverses Modulo Prime Powers".
	k0 := 2 - m[0]
	t := m[0] - 1
	for i := 1; i < _W; i <<= 1 {
		t *= t
		k0 *= (t + 1)
	}
	k0 = -k0

	// RR = 2**(2*_W*len(m)) mod m
	RR := nat(nil).setWord(1)
	zz := nat(nil).shl(RR, uint(2*numWords*_W))
	_, RR = nat(nil).div(RR, zz, m)
	if len(RR) < numWords {
		// extend RR to numWords words, as montgomery requires
		zz = zz.make(numWords)
		copy(zz, RR)
		RR = zz
	}
	// one = 1, with equal length to that of m
	one := make(nat, numWords)
	one[0] = 1

	const n = 4
	// powers[i] contains x^i
	var powers [1 << n]nat
	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
	for i := 2; i < 1<<n; i++ {
		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
	}

	// initialize z = 1 (Montgomery 1)
	z = z.make(numWords)
	copy(z, powers[0])

	zz = zz.make(numWords)

	// same windowed exponent, but with Montgomery multiplications
	for i := len(y) - 1; i >= 0; i-- {
		yi := y[i]
		for j := 0; j < _W; j += n {
			// Four squarings per window, skipped for the very first
			// one; z and zz alternate as destination because
			// montgomery's result must not alias its operands.
			if i != len(y)-1 || j != 0 {
				zz = zz.montgomery(z, z, m, k0, numWords)
				z = z.montgomery(zz, zz, m, k0, numWords)
				zz = zz.montgomery(z, z, m, k0, numWords)
				z = z.montgomery(zz, zz, m, k0, numWords)
			}
			// multiply by the power selected by the top n bits of yi
			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
			z, zz = zz, z
			yi <<= n
		}
	}
	// convert to regular number
	zz = zz.montgomery(z, one, m, k0, numWords)

	// One last reduction, just in case.
	// See golang.org/issue/13907.
	if zz.cmp(m) >= 0 {
		// Common case is m has high bit set; in that case,
		// since zz is the same length as m, there can be just
		// one multiple of m to remove. Just subtract.
		// We think that the subtract should be sufficient in general,
		// so do that unconditionally, but double-check,
		// in case our beliefs are wrong.
		// The div is not expected to be reached.
		zz = zz.sub(zz, m)
		if zz.cmp(m) >= 0 {
			_, zz = nat(nil).div(nil, zz, m)
		}
	}

	return zz.norm()
}

// bytes writes the value of z into buf using big-endian encoding.
// The value of z is encoded in the slice buf[i:]. If the value of z
// cannot be represented in buf, bytes panics. The number i of unused
// bytes at the beginning of buf is returned as result.
func (z nat) bytes(buf []byte) (i int) {
	i = len(buf)
	// Emit words least significant first, filling buf from its end.
	for _, d := range z {
		for j := 0; j < _S; j++ {
			i--
			if i >= 0 {
				buf[i] = byte(d)
			} else if byte(d) != 0 {
				// a non-zero byte that no longer fits
				panic("math/big: buffer too small to fit value")
			}
			d >>= 8
		}
	}

	if i < 0 {
		i = 0
	}
	// skip leading zero bytes
	for i < len(buf) && buf[i] == 0 {
		i++
	}

	return
}

// bigEndianWord returns the contents of buf interpreted as a big-endian encoded Word value.
func bigEndianWord(buf []byte) Word {
	if _W == 64 {
		return Word(binary.BigEndian.Uint64(buf))
	}
	return Word(binary.BigEndian.Uint32(buf))
}

// setBytes interprets buf as the bytes of a big-endian unsigned
// integer, sets z to that value, and returns z.
func (z nat) setBytes(buf []byte) nat {
	// One Word per _S bytes, rounded up.
	z = z.make((len(buf) + _S - 1) / _S)

	// Consume whole words from the end of buf (least significant first).
	i := len(buf)
	for k := 0; i >= _S; k++ {
		z[k] = bigEndianWord(buf[i-_S : i])
		i -= _S
	}
	// Fewer than _S leading bytes remain: assemble them into the top word.
	if i > 0 {
		var d Word
		for s := uint(0); i > 0; s += 8 {
			d |= Word(buf[i-1]) << s
			i--
		}
		z[len(z)-1] = d
	}

	return z.norm()
}

// sqrt sets z = ⌊√x⌋
// and returns the result, which callers must use in place of z.
func (z nat) sqrt(x nat) nat {
	// For x <= 1 the result is x itself.
	if x.cmp(natOne) <= 0 {
		return z.set(x)
	}
	// x is read on every iteration, so z's storage must not be reused
	// as scratch space when it overlaps x.
	if alias(z, x) {
		z = nil
	}

	// Start with value known to be too large and repeat "z = ⌊(z + ⌊x/z⌋)/2⌋" until it stops getting smaller.
	// See Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 (SqrtInt).
	// https://members.loria.fr/PZimmermann/mca/pub226.html
	// If x is one less than a perfect square, the sequence oscillates between the correct z and z+1;
	// otherwise it converges to the correct z and stays there.
	var z1, z2 nat
	z1 = z
	z1 = z1.setUint64(1)
	z1 = z1.shl(z1, uint(x.bitLen()+1)/2) // must be ≥ √x
	for n := 0; ; n++ {
		z2, _ = z2.div(nil, x, z1)
		z2 = z2.add(z2, z1)
		z2 = z2.shr(z2, 1)
		if z2.cmp(z1) >= 0 {
			// z1 is answer.
			// Figure out whether z1 or z2 is currently aliased to z by looking at loop count.
			if n&1 == 0 {
				return z1
			}
			return z.set(z1)
		}
		// z1 and z2 trade roles each iteration, which is why the parity
		// of n identifies the one that started out as z.
		z1, z2 = z2, z1
	}
}
diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go
new file mode 100644
index 0000000..0850818
--- /dev/null
+++ b/src/math/big/nat_test.go
@@ -0,0 +1,816 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+ +package big + +import ( + "fmt" + "runtime" + "strings" + "testing" +) + +var cmpTests = []struct { + x, y nat + r int +}{ + {nil, nil, 0}, + {nil, nat(nil), 0}, + {nat(nil), nil, 0}, + {nat(nil), nat(nil), 0}, + {nat{0}, nat{0}, 0}, + {nat{0}, nat{1}, -1}, + {nat{1}, nat{0}, 1}, + {nat{1}, nat{1}, 0}, + {nat{0, _M}, nat{1}, 1}, + {nat{1}, nat{0, _M}, -1}, + {nat{1, _M}, nat{0, _M}, 1}, + {nat{0, _M}, nat{1, _M}, -1}, + {nat{16, 571956, 8794, 68}, nat{837, 9146, 1, 754489}, -1}, + {nat{34986, 41, 105, 1957}, nat{56, 7458, 104, 1957}, 1}, +} + +func TestCmp(t *testing.T) { + for i, a := range cmpTests { + r := a.x.cmp(a.y) + if r != a.r { + t.Errorf("#%d got r = %v; want %v", i, r, a.r) + } + } +} + +type funNN func(z, x, y nat) nat +type argNN struct { + z, x, y nat +} + +var sumNN = []argNN{ + {}, + {nat{1}, nil, nat{1}}, + {nat{1111111110}, nat{123456789}, nat{987654321}}, + {nat{0, 0, 0, 1}, nil, nat{0, 0, 0, 1}}, + {nat{0, 0, 0, 1111111110}, nat{0, 0, 0, 123456789}, nat{0, 0, 0, 987654321}}, + {nat{0, 0, 0, 1}, nat{0, 0, _M}, nat{0, 0, 1}}, +} + +var prodNN = []argNN{ + {}, + {nil, nil, nil}, + {nil, nat{991}, nil}, + {nat{991}, nat{991}, nat{1}}, + {nat{991 * 991}, nat{991}, nat{991}}, + {nat{0, 0, 991 * 991}, nat{0, 991}, nat{0, 991}}, + {nat{1 * 991, 2 * 991, 3 * 991, 4 * 991}, nat{1, 2, 3, 4}, nat{991}}, + {nat{4, 11, 20, 30, 20, 11, 4}, nat{1, 2, 3, 4}, nat{4, 3, 2, 1}}, + // 3^100 * 3^28 = 3^128 + { + natFromString("11790184577738583171520872861412518665678211592275841109096961"), + natFromString("515377520732011331036461129765621272702107522001"), + natFromString("22876792454961"), + }, + // z = 111....1 (70000 digits) + // x = 10^(99*700) + ... 
+ 10^1400 + 10^700 + 1 + // y = 111....1 (700 digits, larger than Karatsuba threshold on 32-bit and 64-bit) + { + natFromString(strings.Repeat("1", 70000)), + natFromString("1" + strings.Repeat(strings.Repeat("0", 699)+"1", 99)), + natFromString(strings.Repeat("1", 700)), + }, + // z = 111....1 (20000 digits) + // x = 10^10000 + 1 + // y = 111....1 (10000 digits) + { + natFromString(strings.Repeat("1", 20000)), + natFromString("1" + strings.Repeat("0", 9999) + "1"), + natFromString(strings.Repeat("1", 10000)), + }, +} + +func natFromString(s string) nat { + x, _, _, err := nat(nil).scan(strings.NewReader(s), 0, false) + if err != nil { + panic(err) + } + return x +} + +func TestSet(t *testing.T) { + for _, a := range sumNN { + z := nat(nil).set(a.z) + if z.cmp(a.z) != 0 { + t.Errorf("got z = %v; want %v", z, a.z) + } + } +} + +func testFunNN(t *testing.T, msg string, f funNN, a argNN) { + z := f(nil, a.x, a.y) + if z.cmp(a.z) != 0 { + t.Errorf("%s%+v\n\tgot z = %v; want %v", msg, a, z, a.z) + } +} + +func TestFunNN(t *testing.T) { + for _, a := range sumNN { + arg := a + testFunNN(t, "add", nat.add, arg) + + arg = argNN{a.z, a.y, a.x} + testFunNN(t, "add symmetric", nat.add, arg) + + arg = argNN{a.x, a.z, a.y} + testFunNN(t, "sub", nat.sub, arg) + + arg = argNN{a.y, a.z, a.x} + testFunNN(t, "sub symmetric", nat.sub, arg) + } + + for _, a := range prodNN { + arg := a + testFunNN(t, "mul", nat.mul, arg) + + arg = argNN{a.z, a.y, a.x} + testFunNN(t, "mul symmetric", nat.mul, arg) + } +} + +var mulRangesN = []struct { + a, b uint64 + prod string +}{ + {0, 0, "0"}, + {1, 1, "1"}, + {1, 2, "2"}, + {1, 3, "6"}, + {10, 10, "10"}, + {0, 100, "0"}, + {0, 1e9, "0"}, + {1, 0, "1"}, // empty range + {100, 1, "1"}, // empty range + {1, 10, "3628800"}, // 10! + {1, 20, "2432902008176640000"}, // 20! 
+ {1, 100, + "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000", // 100! + }, +} + +func TestMulRangeN(t *testing.T) { + for i, r := range mulRangesN { + prod := string(nat(nil).mulRange(r.a, r.b).utoa(10)) + if prod != r.prod { + t.Errorf("#%d: got %s; want %s", i, prod, r.prod) + } + } +} + +// allocBytes returns the number of bytes allocated by invoking f. +func allocBytes(f func()) uint64 { + var stats runtime.MemStats + runtime.ReadMemStats(&stats) + t := stats.TotalAlloc + f() + runtime.ReadMemStats(&stats) + return stats.TotalAlloc - t +} + +// TestMulUnbalanced tests that multiplying numbers of different lengths +// does not cause deep recursion and in turn allocate too much memory. +// Test case for issue 3807. +func TestMulUnbalanced(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + x := rndNat(50000) + y := rndNat(40) + allocSize := allocBytes(func() { + nat(nil).mul(x, y) + }) + inputSize := uint64(len(x)+len(y)) * _S + if ratio := allocSize / uint64(inputSize); ratio > 10 { + t.Errorf("multiplication uses too much memory (%d > %d times the size of inputs)", allocSize, ratio) + } +} + +// rndNat returns a random nat value >= 0 of (usually) n words in length. +// In extremely unlikely cases it may be smaller than n words if the top- +// most words are 0. +func rndNat(n int) nat { + return nat(rndV(n)).norm() +} + +// rndNat1 is like rndNat but the result is guaranteed to be > 0. 
+func rndNat1(n int) nat { + x := nat(rndV(n)).norm() + if len(x) == 0 { + x.setWord(1) + } + return x +} + +func BenchmarkMul(b *testing.B) { + mulx := rndNat(1e4) + muly := rndNat(1e4) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var z nat + z.mul(mulx, muly) + } +} + +func benchmarkNatMul(b *testing.B, nwords int) { + x := rndNat(nwords) + y := rndNat(nwords) + var z nat + b.ResetTimer() + for i := 0; i < b.N; i++ { + z.mul(x, y) + } +} + +var mulBenchSizes = []int{10, 100, 1000, 10000, 100000} + +func BenchmarkNatMul(b *testing.B) { + for _, n := range mulBenchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + benchmarkNatMul(b, n) + }) + } +} + +func TestNLZ(t *testing.T) { + var x Word = _B >> 1 + for i := 0; i <= _W; i++ { + if int(nlz(x)) != i { + t.Errorf("failed at %x: got %d want %d", x, nlz(x), i) + } + x >>= 1 + } +} + +type shiftTest struct { + in nat + shift uint + out nat +} + +var leftShiftTests = []shiftTest{ + {nil, 0, nil}, + {nil, 1, nil}, + {natOne, 0, natOne}, + {natOne, 1, natTwo}, + {nat{1 << (_W - 1)}, 1, nat{0}}, + {nat{1 << (_W - 1), 0}, 1, nat{0, 1}}, +} + +func TestShiftLeft(t *testing.T) { + for i, test := range leftShiftTests { + var z nat + z = z.shl(test.in, test.shift) + for j, d := range test.out { + if j >= len(z) || z[j] != d { + t.Errorf("#%d: got: %v want: %v", i, z, test.out) + break + } + } + } +} + +var rightShiftTests = []shiftTest{ + {nil, 0, nil}, + {nil, 1, nil}, + {natOne, 0, natOne}, + {natOne, 1, nil}, + {natTwo, 1, natOne}, + {nat{0, 1}, 1, nat{1 << (_W - 1)}}, + {nat{2, 1, 1}, 1, nat{1<<(_W-1) + 1, 1 << (_W - 1)}}, +} + +func TestShiftRight(t *testing.T) { + for i, test := range rightShiftTests { + var z nat + z = z.shr(test.in, test.shift) + for j, d := range test.out { + if j >= len(z) || z[j] != d { + t.Errorf("#%d: got: %v want: %v", i, z, test.out) + break + } + } + } +} + +func BenchmarkZeroShifts(b *testing.B) { + x := rndNat(800) + + b.Run("Shl", 
func(b *testing.B) { + for i := 0; i < b.N; i++ { + var z nat + z.shl(x, 0) + } + }) + b.Run("ShlSame", func(b *testing.B) { + for i := 0; i < b.N; i++ { + x.shl(x, 0) + } + }) + + b.Run("Shr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var z nat + z.shr(x, 0) + } + }) + b.Run("ShrSame", func(b *testing.B) { + for i := 0; i < b.N; i++ { + x.shr(x, 0) + } + }) +} + +type modWTest struct { + in string + dividend string + out string +} + +var modWTests32 = []modWTest{ + {"23492635982634928349238759823742", "252341", "220170"}, +} + +var modWTests64 = []modWTest{ + {"6527895462947293856291561095690465243862946", "524326975699234", "375066989628668"}, +} + +func runModWTests(t *testing.T, tests []modWTest) { + for i, test := range tests { + in, _ := new(Int).SetString(test.in, 10) + d, _ := new(Int).SetString(test.dividend, 10) + out, _ := new(Int).SetString(test.out, 10) + + r := in.abs.modW(d.abs[0]) + if r != out.abs[0] { + t.Errorf("#%d failed: got %d want %s", i, r, out) + } + } +} + +func TestModW(t *testing.T) { + if _W >= 32 { + runModWTests(t, modWTests32) + } + if _W >= 64 { + runModWTests(t, modWTests64) + } +} + +var montgomeryTests = []struct { + x, y, m string + k0 uint64 + out32, out64 string +}{ + { + "0xffffffffffffffffffffffffffffffffffffffffffffffffe", + "0xffffffffffffffffffffffffffffffffffffffffffffffffe", + "0xfffffffffffffffffffffffffffffffffffffffffffffffff", + 1, + "0x1000000000000000000000000000000000000000000", + "0x10000000000000000000000000000000000", + }, + { + "0x000000000ffffff5", + "0x000000000ffffff0", + "0x0000000010000001", + 0xff0000000fffffff, + "0x000000000bfffff4", + "0x0000000003400001", + }, + { + "0x0000000080000000", + "0x00000000ffffffff", + "0x1000000000000001", + 0xfffffffffffffff, + "0x0800000008000001", + "0x0800000008000001", + }, + { + "0x0000000080000000", + "0x0000000080000000", + "0xffffffff00000001", + 0xfffffffeffffffff, + "0xbfffffff40000001", + "0xbfffffff40000001", + }, + { + "0x0000000080000000", + 
"0x0000000080000000", + "0x00ffffff00000001", + 0xfffffeffffffff, + "0xbfffff40000001", + "0xbfffff40000001", + }, + { + "0x0000000080000000", + "0x0000000080000000", + "0x0000ffff00000001", + 0xfffeffffffff, + "0xbfff40000001", + "0xbfff40000001", + }, + { + "0x3321ffffffffffffffffffffffffffff00000000000022222623333333332bbbb888c0", + "0x3321ffffffffffffffffffffffffffff00000000000022222623333333332bbbb888c0", + "0x33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1", + 0xdecc8f1249812adf, + "0x04eb0e11d72329dc0915f86784820fc403275bf2f6620a20e0dd344c5cd0875e50deb5", + "0x0d7144739a7d8e11d72329dc0915f86784820fc403275bf2f61ed96f35dd34dbb3d6a0", + }, + { + "0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff00000000000022222223333333333444444444", + "0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff999999999999999aaabbbbbbbbcccccccccccc", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1", + 0xdecc8f1249812adf, + "0x5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d7a11c7772cba02c22f9711078d51a3797eb18e691295293284d988e349fa6deba46b25a4ecd9f715", + "0x92fcad4b5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d799c32fe2f3cc5422f9711078d51a3797eb18e691295293284d8f5e69caf6decddfe1df6", + }, +} + +func TestMontgomery(t *testing.T) { + one := NewInt(1) + _B := new(Int).Lsh(one, _W) + for i, test := range montgomeryTests { + x := natFromString(test.x) + y := natFromString(test.y) + m := natFromString(test.m) + for len(x) < len(m) { + x = append(x, 0) + } + for len(y) < len(m) { + y = append(y, 0) + } + + if x.cmp(m) > 0 { + _, r := nat(nil).div(nil, x, m) + t.Errorf("#%d: x > m (0x%s > 0x%s; use 0x%s)", i, x.utoa(16), 
m.utoa(16), r.utoa(16)) + } + if y.cmp(m) > 0 { + _, r := nat(nil).div(nil, x, m) + t.Errorf("#%d: y > m (0x%s > 0x%s; use 0x%s)", i, y.utoa(16), m.utoa(16), r.utoa(16)) + } + + var out nat + if _W == 32 { + out = natFromString(test.out32) + } else { + out = natFromString(test.out64) + } + + // t.Logf("#%d: len=%d\n", i, len(m)) + + // check output in table + xi := &Int{abs: x} + yi := &Int{abs: y} + mi := &Int{abs: m} + p := new(Int).Mod(new(Int).Mul(xi, new(Int).Mul(yi, new(Int).ModInverse(new(Int).Lsh(one, uint(len(m))*_W), mi))), mi) + if out.cmp(p.abs.norm()) != 0 { + t.Errorf("#%d: out in table=0x%s, computed=0x%s", i, out.utoa(16), p.abs.norm().utoa(16)) + } + + // check k0 in table + k := new(Int).Mod(&Int{abs: m}, _B) + k = new(Int).Sub(_B, k) + k = new(Int).Mod(k, _B) + k0 := Word(new(Int).ModInverse(k, _B).Uint64()) + if k0 != Word(test.k0) { + t.Errorf("#%d: k0 in table=%#x, computed=%#x\n", i, test.k0, k0) + } + + // check montgomery with correct k0 produces correct output + z := nat(nil).montgomery(x, y, m, k0, len(m)) + z = z.norm() + if z.cmp(out) != 0 { + t.Errorf("#%d: got 0x%s want 0x%s", i, z.utoa(16), out.utoa(16)) + } + } +} + +var expNNTests = []struct { + x, y, m string + out string +}{ + {"0", "0", "0", "1"}, + {"0", "0", "1", "0"}, + {"1", "1", "1", "0"}, + {"2", "1", "1", "0"}, + {"2", "2", "1", "0"}, + {"10", "100000000000", "1", "0"}, + {"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"}, + {"0x8000000000000000", "2", "6719", "4944"}, + {"0x8000000000000000", "3", "6719", "5447"}, + {"0x8000000000000000", "1000", "6719", "1603"}, + {"0x8000000000000000", "1000000", "6719", "3199"}, + { + "2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347", + "298472983472983471903246121093472394872319615612417471234712061", + "29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464", + 
"23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291", + }, + { + "11521922904531591643048817447554701904414021819823889996244743037378330903763518501116638828335352811871131385129455853417360623007349090150042001944696604737499160174391019030572483602867266711107136838523916077674888297896995042968746762200926853379", + "426343618817810911523", + "444747819283133684179", + "42", + }, +} + +func TestExpNN(t *testing.T) { + for i, test := range expNNTests { + x := natFromString(test.x) + y := natFromString(test.y) + out := natFromString(test.out) + + var m nat + if len(test.m) > 0 { + m = natFromString(test.m) + } + + z := nat(nil).expNN(x, y, m) + if z.cmp(out) != 0 { + t.Errorf("#%d got %s want %s", i, z.utoa(10), out.utoa(10)) + } + } +} + +func BenchmarkExp3Power(b *testing.B) { + const x = 3 + for _, y := range []Word{ + 0x10, 0x40, 0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x40000, 0x100000, 0x400000, + } { + b.Run(fmt.Sprintf("%#x", y), func(b *testing.B) { + var z nat + for i := 0; i < b.N; i++ { + z.expWW(x, y) + } + }) + } +} + +func fibo(n int) nat { + switch n { + case 0: + return nil + case 1: + return nat{1} + } + f0 := fibo(0) + f1 := fibo(1) + var f2 nat + for i := 1; i < n; i++ { + f2 = f2.add(f0, f1) + f0, f1, f2 = f1, f2, f0 + } + return f1 +} + +var fiboNums = []string{ + "0", + "55", + "6765", + "832040", + "102334155", + "12586269025", + "1548008755920", + "190392490709135", + "23416728348467685", + "2880067194370816120", + "354224848179261915075", +} + +func TestFibo(t *testing.T) { + for i, want := range fiboNums { + n := i * 10 + got := string(fibo(n).utoa(10)) + if got != want { + t.Errorf("fibo(%d) failed: got %s want %s", n, got, want) + } + } +} + +func BenchmarkFibo(b *testing.B) { + for i := 0; i < b.N; i++ { + fibo(1e0) + fibo(1e1) + fibo(1e2) + fibo(1e3) + fibo(1e4) + fibo(1e5) + } +} + +var bitTests = []struct { + x string + i uint + want uint +}{ + {"0", 0, 0}, + 
{"0", 1, 0}, + {"0", 1000, 0}, + + {"0x1", 0, 1}, + {"0x10", 0, 0}, + {"0x10", 3, 0}, + {"0x10", 4, 1}, + {"0x10", 5, 0}, + + {"0x8000000000000000", 62, 0}, + {"0x8000000000000000", 63, 1}, + {"0x8000000000000000", 64, 0}, + + {"0x3" + strings.Repeat("0", 32), 127, 0}, + {"0x3" + strings.Repeat("0", 32), 128, 1}, + {"0x3" + strings.Repeat("0", 32), 129, 1}, + {"0x3" + strings.Repeat("0", 32), 130, 0}, +} + +func TestBit(t *testing.T) { + for i, test := range bitTests { + x := natFromString(test.x) + if got := x.bit(test.i); got != test.want { + t.Errorf("#%d: %s.bit(%d) = %v; want %v", i, test.x, test.i, got, test.want) + } + } +} + +var stickyTests = []struct { + x string + i uint + want uint +}{ + {"0", 0, 0}, + {"0", 1, 0}, + {"0", 1000, 0}, + + {"0x1", 0, 0}, + {"0x1", 1, 1}, + + {"0x1350", 0, 0}, + {"0x1350", 4, 0}, + {"0x1350", 5, 1}, + + {"0x8000000000000000", 63, 0}, + {"0x8000000000000000", 64, 1}, + + {"0x1" + strings.Repeat("0", 100), 400, 0}, + {"0x1" + strings.Repeat("0", 100), 401, 1}, +} + +func TestSticky(t *testing.T) { + for i, test := range stickyTests { + x := natFromString(test.x) + if got := x.sticky(test.i); got != test.want { + t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i, got, test.want) + } + if test.want == 1 { + // all subsequent i's should also return 1 + for d := uint(1); d <= 3; d++ { + if got := x.sticky(test.i + d); got != 1 { + t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i+d, got, 1) + } + } + } + } +} + +func testSqr(t *testing.T, x nat) { + got := make(nat, 2*len(x)) + want := make(nat, 2*len(x)) + got = got.sqr(x) + want = want.mul(x, x) + if got.cmp(want) != 0 { + t.Errorf("basicSqr(%v), got %v, want %v", x, got, want) + } +} + +func TestSqr(t *testing.T) { + for _, a := range prodNN { + if a.x != nil { + testSqr(t, a.x) + } + if a.y != nil { + testSqr(t, a.y) + } + if a.z != nil { + testSqr(t, a.z) + } + } +} + +func benchmarkNatSqr(b *testing.B, nwords int) { + x := rndNat(nwords) + var z 
nat + b.ResetTimer() + for i := 0; i < b.N; i++ { + z.sqr(x) + } +} + +var sqrBenchSizes = []int{ + 1, 2, 3, 5, 8, 10, 20, 30, 50, 80, + 100, 200, 300, 500, 800, + 1000, 10000, 100000, +} + +func BenchmarkNatSqr(b *testing.B) { + for _, n := range sqrBenchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + benchmarkNatSqr(b, n) + }) + } +} + +func BenchmarkNatSetBytes(b *testing.B) { + const maxLength = 128 + lengths := []int{ + // No remainder: + 8, 24, maxLength, + // With remainder: + 7, 23, maxLength - 1, + } + n := make(nat, maxLength/_W) // ensure n doesn't need to grow during the test + buf := make([]byte, maxLength) + for _, l := range lengths { + b.Run(fmt.Sprint(l), func(b *testing.B) { + for i := 0; i < b.N; i++ { + n.setBytes(buf[:l]) + } + }) + } +} + +func TestNatDiv(t *testing.T) { + sizes := []int{ + 1, 2, 5, 8, 15, 25, 40, 65, 100, + 200, 500, 800, 1500, 2500, 4000, 6500, 10000, + } + for _, i := range sizes { + for _, j := range sizes { + a := rndNat1(i) + b := rndNat1(j) + // the test requires b >= 2 + if len(b) == 1 && b[0] == 1 { + b[0] = 2 + } + // choose a remainder c < b + c := rndNat1(len(b)) + if len(c) == len(b) && c[len(c)-1] >= b[len(b)-1] { + c[len(c)-1] = 0 + c = c.norm() + } + // compute x = a*b+c + x := nat(nil).mul(a, b) + x = x.add(x, c) + + var q, r nat + q, r = q.div(r, x, b) + if q.cmp(a) != 0 { + t.Fatalf("wrong quotient: got %s; want %s for %s/%s", q.utoa(10), a.utoa(10), x.utoa(10), b.utoa(10)) + } + if r.cmp(c) != 0 { + t.Fatalf("wrong remainder: got %s; want %s for %s/%s", r.utoa(10), c.utoa(10), x.utoa(10), b.utoa(10)) + } + } + } +} + +// TestIssue37499 triggers the edge case of divBasic where +// the inaccurate estimate of the first word's quotient +// happens at the very beginning of the loop. +func TestIssue37499(t *testing.T) { + // Choose u and v such that v is slightly larger than u >> N. 
+ // This tricks divBasic into choosing 1 as the first word + // of the quotient. This works in both 32-bit and 64-bit settings. + u := natFromString("0x2b6c385a05be027f5c22005b63c42a1165b79ff510e1706b39f8489c1d28e57bb5ba4ef9fd9387a3e344402c0a453381") + v := natFromString("0x2b6c385a05be027f5c22005b63c42a1165b79ff510e1706c") + + q := nat(nil).make(8) + q.divBasic(u, v) + q = q.norm() + if s := string(q.utoa(16)); s != "fffffffffffffffffffffffffffffffffffffffffffffffb" { + t.Fatalf("incorrect quotient: %s", s) + } +} + +// TestIssue42552 triggers an edge case of recursive division +// where the first division loop is never entered, and correcting +// the remainder takes exactly two iterations in the final loop. +func TestIssue42552(t *testing.T) { + u := natFromString("0xc23b166884c3869092a520eceedeced2b00847bd256c9cf3b2c5e2227c15bd5e6ee7ef8a2f49236ad0eedf2c8a3b453cf6e0706f64285c526b372c4b1321245519d430540804a50b7ca8b6f1b34a2ec05cdbc24de7599af112d3e3c8db347e8799fe70f16e43c6566ba3aeb169463a3ecc486172deb2d9b80a3699c776e44fef20036bd946f1b4d054dd88a2c1aeb986199b0b2b7e58c42288824b74934d112fe1fc06e06b4d99fe1c5e725946b23210521e209cd507cce90b5f39a523f27e861f9e232aee50c3f585208b4573dcc0b897b6177f2ba20254fd5c50a033e849dee1b3a93bd2dc44ba8ca836cab2c2ae50e50b126284524fa0187af28628ff0face68d87709200329db1392852c8b8963fbe3d05fb1efe19f0ed5ca9fadc2f96f82187c24bb2512b2e85a66333a7e176605695211e1c8e0b9b9e82813e50654964945b1e1e66a90840396c7d10e23e47f364d2d3f660fa54598e18d1ca2ea4fe4f35a40a11f69f201c80b48eaee3e2e9b0eda63decf92bec08a70f731587d4ed0f218d5929285c8b2ccbc497e20db42de73885191fa453350335990184d8df805072f958d5354debda38f5421effaaafd6cb9b721ace74be0892d77679f62a4a126697cd35797f6858193da4ba1770c06aea2e5c59ec04b8ea26749e61b72ecdde403f3bc7e5e546cd799578cc939fa676dfd5e648576d4a06cbadb028adc2c0b461f145b2321f42e5e0f3b4fb898ecd461df07a6f5154067787bf74b5cc5c03704a1ce47494961931f0263b0aac32505102595957531a2de69dd71aac51f8a49902f81f21283dbe8e21e01e5d82517868826f86acf338d935aa6b4d5a25c8d540389
b277dd9d64569d68baf0f71bd03dba45b92a7fc052601d1bd011a2fc6790a23f97c6fa5caeea040ab86841f268d39ce4f7caf01069df78bba098e04366492f0c2ac24f1bf16828752765fa523c9a4d42b71109d123e6be8c7b1ab3ccf8ea03404075fe1a9596f1bba1d267f9a7879ceece514818316c9c0583469d2367831fc42b517ea028a28df7c18d783d16ea2436cee2b15d52db68b5dfdee6b4d26f0905f9b030c911a04d078923a4136afea96eed6874462a482917353264cc9bee298f167ac65a6db4e4eda88044b39cc0b33183843eaa946564a00c3a0ab661f2c915e70bf0bb65bfbb6fa2eea20aed16bf2c1a1d00ec55fb4ff2f76b8e462ea70c19efa579c9ee78194b86708fdae66a9ce6e2cf3d366037798cfb50277ba6d2fd4866361022fd788ab7735b40b8b61d55e32243e06719e53992e9ac16c9c4b6e6933635c3c47c8f7e73e17dd54d0dd8aeba5d76de46894e7b3f9d3ec25ad78ee82297ba69905ea0fa094b8667faa2b8885e2187b3da80268aa1164761d7b0d6de206b676777348152b8ae1d4afed753bc63c739a5ca8ce7afb2b241a226bd9e502baba391b5b13f5054f070b65a9cf3a67063bfaa803ba390732cd03888f664023f888741d04d564e0b5674b0a183ace81452001b3fbb4214c77d42ca75376742c471e58f67307726d56a1032bd236610cbcbcd03d0d7a452900136897dc55bb3ce959d10d4e6a10fb635006bd8c41cd9ded2d3dfdd8f2e229590324a7370cb2124210b2330f4c56155caa09a2564932ceded8d92c79664dcdeb87faad7d3da006cc2ea267ee3df41e9677789cc5a8cc3b83add6491561b3047919e0648b1b2e97d7ad6f6c2aa80cab8e9ae10e1f75b1fdd0246151af709d259a6a0ed0b26bd711024965ecad7c41387de45443defce53f66612948694a6032279131c257119ed876a8e805dfb49576ef5c563574115ee87050d92d191bc761ef51d966918e2ef925639400069e3959d8fe19f36136e947ff430bf74e71da0aa5923b00000000") + v := 
natFromString("0x838332321d443a3d30373d47301d47073847473a383d3030f25b3d3d3e00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002e00000000000000000041603038331c3d32f5303441e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e01c0a5459bfc7b9be9fcbb9d2383840464319434707303030f43a32f53034411c0a5459413820878787878787878787878787878787878787878787878787878787878787878787870630303a3a30334036605b923a6101f83638413943413960204337602043323801526040523241846038414143015238604060328452413841413638523c0240384141364036605b923a6101f83638413943413960204334602043323801526040523241846038414143015238604060328452413841413638523c02403841413638433030f25a8b83838383838383838383838383838383837d838383ffffffffffffffff838383838383838383000000000000000000030000007d26e27c7c8b83838383838383838383838383838383837d838383ffffffffffffffff83838383838383838383838383838383838383838383435960f535073030f3343200000000000000011881301938343030fa398383300000002300000000000000000000f11af4600c845252904141364138383c60406032414443095238010241414303364443434132305b595a15434160b042385341ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff47476043410536613603593a6005411c437405fcfcfcfcfcfcfc0000000000005a3b075815054359000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") + q := nat(nil).make(16) + q.div(q, u, v) +} diff --git a/src/math/big/natconv.go b/src/math/big/natconv.go new file mode 100644 index 0000000..42d1ccc --- /dev/null +++ 
b/src/math/big/natconv.go
@@ -0,0 +1,512 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file implements nat-to-string conversion functions.

package big

import (
	"errors"
	"fmt"
	"io"
	"math"
	"math/bits"
	"sync"
)

// digits maps a digit value (0 to MaxBase-1) to its character:
// '0'-'9', then 'a'-'z', then 'A'-'Z'.
const digits = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

// Note: MaxBase = len(digits), but it must remain an untyped rune constant
// for API compatibility.

// MaxBase is the largest number base accepted for string conversions.
const MaxBase = 10 + ('z' - 'a' + 1) + ('Z' - 'A' + 1)

// maxBaseSmall is the largest base for which scan treats upper and
// lower case letters as the same digit.
const maxBaseSmall = 10 + ('z' - 'a' + 1)

// maxPow returns (b**n, n) such that b**n is the largest power b**n <= _M.
// For instance maxPow(10) == (1e19, 19) for 19 decimal digits in a 64bit Word.
// In other words, at most n digits in base b fit into a Word.
// TODO(gri) replace this with a table, generated at build time.
func maxPow(b Word) (p Word, n int) {
	p, n = b, 1 // assuming b <= _M
	// Stop before the multiplication by b could exceed _M.
	for max := _M / b; p <= max; {
		// p == b**n && p <= max
		p *= b
		n++
	}
	// p == b**n && p <= _M
	return
}

// pow returns x**n for n > 0, and 1 otherwise.
// The result is computed in Word arithmetic and wraps around on overflow.
func pow(x Word, n int) (p Word) {
	// n == sum of bi * 2**i, for 0 <= i < imax, and bi is 0 or 1
	// thus x**n == product of x**(2**i) for all i where bi == 1
	// (Russian Peasant Method for exponentiation)
	p = 1
	for n > 0 {
		if n&1 != 0 {
			p *= x
		}
		x *= x
		n >>= 1
	}
	return
}

// scan errors
var (
	errNoDigits = errors.New("number has no digits")
	errInvalSep = errors.New("'_' must separate successive digits")
)

// scan scans the number corresponding to the longest possible prefix
// from r representing an unsigned number in a given conversion base.
// scan returns the corresponding natural number res, the actual base b,
// a digit count, and a read or syntax error err, if any.
//
// For base 0, an underscore character ``_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number, or the returned
// digit count. Incorrect placement of underscores is reported as an
// error if there are no other errors. If base != 0, underscores are
// not recognized and thus terminate scanning like any other character
// that is not a valid radix point or digit.
//
//     number    = mantissa | prefix pmantissa .
//     prefix    = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
//     mantissa  = digits "." [ digits ] | digits | "." digits .
//     pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
//     digits    = digit { [ "_" ] digit } .
//     digit     = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
//
// Unless fracOk is set, the base argument must be 0 or a value between
// 2 and MaxBase. If fracOk is set, the base argument must be one of
// 0, 2, 8, 10, or 16. Providing an invalid base argument leads to a run-
// time panic.
//
// For base 0, the number prefix determines the actual base: A prefix of
// ``0b'' or ``0B'' selects base 2, ``0o'' or ``0O'' selects base 8, and
// ``0x'' or ``0X'' selects base 16. If fracOk is false, a ``0'' prefix
// (immediately followed by digits) selects base 8 as well. Otherwise,
// the selected base is 10 and no prefix is accepted.
//
// If fracOk is set, a period followed by a fractional part is permitted.
// The result value is computed as if there were no period present; and
// the count value is used to determine the fractional part.
//
// For bases <= 36, lower and upper case letters are considered the same:
// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
// For bases > 36, the upper case letters 'A' to 'Z' represent the digit
// values 36 to 61.
//
// A result digit count > 0 corresponds to the number of (non-prefix) digits
// parsed. A digit count <= 0 indicates the presence of a period (if fracOk
// is set, only), and -count is the number of fractional digits found.
// In this case, the actual value of the scanned number is res * b**count.
//
func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
	// reject invalid bases
	baseOk := base == 0 ||
		!fracOk && 2 <= base && base <= MaxBase ||
		fracOk && (base == 2 || base == 8 || base == 10 || base == 16)
	if !baseOk {
		panic(fmt.Sprintf("invalid number base %d", base))
	}

	// prev encodes the previously seen char: it is one
	// of '_', '0' (a digit), or '.' (anything else). A
	// valid separator '_' may only occur after a digit
	// and if base == 0.
	prev := '.'
	invalSep := false

	// one char look-ahead
	ch, err := r.ReadByte()

	// determine actual base
	b, prefix := base, 0
	if base == 0 {
		// actual base is 10 unless there's a base prefix
		b = 10
		if err == nil && ch == '0' {
			// Provisionally count the leading '0' as a digit; the count
			// is reset below if it turns out to be part of a prefix.
			prev = '0'
			count = 1
			ch, err = r.ReadByte()
			if err == nil {
				// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
				switch ch {
				case 'b', 'B':
					b, prefix = 2, 'b'
				case 'o', 'O':
					b, prefix = 8, 'o'
				case 'x', 'X':
					b, prefix = 16, 'x'
				default:
					if !fracOk {
						b, prefix = 8, '0'
					}
				}
				if prefix != 0 {
					count = 0 // prefix is not counted
					// For a plain "0" prefix, ch already holds the
					// character that follows the prefix.
					if prefix != '0' {
						ch, err = r.ReadByte()
					}
				}
			}
		}
	}

	// convert string
	// Algorithm: Collect digits in groups of at most n digits in di
	// and then use mulAddWW for every such group to add them to the
	// result.
	z = z[:0]
	b1 := Word(b)
	bn, n := maxPow(b1) // at most n digits in base b1 fit into Word
	di := Word(0)       // 0 <= di < b1**i < bn
	i := 0              // 0 <= i < n
	dp := -1            // position of decimal point
	for err == nil {
		if ch == '.' && fracOk {
			// Clearing fracOk makes a second '.' end the number.
			fracOk = false
			if prev == '_' {
				invalSep = true
			}
			prev = '.'
			dp = count
		} else if ch == '_' && base == 0 {
			if prev != '0' {
				invalSep = true
			}
			prev = '_'
		} else {
			// convert rune into digit value d1
			var d1 Word
			switch {
			case '0' <= ch && ch <= '9':
				d1 = Word(ch - '0')
			case 'a' <= ch && ch <= 'z':
				d1 = Word(ch - 'a' + 10)
			case 'A' <= ch && ch <= 'Z':
				if b <= maxBaseSmall {
					d1 = Word(ch - 'A' + 10)
				} else {
					d1 = Word(ch - 'A' + maxBaseSmall)
				}
			default:
				// Not a digit in any base: force the d1 >= b1 exit below.
				d1 = MaxBase + 1
			}
			if d1 >= b1 {
				r.UnreadByte() // ch does not belong to number anymore
				break
			}
			prev = '0'
			count++

			// collect d1 in di
			di = di*b1 + d1
			i++

			// if di is "full", add it to the result
			if i == n {
				z = z.mulAddWW(z, bn, di)
				di = 0
				i = 0
			}
		}

		ch, err = r.ReadByte()
	}

	// Running out of input simply ends the number.
	if err == io.EOF {
		err = nil
	}

	// other errors take precedence over invalid separators
	if err == nil && (invalSep || prev == '_') {
		err = errInvalSep
	}

	if count == 0 {
		// no digits found
		if prefix == '0' {
			// there was only the octal prefix 0 (possibly followed by separators and digits > 7);
			// interpret as decimal 0
			return z[:0], 10, 1, err
		}
		err = errNoDigits // fall through; result will be 0
	}

	// add remaining digits to result
	if i > 0 {
		z = z.mulAddWW(z, pow(b1, i), di)
	}
	res = z.norm()

	// adjust count for fraction, if any
	if dp >= 0 {
		// 0 <= dp <= count
		count = dp - count
	}

	return
}

// utoa converts x to an ASCII representation in the given base;
// base must be between 2 and MaxBase, inclusive.
func (x nat) utoa(base int) []byte {
	return x.itoa(false, base)
}

// itoa is like utoa but it prepends a '-' if neg && x != 0.
+func (x nat) itoa(neg bool, base int) []byte { + if base < 2 || base > MaxBase { + panic("invalid base") + } + + // x == 0 + if len(x) == 0 { + return []byte("0") + } + // len(x) > 0 + + // allocate buffer for conversion + i := int(float64(x.bitLen())/math.Log2(float64(base))) + 1 // off by 1 at most + if neg { + i++ + } + s := make([]byte, i) + + // convert power of two and non power of two bases separately + if b := Word(base); b == b&-b { + // shift is base b digit size in bits + shift := uint(bits.TrailingZeros(uint(b))) // shift > 0 because b >= 2 + mask := Word(1<<shift - 1) + w := x[0] // current word + nbits := uint(_W) // number of unprocessed bits in w + + // convert less-significant words (include leading zeros) + for k := 1; k < len(x); k++ { + // convert full digits + for nbits >= shift { + i-- + s[i] = digits[w&mask] + w >>= shift + nbits -= shift + } + + // convert any partial leading digit and advance to next word + if nbits == 0 { + // no partial digit remaining, just advance + w = x[k] + nbits = _W + } else { + // partial digit in current word w (== x[k-1]) and next word x[k] + w |= x[k] << nbits + i-- + s[i] = digits[w&mask] + + // advance + w = x[k] >> (shift - nbits) + nbits = _W - (shift - nbits) + } + } + + // convert digits of most-significant word w (omit leading zeros) + for w != 0 { + i-- + s[i] = digits[w&mask] + w >>= shift + } + + } else { + bb, ndigits := maxPow(b) + + // construct table of successive squares of bb*leafSize to use in subdivisions + // result (table != nil) <=> (len(x) > leafSize > 0) + table := divisors(len(x), b, ndigits, bb) + + // preserve x, create local copy for use by convertWords + q := nat(nil).set(x) + + // convert q to string s in base b + q.convertWords(s, b, ndigits, bb, table) + + // strip leading zeros + // (x != 0; thus s must contain at least one non-zero digit + // and the loop will terminate) + i = 0 + for s[i] == '0' { + i++ + } + } + + if neg { + i-- + s[i] = '-' + } + + return s[i:] +} + +// 
Convert words of q to base b digits in s. If q is large, it is recursively "split in half" +// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using +// repeated nat/Word division. +// +// The iterative method processes n Words by n divW() calls, each of which visits every Word in the +// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. +// Recursive conversion divides q by its approximate square root, yielding two parts, each half +// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s +// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and +// is made better by splitting the subblocks recursively. Best is to split blocks until one more +// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the +// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the +// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and +// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for +// specific hardware. 
+// +func (q nat) convertWords(s []byte, b Word, ndigits int, bb Word, table []divisor) { + // split larger blocks recursively + if table != nil { + // len(q) > leafSize > 0 + var r nat + index := len(table) - 1 + for len(q) > leafSize { + // find divisor close to sqrt(q) if possible, but in any case < q + maxLength := q.bitLen() // ~= log2 q, or at of least largest possible q of this bit length + minLength := maxLength >> 1 // ~= log2 sqrt(q) + for index > 0 && table[index-1].nbits > minLength { + index-- // desired + } + if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 { + index-- + if index < 0 { + panic("internal inconsistency") + } + } + + // split q into the two digit number (q'*bbb + r) to form independent subblocks + q, r = q.div(r, q, table[index].bbb) + + // convert subblocks and collect results in s[:h] and s[h:] + h := len(s) - table[index].ndigits + r.convertWords(s[h:], b, ndigits, bb, table[0:index]) + s = s[:h] // == q.convertWords(s, b, ndigits, bb, table[0:index+1]) + } + } + + // having split any large blocks now process the remaining (small) block iteratively + i := len(s) + var r Word + if b == 10 { + // hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants) + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) + for j := 0; j < ndigits && i > 0; j++ { + i-- + // avoid % computation since r%10 == r - int(r/10)*10; + // this appears to be faster for BenchmarkString10000Base10 + // and smaller strings (but a bit slower for larger ones) + t := r / 10 + s[i] = '0' + byte(r-t*10) + r = t + } + } + } else { + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) + for j := 0; j < ndigits && i > 0; j++ { + i-- + s[i] = digits[r%b] + r /= b + } + } + } + + // prepend high-order zeros + for i > 0 { // while need more leading zeros + i-- + s[i] = '0' + } +} + +// Split blocks greater than leafSize Words (or set to 0 to disable recursive 
conversion) +// Benchmark and configure leafSize using: go test -bench="Leaf" +// 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines) +// 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU +var leafSize int = 8 // number of Word-size binary values treat as a monolithic block + +type divisor struct { + bbb nat // divisor + nbits int // bit length of divisor (discounting leading zeros) ~= log2(bbb) + ndigits int // digit length of divisor in terms of output base digits +} + +var cacheBase10 struct { + sync.Mutex + table [64]divisor // cached divisors for base 10 +} + +// expWW computes x**y +func (z nat) expWW(x, y Word) nat { + return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil) +} + +// construct table of powers of bb*leafSize to use in subdivisions +func divisors(m int, b Word, ndigits int, bb Word) []divisor { + // only compute table when recursive conversion is enabled and x is large + if leafSize == 0 || m <= leafSize { + return nil + } + + // determine k where (bb**leafSize)**(2**k) >= sqrt(x) + k := 1 + for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 { + k++ + } + + // reuse and extend existing table of divisors or create new table as appropriate + var table []divisor // for b == 10, table overlaps with cacheBase10.table + if b == 10 { + cacheBase10.Lock() + table = cacheBase10.table[0:k] // reuse old table for this conversion + } else { + table = make([]divisor, k) // create new table for this conversion + } + + // extend table + if table[k-1].ndigits == 0 { + // add new entries as needed + var larger nat + for i := 0; i < k; i++ { + if table[i].ndigits == 0 { + if i == 0 { + table[0].bbb = nat(nil).expWW(bb, Word(leafSize)) + table[0].ndigits = ndigits * leafSize + } else { + table[i].bbb = nat(nil).sqr(table[i-1].bbb) + table[i].ndigits = 2 * table[i-1].ndigits + } + + // optimization: exploit aggregated extra bits in macro blocks + larger = nat(nil).set(table[i].bbb) + for 
mulAddVWW(larger, larger, b, 0) == 0 { + table[i].bbb = table[i].bbb.set(larger) + table[i].ndigits++ + } + + table[i].nbits = table[i].bbb.bitLen() + } + } + } + + if b == 10 { + cacheBase10.Unlock() + } + + return table +} diff --git a/src/math/big/natconv_test.go b/src/math/big/natconv_test.go new file mode 100644 index 0000000..d390272 --- /dev/null +++ b/src/math/big/natconv_test.go @@ -0,0 +1,463 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "bytes" + "fmt" + "io" + "math/bits" + "strings" + "testing" +) + +func TestMaxBase(t *testing.T) { + if MaxBase != len(digits) { + t.Fatalf("%d != %d", MaxBase, len(digits)) + } +} + +// log2 computes the integer binary logarithm of x. +// The result is the integer n for which 2^n <= x < 2^(n+1). +// If x == 0, the result is -1. +func log2(x Word) int { + return bits.Len(uint(x)) - 1 +} + +func itoa(x nat, base int) []byte { + // special cases + switch { + case base < 2: + panic("illegal base") + case len(x) == 0: + return []byte("0") + } + + // allocate buffer for conversion + i := x.bitLen()/log2(Word(base)) + 1 // +1: round up + s := make([]byte, i) + + // don't destroy x + q := nat(nil).set(x) + + // convert + for len(q) > 0 { + i-- + var r Word + q, r = q.divW(q, Word(base)) + s[i] = digits[r] + } + + return s[i:] +} + +var strTests = []struct { + x nat // nat value to be converted + b int // conversion base + s string // expected result +}{ + {nil, 2, "0"}, + {nat{1}, 2, "1"}, + {nat{0xc5}, 2, "11000101"}, + {nat{03271}, 8, "3271"}, + {nat{10}, 10, "10"}, + {nat{1234567890}, 10, "1234567890"}, + {nat{0xdeadbeef}, 16, "deadbeef"}, + {nat{0x229be7}, 17, "1a2b3c"}, + {nat{0x309663e6}, 32, "o9cov6"}, + {nat{0x309663e6}, 62, "TakXI"}, +} + +func TestString(t *testing.T) { + // test invalid base explicitly + var panicStr string + func() { + defer func() { + panicStr = 
recover().(string) + }() + natOne.utoa(1) + }() + if panicStr != "invalid base" { + t.Errorf("expected panic for invalid base") + } + + for _, a := range strTests { + s := string(a.x.utoa(a.b)) + if s != a.s { + t.Errorf("string%+v\n\tgot s = %s; want %s", a, s, a.s) + } + + x, b, _, err := nat(nil).scan(strings.NewReader(a.s), a.b, false) + if x.cmp(a.x) != 0 { + t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != a.b { + t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.b) + } + if err != nil { + t.Errorf("scan%+v\n\tgot error = %s", a, err) + } + } +} + +var natScanTests = []struct { + s string // string to be scanned + base int // input base + frac bool // fraction ok + x nat // expected nat + b int // expected base + count int // expected digit count + err error // expected error + next rune // next character (or 0, if at EOF) +}{ + // invalid: no digits + {"", 0, false, nil, 10, 0, errNoDigits, 0}, + {"_", 0, false, nil, 10, 0, errNoDigits, 0}, + {"?", 0, false, nil, 10, 0, errNoDigits, '?'}, + {"?", 10, false, nil, 10, 0, errNoDigits, '?'}, + {"", 10, false, nil, 10, 0, errNoDigits, 0}, + {"", 36, false, nil, 36, 0, errNoDigits, 0}, + {"", 62, false, nil, 62, 0, errNoDigits, 0}, + {"0b", 0, false, nil, 2, 0, errNoDigits, 0}, + {"0o", 0, false, nil, 8, 0, errNoDigits, 0}, + {"0x", 0, false, nil, 16, 0, errNoDigits, 0}, + {"0x_", 0, false, nil, 16, 0, errNoDigits, 0}, + {"0b2", 0, false, nil, 2, 0, errNoDigits, '2'}, + {"0B2", 0, false, nil, 2, 0, errNoDigits, '2'}, + {"0o8", 0, false, nil, 8, 0, errNoDigits, '8'}, + {"0O8", 0, false, nil, 8, 0, errNoDigits, '8'}, + {"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'}, + {"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'}, + {"345", 2, false, nil, 2, 0, errNoDigits, '3'}, + + // invalid: incorrect use of decimal point + {"._", 0, true, nil, 10, 0, errNoDigits, 0}, + {".0", 0, false, nil, 10, 0, errNoDigits, '.'}, + {".0", 10, false, nil, 10, 0, errNoDigits, '.'}, + {".", 0, true, nil, 10, 0, 
errNoDigits, 0}, + {"0x.", 0, true, nil, 16, 0, errNoDigits, 0}, + {"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'}, + {"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'}, + + // invalid: incorrect use of separators + {"_0", 0, false, nil, 10, 1, errInvalSep, 0}, + {"0_", 0, false, nil, 10, 1, errInvalSep, 0}, + {"0__0", 0, false, nil, 8, 1, errInvalSep, 0}, + {"0x___0", 0, false, nil, 16, 1, errInvalSep, 0}, + {"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'}, + {"0_8", 0, false, nil, 10, 1, errInvalSep, '8'}, + {"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0}, + {"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0}, + {"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0}, + {"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0}, + {"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0}, + {"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'}, + {"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0}, + {"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0}, + {"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0}, + {"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0}, + + // valid: separators are not accepted for base != 0 + {"0_", 10, false, nil, 10, 1, nil, '_'}, + {"1__0", 10, false, nat{1}, 10, 1, nil, '_'}, + {"0__8", 10, false, nil, 10, 1, nil, '_'}, + {"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'}, + + // valid, no decimal point + {"0", 0, false, nil, 10, 1, nil, 0}, + {"0", 36, false, nil, 36, 1, nil, 0}, + {"0", 62, false, nil, 62, 1, nil, 0}, + {"1", 0, false, nat{1}, 10, 1, nil, 0}, + {"1", 10, false, nat{1}, 10, 1, nil, 0}, + {"0 ", 0, false, nil, 10, 1, nil, ' '}, + {"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0 + {"0b1", 0, false, nat{1}, 2, 1, nil, 0}, + {"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0}, + {"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'}, + {"07", 0, false, nat{7}, 8, 1, nil, 0}, + {"08", 0, false, nil, 10, 1, nil, '8'}, + {"08", 10, false, nat{8}, 10, 2, nil, 0}, + {"018", 0, false, nat{1}, 8, 1, nil, '8'}, + {"0o7", 0, 
false, nat{7}, 8, 1, nil, 0}, + {"0o18", 0, false, nat{1}, 8, 1, nil, '8'}, + {"0O17", 0, false, nat{017}, 8, 2, nil, 0}, + {"03271", 0, false, nat{03271}, 8, 4, nil, 0}, + {"10ab", 0, false, nat{10}, 10, 2, nil, 'a'}, + {"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0}, + {"A", 36, false, nat{10}, 36, 1, nil, 0}, + {"A", 37, false, nat{36}, 37, 1, nil, 0}, + {"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0}, + {"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'}, + {"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'}, + {"0x", 16, false, nil, 16, 1, nil, 'x'}, + {"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0}, + {"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0}, + + // valid, with decimal point + {"0.", 0, false, nil, 10, 1, nil, '.'}, + {"0.", 10, true, nil, 10, 0, nil, 0}, + {"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'}, + {".000", 10, true, nil, 10, -3, nil, 0}, + {"12.3", 10, true, nat{123}, 10, -1, nil, 0}, + {"012.345", 10, true, nat{12345}, 10, -3, nil, 0}, + {"0.1", 0, true, nat{1}, 10, -1, nil, 0}, + {"0.1", 2, true, nat{1}, 2, -1, nil, 0}, + {"0.12", 2, true, nat{1}, 2, -1, nil, '2'}, + {"0b0.1", 0, true, nat{1}, 2, -1, nil, 0}, + {"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'}, + {"0o0.7", 0, true, nat{7}, 8, -1, nil, 0}, + {"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'}, + {"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0}, + + // valid, with separators + {"1_000", 0, false, nat{1000}, 10, 4, nil, 0}, + {"0_466", 0, false, nat{0466}, 8, 3, nil, 0}, + {"0o_600", 0, false, nat{0600}, 8, 3, nil, 0}, + {"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0}, + {"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0}, + {"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0}, + {"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0}, + {"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0}, + {"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'}, +} + +func TestScanBase(t *testing.T) { + for _, a := range 
natScanTests { + r := strings.NewReader(a.s) + x, b, count, err := nat(nil).scan(r, a.base, a.frac) + if err != a.err { + t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err) + } + if x.cmp(a.x) != 0 { + t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != a.b { + // report a.b, the expected base that b was just compared against (a.base is the input base) + t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.b) + } + if count != a.count { + t.Errorf("scan%+v\n\tgot count = %d; want %d", a, count, a.count) + } + next, _, err := r.ReadRune() + if err == io.EOF { + next = 0 + err = nil + } + if err == nil && next != a.next { + t.Errorf("scan%+v\n\tgot next = %q; want %q", a, next, a.next) + } + } +} + +var pi = "3" + + "14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651" + + "32823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461" + + "28475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920" + + "96282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179" + + "31051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798" + + "60943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901" + + "22495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837" + + "29780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083" + + "81420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909" + + "21642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151" + + "55748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035" + + 
"63707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104" + + "75216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992" + + "45863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818" + + "34797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548" + + "16136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179" + + "04946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886" + + "26945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645" + + "99581339047802759009946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745" + + "53050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382" + + "68683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244" + + "13654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767" + + "88952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288" + + "79710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821" + + "68299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610" + + "21359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435" + + "06430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675" + + "14269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672" + + 
"21825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539" + + "05796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007" + + "23055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816" + + "90915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398" + + "31501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064" + + "20467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325" + + "97463667305836041428138830320382490375898524374417029132765618093773444030707469211201913020330380197621101100" + + "44929321516084244485963766983895228684783123552658213144957685726243344189303968642624341077322697802807318915" + + "44110104468232527162010526522721116603966655730925471105578537634668206531098965269186205647693125705863566201" + + "85581007293606598764861179104533488503461136576867532494416680396265797877185560845529654126654085306143444318" + + "58676975145661406800700237877659134401712749470420562230538994561314071127000407854733269939081454664645880797" + + "27082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923" + + "09907965473761255176567513575178296664547791745011299614890304639947132962107340437518957359614589019389713111" + + "79042978285647503203198691514028708085990480109412147221317947647772622414254854540332157185306142288137585043" + + "06332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120" + + "91807638327166416274888800786925602902284721040317211860820419000422966171196377921337575114959501566049631862" + + "94726547364252308177036751590673502350728354056704038674351362222477158915049530984448933309634087807693259939" + + 
"78054193414473774418426312986080998886874132604721569516239658645730216315981931951673538129741677294786724229" + + "24654366800980676928238280689964004824354037014163149658979409243237896907069779422362508221688957383798623001" + + "59377647165122893578601588161755782973523344604281512627203734314653197777416031990665541876397929334419521541" + + "34189948544473456738316249934191318148092777710386387734317720754565453220777092120190516609628049092636019759" + + "88281613323166636528619326686336062735676303544776280350450777235547105859548702790814356240145171806246436267" + + "94561275318134078330336254232783944975382437205835311477119926063813346776879695970309833913077109870408591337" + +// Test case for BenchmarkScanPi. +func TestScanPi(t *testing.T) { + var x nat + z, _, _, err := x.scan(strings.NewReader(pi), 10, false) + if err != nil { + t.Errorf("scanning pi: %s", err) + } + if s := string(z.utoa(10)); s != pi { + t.Errorf("scanning pi: got %s", s) + } +} + +func TestScanPiParallel(t *testing.T) { + const n = 2 + c := make(chan int) + for i := 0; i < n; i++ { + go func() { + TestScanPi(t) + c <- 0 + }() + } + for i := 0; i < n; i++ { + <-c + } +} + +func BenchmarkScanPi(b *testing.B) { + for i := 0; i < b.N; i++ { + var x nat + x.scan(strings.NewReader(pi), 10, false) + } +} + +func BenchmarkStringPiParallel(b *testing.B) { + var x nat + x, _, _, _ = x.scan(strings.NewReader(pi), 0, false) + if string(x.utoa(10)) != pi { + panic("benchmark incorrect: conversion failed") + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + x.utoa(10) + } + }) +} + +func BenchmarkScan(b *testing.B) { + const x = 10 + for _, base := range []int{2, 8, 10, 16} { + for _, y := range []Word{10, 100, 1000, 10000, 100000} { + if isRaceBuilder && y > 1000 { + continue + } + b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) { + b.StopTimer() + var z nat + z = z.expWW(x, y) + + s := z.utoa(base) + if t := itoa(z, base); !bytes.Equal(s, t) { + b.Fatalf("scanning: 
got %s; want %s", s, t) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + z.scan(bytes.NewReader(s), base, false) + } + }) + } + } +} + +func BenchmarkString(b *testing.B) { + const x = 10 + for _, base := range []int{2, 8, 10, 16} { + for _, y := range []Word{10, 100, 1000, 10000, 100000} { + if isRaceBuilder && y > 1000 { + continue + } + b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) { + b.StopTimer() + var z nat + z = z.expWW(x, y) + z.utoa(base) // warm divisor cache + b.StartTimer() + + for i := 0; i < b.N; i++ { + _ = z.utoa(base) + } + }) + } + } +} + +func BenchmarkLeafSize(b *testing.B) { + for n := 0; n <= 16; n++ { + b.Run(fmt.Sprint(n), func(b *testing.B) { LeafSizeHelper(b, 10, n) }) + } + // Try some large lengths + for _, n := range []int{32, 64} { + b.Run(fmt.Sprint(n), func(b *testing.B) { LeafSizeHelper(b, 10, n) }) + } +} + +func LeafSizeHelper(b *testing.B, base, size int) { + b.StopTimer() + originalLeafSize := leafSize + resetTable(cacheBase10.table[:]) + leafSize = size + b.StartTimer() + + for d := 1; d <= 10000; d *= 10 { + b.StopTimer() + var z nat + z = z.expWW(Word(base), Word(d)) // build target number + _ = z.utoa(base) // warm divisor cache + b.StartTimer() + + for i := 0; i < b.N; i++ { + _ = z.utoa(base) + } + } + + b.StopTimer() + resetTable(cacheBase10.table[:]) + leafSize = originalLeafSize + b.StartTimer() +} + +func resetTable(table []divisor) { + if table != nil && table[0].bbb != nil { + for i := 0; i < len(table); i++ { + table[i].bbb = nil + table[i].nbits = 0 + table[i].ndigits = 0 + } + } +} + +func TestStringPowers(t *testing.T) { + var p Word + for b := 2; b <= 16; b++ { + for p = 0; p <= 512; p++ { + if testing.Short() && p > 10 { + break + } + x := nat(nil).expWW(Word(b), p) + xs := x.utoa(b) + xs2 := itoa(x, b) + if !bytes.Equal(xs, xs2) { + t.Errorf("failed at %d ** %d in base %d: %s != %s", b, p, b, xs, xs2) + } + } + if b >= 3 && testing.Short() { + break + } + } +} diff --git 
a/src/math/big/natdiv.go b/src/math/big/natdiv.go new file mode 100644 index 0000000..882bb6d --- /dev/null +++ b/src/math/big/natdiv.go @@ -0,0 +1,884 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + +Multi-precision division. Here be dragons. + +Given u and v, where u is n+m digits, and v is n digits (with no leading zeros), +the goal is to return quo, rem such that u = quo*v + rem, where 0 ≤ rem < v. +That is, quo = ⌊u/v⌋ where ⌊x⌋ denotes the floor (truncation to integer) of x, +and rem = u - quo·v. + + +Long Division + +Division in a computer proceeds the same as long division in elementary school, +but computers are not as good as schoolchildren at following vague directions, +so we have to be much more precise about the actual steps and what can happen. + +We work from most to least significant digit of the quotient, doing: + + • Guess a digit q, the number of v to subtract from the current + section of u to zero out the topmost digit. + • Check the guess by multiplying q·v and comparing it against + the current section of u, adjusting the guess as needed. + • Subtract q·v from the current section of u. + • Add q to the corresponding section of the result quo. + +When all digits have been processed, the final remainder is left in u +and returned as rem. + +For example, here is a sketch of dividing 5 digits by 3 digits (n=3, m=2). 
+ + q₂ q₁ q₀ + _________________ + v₂ v₁ v₀ ) u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ | | + [u₄ u₃ u₂]| | + - [ q₂·v ]| | + ----------- ↓ | + [ rem | u₁]| + - [ q₁·v ]| + ----------- ↓ + [ rem | u₀] + - [ q₀·v ] + ------------ + [ rem ] + +Instead of creating new storage for the remainders and copying digits from u +as indicated by the arrows, we use u's storage directly as both the source +and destination of the subtractions, so that the remainders overwrite +successive overlapping sections of u as the division proceeds, using a slice +of u to identify the current section. This avoids all the copying as well as +shifting of remainders. + +Division of u with n+m digits by v with n digits (in base B) can in general +produce at most m+1 digits, because: + + • u < B^(n+m) [B^(n+m) has n+m+1 digits] + • v ≥ B^(n-1) [B^(n-1) is the smallest n-digit number] + • u/v < B^(n+m) / B^(n-1) [divide bounds for u, v] + • u/v < B^(m+1) [simplify] + +The first step is special: it takes the top n digits of u and divides them by +the n digits of v, producing the first quotient digit and an n-digit remainder. +In the example, q₂ = ⌊u₄u₃u₂ / v⌋. + +The first step divides n digits by n digits to ensure that it produces only a +single digit. + +Each subsequent step appends the next digit from u to the remainder and divides +those n+1 digits by the n digits of v, producing another quotient digit and a +new n-digit remainder. + +Subsequent steps divide n+1 digits by n digits, an operation that in general +might produce two digits. However, as used in the algorithm, that division is +guaranteed to produce only a single digit. 
The dividend is of the form +rem·B + d, where rem is a remainder from the previous step and d is a single +digit, so: + + • rem ≤ v - 1 [rem is a remainder from dividing by v] + • rem·B ≤ v·B - B [multiply by B] + • d ≤ B - 1 [d is a single digit] + • rem·B + d ≤ v·B - 1 [add] + • rem·B + d < v·B [change ≤ to <] + • (rem·B + d)/v < B [divide by v] + + +Guess and Check + +At each step we need to divide n+1 digits by n digits, but this is for the +implementation of division by n digits, so we can't just invoke a division +routine: we _are_ the division routine. Instead, we guess at the answer and +then check it using multiplication. If the guess is wrong, we correct it. + +How can this guessing possibly be efficient? It turns out that the following +statement (let's call it the Good Guess Guarantee) is true. + +If + + • q = ⌊u/v⌋ where u is n+1 digits and v is n digits, + • q < B, and + • the topmost digit of v = vₙ₋₁ ≥ B/2, + +then q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ satisfies q ≤ q̂ ≤ q+2. (Proof below.) + +That is, if we know the answer has only a single digit and we guess an answer +by ignoring the bottom n-1 digits of u and v, using a 2-by-1-digit division, +then that guess is at least as large as the correct answer. It is also not +too much larger: it is off by at most two from the correct answer. + +Note that in the first step of the overall division, which is an n-by-n-digit +division, the 2-by-1 guess uses an implicit uₙ = 0. + +Note that using a 2-by-1-digit division here does not mean calling ourselves +recursively. Instead, we use an efficient direct hardware implementation of +that operation. + +Note that because q is u/v rounded down, q·v must not exceed u: u ≥ q·v. +If a guess q̂ is too big, it will not satisfy this test. Viewed a different way, +the remainder r̂ for a given q̂ is u - q̂·v, which must be positive. If it is +negative, then the guess q̂ is too big. + +This gives us a way to compute q. First compute q̂ with 2-by-1-digit division. 
+Then, while u < q̂·v, decrement q̂; this loop executes at most twice, because +q̂ ≤ q+2. + + +Scaling Inputs + +The Good Guess Guarantee requires that the top digit of v (vₙ₋₁) be at least B/2. +For example in base 10, ⌊172/19⌋ = 9, but ⌊18/1⌋ = 18: the guess is wildly off +because the first digit 1 is smaller than B/2 = 5. + +We can ensure that v has a large top digit by multiplying both u and v by the +right amount. Continuing the example, if we multiply both 172 and 19 by 3, we +now have ⌊516/57⌋, the leading digit of v is now ≥ 5, and sure enough +⌊51/5⌋ = 10 is much closer to the correct answer 9. It would be easier here +to multiply by 4, because that can be done with a shift. Specifically, we can +always count the number of leading zeros i in the first digit of v and then +shift both u and v left by i bits. + +Having scaled u and v, the value ⌊u/v⌋ is unchanged, but the remainder will +be scaled: 172 mod 19 is 1, but 516 mod 57 is 3. We have to divide the remainder +by the scaling factor (shifting right i bits) when we finish. + +Note that these shifts happen before and after the entire division algorithm, +not at each step in the per-digit iteration. + +Note the effect of scaling inputs on the size of the possible quotient. +In the scaled u/v, u can gain a digit from scaling; v never does, because we +pick the scaling factor to make v's top digit larger but without overflowing. +If u and v have n+m and n digits after scaling, then: + + • u < B^(n+m) [B^(n+m) has n+m+1 digits] + • v ≥ B^n / 2 [vₙ₋₁ ≥ B/2, so vₙ₋₁·B^(n-1) ≥ B^n/2] + • u/v < B^(n+m) / (B^n / 2) [divide bounds for u, v] + • u/v < 2 B^m [simplify] + +The quotient can still have m+1 significant digits, but if so the top digit +must be a 1. This provides a different way to handle the first digit of the +result: compare the top n digits of u against v and fill in either a 0 or a 1. 
+ + +Refining Guesses + +Before we check whether u < q̂·v, we can adjust our guess to change it from +q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ into the refined guess ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋. +Although not mentioned above, the Good Guess Guarantee also promises that this +3-by-2-digit division guess is more precise and at most one away from the real +answer q. The improvement from the 2-by-1 to the 3-by-2 guess can also be done +without n-digit math. + +If we have a guess q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ and we want to see if it is also equal to +⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, we can use the same check we would for the full division: +if uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂, then the guess is too large and should be reduced. + +Checking uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ < 0, +and + + uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ = (uₙuₙ₋₁·B + uₙ₋₂) - q̂·(vₙ₋₁·B + vₙ₋₂) + [splitting off the bottom digit] + = (uₙuₙ₋₁ - q̂·vₙ₋₁)·B + uₙ₋₂ - q̂·vₙ₋₂ + [regrouping] + +The expression (uₙuₙ₋₁ - q̂·vₙ₋₁) is the remainder of uₙuₙ₋₁ / vₙ₋₁. +If the initial guess returns both q̂ and its remainder r̂, then checking +whether uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as checking r̂·B + uₙ₋₂ < q̂·vₙ₋₂. + +If we find that r̂·B + uₙ₋₂ < q̂·vₙ₋₂, then we can adjust the guess by +decrementing q̂ and adding vₙ₋₁ to r̂. We repeat until r̂·B + uₙ₋₂ ≥ q̂·vₙ₋₂. +(As before, this fixup is only needed at most twice.) + +Now that q̂ = ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, as mentioned above it is at most one +away from the correct q, and we've avoided doing any n-digit math. +(If we need the new remainder, it can be computed as r̂·B + uₙ₋₂ - q̂·vₙ₋₂.) + +The final check u < q̂·v and the possible fixup must be done at full precision. +For random inputs, a fixup at this step is exceedingly rare: the 3-by-2 guess +is not often wrong at all. But still we must do the check. Note that since the +3-by-2 guess is off by at most 1, it can be convenient to perform the final +u < q̂·v as part of the computation of the remainder r = u - q̂·v.
If the +subtraction underflows, decrementing q̂ and adding one v back to r is enough to +arrive at the final q, r. + +That's the entirety of long division: scale the inputs, and then loop over +each output position, guessing, checking, and correcting the next output digit. + +For a 2n-digit number divided by an n-digit number (the worst size-n case for +division complexity), this algorithm uses n+1 iterations, each of which must do +at least the 1-by-n-digit multiplication q̂·v. That's O(n) iterations of +O(n) time each, so O(n²) time overall. + + +Recursive Division + +For very large inputs, it is possible to improve on the O(n²) algorithm. +Let's call a group of n/2 real digits a (very) “wide digit”. We can run the +standard long division algorithm explained above over the wide digits instead of +the actual digits. This will result in many fewer steps, but the math involved in +each step is more work. + +Where basic long division uses a 2-by-1-digit division to guess the initial q̂, +the new algorithm must use a 2-by-1-wide-digit division, which is of course +really an n-by-n/2-digit division. That's OK: if we implement n-digit division +in terms of n/2-digit division, the recursion will terminate when the divisor +becomes small enough to handle with standard long division or even with the +2-by-1 hardware instruction. + +For example, here is a sketch of dividing 10 digits by 4, proceeding with +wide digits corresponding to two regular digits. The first step, still special, +must leave off a (regular) digit, dividing 5 by 4 and producing a 4-digit +remainder less than v. The middle steps divide 6 digits by 4, guaranteed to +produce two output digits each (one wide digit) with 4-digit remainders. +The final step must use what it has: the 4-digit remainder plus one more, +5 digits to divide by 4.
+ + q₆ q₅ q₄ q₃ q₂ q₁ q₀ + _______________________________ + v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ ↓ ↓ | | | | | + [u₉ u₈ u₇ u₆ u₅]| | | | | + - [ q₆q₅·v ]| | | | | + ----------------- ↓ ↓ | | | + [ rem |u₄ u₃]| | | + - [ q₄q₃·v ]| | | + -------------------- ↓ ↓ | + [ rem |u₂ u₁]| + - [ q₂q₁·v ]| + -------------------- ↓ + [ rem |u₀] + - [ q₀·v ] + ------------------ + [ rem ] + +An alternative would be to look ahead to how well n/2 divides into n+m and +adjust the first step to use fewer digits as needed, making the first step +more special to make the last step not special at all. For example, using the +same input, we could choose to use only 4 digits in the first step, leaving +a full wide digit for the last step: + + q₆ q₅ q₄ q₃ q₂ q₁ q₀ + _______________________________ + v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ ↓ | | | | | | + [u₉ u₈ u₇ u₆]| | | | | | + - [ q₆·v ]| | | | | | + -------------- ↓ ↓ | | | | + [ rem |u₅ u₄]| | | | + - [ q₅q₄·v ]| | | | + -------------------- ↓ ↓ | | + [ rem |u₃ u₂]| | + - [ q₃q₂·v ]| | + -------------------- ↓ ↓ + [ rem |u₁ u₀] + - [ q₁q₀·v ] + --------------------- + [ rem ] + +Today, the code in divRecursiveStep works like the first example. Perhaps in +the future we will make it work like the alternative, to avoid a special case +in the final iteration. + +Either way, each step is a 3-by-2-wide-digit division approximated first by +a 2-by-1-wide-digit division, just as we did for regular digits in long division. +Because the actual answer we want is a 3-by-2-wide-digit division, instead of +multiplying q̂·v directly during the fixup, we can use the quick refinement +from long division (an n/2-by-n/2 multiply) to correct q to its actual value +and also compute the remainder (as mentioned above), and then stop after that, +never doing a full n-by-n multiply. 
+ +Instead of using an n-by-n/2-digit division to produce n/2 digits, we can add +(not discard) one more real digit, doing an (n+1)-by-(n/2+1)-digit division that +produces n/2+1 digits. That single extra digit tightens the Good Guess Guarantee +to q ≤ q̂ ≤ q+1 and lets us drop long division's special treatment of the first +digit. These benefits are discussed more after the Good Guess Guarantee proof +below. + + +How Fast is Recursive Division? + +For a 2n-by-n-digit division, this algorithm runs a 4-by-2 long division over +wide digits, producing two wide digits plus a possible leading regular digit 1, +which can be handled without a recursive call. That is, the algorithm uses two +full iterations, each using an n-by-n/2-digit division and an n/2-by-n/2-digit +multiplication, along with a few n-digit additions and subtractions. The standard +n-by-n-digit multiplication algorithm requires O(n²) time, making the overall +algorithm require time T(n) where + + T(n) = 2T(n/2) + O(n) + O(n²) + +which, by the Bentley-Haken-Saxe theorem, ends up reducing to T(n) = O(n²). +This is not an improvement over regular long division. + +When the number of digits n becomes large enough, Karatsuba's algorithm for +multiplication can be used instead, which takes O(n^log₂3) = O(n^1.6) time. +(Karatsuba multiplication is implemented in func karatsuba in nat.go.) +That makes the overall recursive division algorithm take O(n^1.6) time as well, +which is an improvement, but again only for large enough numbers. + +It is not critical to make sure that every recursion does only two recursive +calls. While in general the number of recursive calls can change the time +analysis, in this case doing three calls does not change the analysis: + + T(n) = 3T(n/2) + O(n) + O(n^log₂3) + +ends up being T(n) = O(n^log₂3). Because the Karatsuba multiplication taking +time O(n^log₂3) is itself doing 3 half-sized recursions, doing three for the +division does not hurt the asymptotic performance. 
Of course, it is likely +still faster in practice to do two. + + +Proof of the Good Guess Guarantee + +Given numbers x, y, let us break them into the quotients and remainders when +divided by some scaling factor S, with the added constraints that the quotient +x/y and the high part of y are both less than some limit T, and that the high +part of y is at least half as big as T. + + x₁ = ⌊x/S⌋ y₁ = ⌊y/S⌋ + x₀ = x mod S y₀ = y mod S + + x = x₁·S + x₀ 0 ≤ x₀ < S x/y < T + y = y₁·S + y₀ 0 ≤ y₀ < S T/2 ≤ y₁ < T + +And consider the two truncated quotients: + + q = ⌊x/y⌋ + q̂ = ⌊x₁/y₁⌋ + +We will prove that q ≤ q̂ ≤ q+2. + +The guarantee makes no real demands on the scaling factor S: it is simply the +magnitude of the digits cut from both x and y to produce x₁ and y₁. +The guarantee makes only limited demands on T: it must be large enough to hold +the quotient x/y, and y₁ must have roughly the same size. + +To apply to the earlier discussion of 2-by-1 guesses in long division, +we would choose: + + S = Bⁿ⁻¹ + T = B + x = u + x₁ = uₙuₙ₋₁ + x₀ = uₙ₋₂...u₀ + y = v + y₁ = vₙ₋₁ + y₀ = vₙ₋₂...v₀ + +These simpler variables avoid repeating those longer expressions in the proof. + +Note also that, by definition, truncating division ⌊x/y⌋ satisfies + + x/y - 1 < ⌊x/y⌋ ≤ x/y. + +This fact will be used a few times in the proofs. + +Proof that q ≤ q̂: + + q̂·y₁ = ⌊x₁/y₁⌋·y₁ [by definition, q̂ = ⌊x₁/y₁⌋] + > (x₁/y₁ - 1)·y₁ [x₁/y₁ - 1 < ⌊x₁/y₁⌋] + = x₁ - y₁ [distribute y₁] + + So q̂·y₁ > x₁ - y₁. + Since q̂·y₁ is an integer, q̂·y₁ ≥ x₁ - y₁ + 1. + + q̂ - q = q̂ - ⌊x/y⌋ [by definition, q = ⌊x/y⌋] + ≥ q̂ - x/y [⌊x/y⌋ ≤ x/y] + = (1/y)·(q̂·y - x) [factor out 1/y] + ≥ (1/y)·(q̂·y₁·S - x) [y = y₁·S + y₀ ≥ y₁·S] + ≥ (1/y)·((x₁ - y₁ + 1)·S - x) [above: q̂·y₁ ≥ x₁ - y₁ + 1] + = (1/y)·(x₁·S - y₁·S + S - x) [distribute S] + = (1/y)·(S - x₀ - y₁·S) [-x = -x₁·S - x₀] + > -y₁·S / y [x₀ < S, so S - x₀ > 0; drop it] + ≥ -1 [y₁·S ≤ y] + + So q̂ - q > -1.
+ Since q̂ - q is an integer, q̂ - q ≥ 0, or equivalently q ≤ q̂. + +Proof that q̂ ≤ q+2: + + x₁/y₁ - x/y = x₁·S/y₁·S - x/y [multiply left term by S/S] + ≤ x/y₁·S - x/y [x₁S ≤ x] + = (x/y)·(y/y₁·S - 1) [factor out x/y] + = (x/y)·((y - y₁·S)/y₁·S) [move -1 into y/y₁·S fraction] + = (x/y)·(y₀/y₁·S) [y - y₁·S = y₀] + = (x/y)·(1/y₁)·(y₀/S) [factor out 1/y₁] + < (x/y)·(1/y₁) [y₀ < S, so y₀/S < 1] + ≤ (x/y)·(2/T) [y₁ ≥ T/2, so 1/y₁ ≤ 2/T] + < T·(2/T) [x/y < T] + = 2 [T·(2/T) = 2] + + So x₁/y₁ - x/y < 2. + + q̂ - q = ⌊x₁/y₁⌋ - q [by definition, q̂ = ⌊x₁/y₁⌋] + = ⌊x₁/y₁⌋ - ⌊x/y⌋ [by definition, q = ⌊x/y⌋] + ≤ x₁/y₁ - ⌊x/y⌋ [⌊x₁/y₁⌋ ≤ x₁/y₁] + < x₁/y₁ - (x/y - 1) [⌊x/y⌋ > x/y - 1] + = (x₁/y₁ - x/y) + 1 [regrouping] + < 2 + 1 [above: x₁/y₁ - x/y < 2] + = 3 + + So q̂ - q < 3. + Since q̂ - q is an integer, q̂ - q ≤ 2. + +Note that when x/y < T/2, the bounds tighten to x₁/y₁ - x/y < 1 and therefore +q̂ - q ≤ 1. + +Note also that in the general case 2n-by-n division where we don't know that +x/y < T, we do know that x/y < 2T, yielding the bound q̂ - q ≤ 4. So we could +remove the special case first step of long division as long as we allow the +first fixup loop to run up to four times. (Using a simple comparison to decide +whether the first digit is 0 or 1 is still more efficient, though.) + +Finally, note that when dividing three leading base-B digits by two (scaled), +we have T = B² and x/y < B = T/B, a much tighter bound than x/y < T. +This in turn yields the much tighter bound x₁/y₁ - x/y < 2/B. This means that +⌊x₁/y₁⌋ and ⌊x/y⌋ can only differ when x/y is less than 2/B greater than an +integer. For random x and y, the chance of this is 2/B, or, for large B, +approximately zero. This means that after we produce the 3-by-2 guess in the +long division algorithm, the fixup loop essentially never runs. + +In the recursive algorithm, the extra digit in (2·⌊n/2⌋+1)-by-(⌊n/2⌋+1)-digit +division has exactly the same effect: the probability of needing a fixup is the +same 2/B. 
Even better, we can allow the general case x/y < 2T and the fixup +probability only grows to 4/B, still essentially zero. + + +References + +There are no great references for implementing long division; thus this comment. +Here are some notes about what to expect from the obvious references. + +Knuth Volume 2 (Seminumerical Algorithms) section 4.3.1 is the usual canonical +reference for long division, but that entire series is highly compressed, never +repeating a necessary fact and leaving important insights to the exercises. +For example, no rationale whatsoever is given for the calculation that extends +q̂ from a 2-by-1 to a 3-by-2 guess, nor why it reduces the error bound. +The proof that the calculation even has the desired effect is left to exercises. +The solutions to those exercises provided at the back of the book are entirely +calculations, still with no explanation as to what is going on or how you would +arrive at the idea of doing those exact calculations. Nowhere is it mentioned +that this test extends the 2-by-1 guess into a 3-by-2 guess. The proof of the +Good Guess Guarantee is only for the 2-by-1 guess and argues by contradiction, +making it difficult to understand how modifications like adding another digit +or adjusting the quotient range affects the overall bound. + +All that said, Knuth remains the canonical reference. It is dense but packed +full of information and references, and the proofs are simpler than many other +presentations. The proofs above are reworkings of Knuth's to remove the +arguments by contradiction and add explanations or steps that Knuth omitted. +But beware of errors in older printings. Take the published errata with you. + +Brinch Hansen's “Multiple-length Division Revisited: a Tour of the Minefield” +starts with a blunt critique of Knuth's presentation (among others) and then +presents a more detailed and easier to follow treatment of long division, +including an implementation in Pascal. 
But the algorithm and implementation +work entirely in terms of 3-by-2 division, which is much less useful on modern +hardware than an algorithm using 2-by-1 division. The proofs are a bit too +focused on digit counting and seem needlessly complex, especially compared to +the ones given above. + +Burnikel and Ziegler's “Fast Recursive Division” introduced the key insight of +implementing division by an n-digit divisor using recursive calls to division +by an n/2-digit divisor, relying on Karatsuba multiplication to yield a +sub-quadratic run time. However, the presentation decisions are made almost +entirely for the purpose of simplifying the run-time analysis, rather than +simplifying the presentation. Instead of a single algorithm that loops over +quotient digits, the paper presents two mutually-recursive algorithms, for +2n-by-n and 3n-by-2n. The paper also does not present any general (n+m)-by-n +algorithm. + +The proofs in the paper are remarkably complex, especially considering that +the algorithm is at its core just long division on wide digits, so that the +usual long division proofs apply essentially unaltered. +*/ + +package big + +import "math/bits" + +// div returns q, r such that q = ⌊u/v⌋ and r = u%v = u - q·v. +// It uses z and z2 as the storage for q and r. +// It panics if v is zero (len(v) == 0). +func (z nat) div(z2, u, v nat) (q, r nat) { + if len(v) == 0 { + panic("division by zero") + } + + // If u < v, the quotient is 0 and the remainder is u. + if u.cmp(v) < 0 { + q = z[:0] + r = z2.set(u) + return + } + + if len(v) == 1 { + // Short division: long division optimized for a single-word divisor. + // In that case, the 2-by-1 guess is all we need at each step. + var r2 Word + q, r2 = z.divW(u, v[0]) + r = z2.setWord(r2) + return + } + + // General case: the divisor has at least two words and u ≥ v. + q, r = z.divLarge(z2, u, v) + return +} + +// divW returns q, r such that q = ⌊x/y⌋ and r = x%y = x - q·y. +// It uses z as the storage for q. +// Note that y is a single digit (Word), not a big number. +// It panics if y == 0.
+func (z nat) divW(x nat, y Word) (q nat, r Word) { + m := len(x) + switch { + case y == 0: + panic("division by zero") + case y == 1: + q = z.set(x) // result is x, remainder 0 + return + case m == 0: + q = z[:0] // result is 0, remainder 0 + return + } + // m > 0 + z = z.make(m) + r = divWVW(z, 0, x, y) + q = z.norm() + return +} + +// modW returns x % d. +func (x nat) modW(d Word) (r Word) { + // TODO(agl): we don't actually need to store the q value. + var q nat + q = q.make(len(x)) + return divWVW(q, 0, x, d) +} + +// divWVW overwrites z with ⌊x/y⌋, returning the remainder r. +// xn seeds the running remainder r, which is passed as the first operand +// (the high word for bits.Div) of each division step. +// The caller must ensure that len(z) = len(x). +func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { + r = xn + if len(x) == 1 { + // Single-word x: one bits.Div call does the whole job, + // so skip computing the reciprocal. + qq, rr := bits.Div(uint(r), uint(x[0]), uint(y)) + z[0] = Word(qq) + return Word(rr) + } + rec := reciprocalWord(y) + for i := len(z) - 1; i >= 0; i-- { + z[i], r = divWW(r, x[i], y, rec) + } + return r +} + +// divLarge returns q, r such that q = ⌊uIn/vIn⌋ and r = uIn%vIn = uIn - q·vIn. +// It uses z and u as the storage for q and r. +// The caller must ensure that len(vIn) ≥ 2 (use divW otherwise) +// and that len(uIn) ≥ len(vIn) (the answer is 0, uIn otherwise). +func (z nat) divLarge(u, uIn, vIn nat) (q, r nat) { + n := len(vIn) + m := len(uIn) - n + + // Scale the inputs so vIn's top bit is 1 (see “Scaling Inputs” above). + // vIn is treated as a read-only input (it may be in use by another + // goroutine), so we must make a copy. + // uIn is copied to u. + shift := nlz(vIn[n-1]) + vp := getNat(n) + v := *vp + shlVU(v, vIn, shift) + u = u.make(len(uIn) + 1) + u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) + + // The caller should not pass aliased z and u, since those are + // the two different outputs, but correct just in case. + if alias(z, u) { + z = nil + } + q = z.make(m + 1) + + // Use basic or recursive long division depending on size.
+ if n < divRecursiveThreshold { + q.divBasic(u, v) + } else { + q.divRecursive(u, v) + } + putNat(vp) + + q = q.norm() + + // Undo scaling of remainder. + shrVU(u, u, shift) + r = u.norm() + + return q, r +} + +// divBasic implements long division as described above. +// It overwrites q with ⌊u/v⌋ and overwrites u with the remainder r. +// q must be large enough to hold ⌊u/v⌋. +func (q nat) divBasic(u, v nat) { + n := len(v) + m := len(u) - n + + qhatvp := getNat(n + 1) + qhatv := *qhatvp + + // Set up for divWW below, precomputing reciprocal argument. + vn1 := v[n-1] + rec := reciprocalWord(vn1) + + // Compute each digit of quotient. + for j := m; j >= 0; j-- { + // Compute the 2-by-1 guess q̂. + // The first iteration must invent a leading 0 for u. + qhat := Word(_M) + var ujn Word + if j+n < len(u) { + ujn = u[j+n] + } + + // ujn ≤ vn1, or else q̂ would be more than one digit. + // For ujn == vn1, we set q̂ to the max digit M above. + // Otherwise, we compute the 2-by-1 guess. + if ujn != vn1 { + var rhat Word + qhat, rhat = divWW(ujn, u[j+n-1], vn1, rec) + + // Refine q̂ to a 3-by-2 guess. See “Refining Guesses” above. + vn2 := v[n-2] + x1, x2 := mulWW(qhat, vn2) + ujn2 := u[j+n-2] + for greaterThan(x1, x2, rhat, ujn2) { // x1x2 > r̂ u[j+n-2] + qhat-- + prevRhat := rhat + rhat += vn1 + // If r̂ overflows, its true value is ≥ B, so the two-digit + // number r̂ u[j+n-2] is now definitely > x1 x2 (which is < B²). + if rhat < prevRhat { + break + } + // TODO(rsc): No need for a full mulWW. + // Since q̂ was decremented: x2 -= vn2; if x2 underflows, x1-- + x1, x2 = mulWW(qhat, vn2) + } + } + + // Compute q̂·v. + qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) + qhl := len(qhatv) + if j+qhl > len(u) && qhatv[n] == 0 { + qhl-- + } + + // Subtract q̂·v from the current section of u. + // If it underflows, q̂·v > u, which we fix up + // by decrementing q̂ and adding v back.
+ c := subVV(u[j:j+qhl], u[j:], qhatv) + if c != 0 { + c := addVV(u[j:j+n], u[j:], v) + // If n == qhl, the carry from subVV and the carry from addVV + // cancel out and don't affect u[j+n]. + if n < qhl { + u[j+n] += c + } + qhat-- + } + + // Save quotient digit. + // Caller may know the top digit is zero and not leave room for it. + if j == m && m == len(q) && qhat == 0 { + continue + } + q[j] = qhat + } + + putNat(qhatvp) +} + +// greaterThan reports whether the two-digit numbers x1 x2 > y1 y2. +// TODO(rsc): In contradiction to most of this file, x1 is the high +// digit and x2 is the low digit. This should be fixed. +func greaterThan(x1, x2, y1, y2 Word) bool { + return x1 > y1 || x1 == y1 && x2 > y2 +} + +// divRecursiveThreshold is the number of divisor digits +// at which point divRecursive is faster than divBasic. +const divRecursiveThreshold = 100 + +// divRecursive implements recursive division as described above. +// It overwrites z with ⌊u/v⌋ and overwrites u with the remainder r. +// z must be large enough to hold ⌊u/v⌋. +// This function is just for allocating and freeing temporaries +// around divRecursiveStep, the real implementation. +func (z nat) divRecursive(u, v nat) { + // Recursion depth is (much) less than 2 log₂(len(v)). + // Allocate a slice of temporaries to be reused across recursion, + // plus one extra temporary not live across the recursion. + recDepth := 2 * bits.Len(uint(len(v))) + tmp := getNat(3 * len(v)) + temps := make([]*nat, recDepth) + + // divRecursiveStep accumulates into z, so start from zero. + z.clear() + z.divRecursiveStep(u, v, 0, tmp, temps) + + // Free temporaries. + for _, n := range temps { + if n != nil { + putNat(n) + } + } + putNat(tmp) +} + +// divRecursiveStep is the actual implementation of recursive division. +// It adds ⌊u/v⌋ to z and overwrites u with the remainder r. +// z must be large enough to hold ⌊u/v⌋. +// It uses temps[depth] (allocating if needed) as a temporary live across +// the recursive call. It also uses tmp, but not live across the recursion.
+func (z nat) divRecursiveStep(u, v nat, depth int, tmp *nat, temps []*nat) { + // u is a subsection of the original and may have leading zeros. + // TODO(rsc): The v = v.norm() is useless and should be removed. + // We know (and require) that v's top digit is ≥ B/2. + u = u.norm() + v = v.norm() + if len(u) == 0 { + z.clear() + return + } + + // Fall back to basic division if the problem is now small enough. + n := len(v) + if n < divRecursiveThreshold { + z.divBasic(u, v) + return + } + + // Nothing to do if u is shorter than v (implies u < v). + m := len(u) - n + if m < 0 { + return + } + + // We consider B digits in a row as a single wide digit. + // (See “Recursive Division” above.) + // + // TODO(rsc): rename B to Wide, to avoid confusion with _B, + // which is something entirely different. + // TODO(rsc): Look into whether using ⌈n/2⌉ is better than ⌊n/2⌋. + B := n / 2 + + // Allocate a nat for qhat below. + if temps[depth] == nil { + temps[depth] = getNat(n) // TODO(rsc): Can be just B+1. + } else { + *temps[depth] = temps[depth].make(B + 1) + } + + // Compute each wide digit of the quotient. + // + // TODO(rsc): Change the loop to be + // for j := (m+B-1)/B*B; j > 0; j -= B { + // which will make the final step a regular step, letting us + // delete what amounts to an extra copy of the loop body below. + j := m + for j > B { + // Divide u[j-B:j+n] (3 wide digits) by v (2 wide digits). + // First make the 2-by-1-wide-digit guess using a recursive call. + // Then extend the guess to the full 3-by-2 (see “Refining Guesses”). + // + // For the 2-by-1-wide-digit guess, instead of doing 2B-by-B-digit, + // we use a (2B+1)-by-(B+1) digit, which handles the possibility that + // the result has an extra leading 1 digit as well as guaranteeing + // that the computed q̂ will be off by at most 1 instead of 2. + + // s is the number of digits to drop from the 3B- and 2B-digit chunks. + // We drop B-1 to be left with 2B+1 and B+1.
+ s := (B - 1) + + // uu is the up-to-3B-digit section of u we are working on. + uu := u[j-B:] + + // Compute the 2-by-1 guess q̂, leaving r̂ in uu[s:B+n]. + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(uu[s:B+n], v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + + // Extend to a 3-by-2 quotient and remainder. + // Because divRecursiveStep overwrote the top part of uu with + // the remainder r̂, the full uu already contains the equivalent + // of r̂·B + uₙ₋₂ from the “Refining Guesses” discussion. + // Subtracting q̂·vₙ₋₂ from it will compute the full-length remainder. + // If that subtraction underflows, q̂·v > u, which we fix up + // by decrementing q̂ and adding v back, same as in long division. + + // TODO(rsc): Instead of subtract and fix-up, this code is computing + // q̂·vₙ₋₂ and decrementing q̂ until that product is ≤ u. + // But we can do the subtraction directly, as in the comment above + // and in long division, because we know that q̂ is wrong by at most one. + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + for i := 0; i < 2; i++ { + e := qhatv.cmp(uu.norm()) + if e <= 0 { + break + } + // q̂·v[:s] is still too large: decrement q̂, take one v[:s] out of + // the product (propagating the borrow), and add v[s:] back into uu[s:]. + subVW(qhat, qhat, 1) + c := subVV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + subVW(qhatv[s:], qhatv[s:], c) + } + addAt(uu[s:], v[s:], 0) + } + if qhatv.cmp(uu.norm()) > 0 { + panic("impossible") + } + c := subVV(uu[:len(qhatv)], uu[:len(qhatv)], qhatv) + if c > 0 { + subVW(uu[len(qhatv):], uu[len(qhatv):], c) + } + // Add this wide digit of the quotient into z at digit offset j-B. + addAt(z, qhat, j-B) + j -= B + } + + // TODO(rsc): Rewrite loop as described above and delete all this code. + + // Now u < (v<<B), where the shift counts digits (words), not bits. + // Compute the low digits of the quotient in the same way. + // Choose shift = B-1 again. + s := B - 1 + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(u[s:].norm(), v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + // Set the correct remainder as before.
+ for i := 0; i < 2; i++ { + if e := qhatv.cmp(u.norm()); e > 0 { + subVW(qhat, qhat, 1) + c := subVV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + subVW(qhatv[s:], qhatv[s:], c) + } + addAt(u[s:], v[s:], 0) + } + } + if qhatv.cmp(u.norm()) > 0 { + panic("impossible") + } + c := subVV(u[0:len(qhatv)], u[0:len(qhatv)], qhatv) + if c > 0 { + c = subVW(u[len(qhatv):], u[len(qhatv):], c) + } + if c > 0 { + panic("impossible") + } + + // Done! Add the final low quotient digits into z. + addAt(z, qhat.norm(), 0) +} diff --git a/src/math/big/prime.go b/src/math/big/prime.go new file mode 100644 index 0000000..d9a5f1e --- /dev/null +++ b/src/math/big/prime.go @@ -0,0 +1,320 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import "math/rand" + +// ProbablyPrime reports whether x is probably prime, +// applying the Miller-Rabin test with n pseudorandomly chosen bases +// as well as a Baillie-PSW test. +// +// If x is prime, ProbablyPrime returns true. +// If x is chosen randomly and not prime, ProbablyPrime probably returns false. +// The probability of returning true for a randomly chosen non-prime is at most ¼ⁿ. +// +// ProbablyPrime is 100% accurate for inputs less than 2⁶⁴. +// See Menezes et al., Handbook of Applied Cryptography, 1997, pp. 145-149, +// and FIPS 186-4 Appendix F for further discussion of the error probabilities. +// +// ProbablyPrime is not suitable for judging primes that an adversary may +// have crafted to fool the test. +// +// As of Go 1.8, ProbablyPrime(0) is allowed and applies only a Baillie-PSW test. +// Before Go 1.8, ProbablyPrime applied only the Miller-Rabin tests, and ProbablyPrime(0) panicked. +// +// ProbablyPrime panics if n is negative.
+func (x *Int) ProbablyPrime(n int) bool { + // Note regarding the doc comment above: + // It would be more precise to say that the Baillie-PSW test uses the + // extra strong Lucas test as its Lucas test, but since no one knows + // how to tell any of the Lucas tests apart inside a Baillie-PSW test + // (they all work equally well empirically), that detail need not be + // documented or implicitly guaranteed. + // The comment does avoid saying "the" Baillie-PSW test + // because of this general ambiguity. + + if n < 0 { + panic("negative n for ProbablyPrime") + } + // Negative numbers and zero are never prime. + if x.neg || len(x.abs) == 0 { + return false + } + + // primeBitMask records the primes < 64. + const primeBitMask uint64 = 1<<2 | 1<<3 | 1<<5 | 1<<7 | + 1<<11 | 1<<13 | 1<<17 | 1<<19 | 1<<23 | 1<<29 | 1<<31 | + 1<<37 | 1<<41 | 1<<43 | 1<<47 | 1<<53 | 1<<59 | 1<<61 + + // A single-word x below 64 is answered directly from the mask. + w := x.abs[0] + if len(x.abs) == 1 && w < 64 { + return primeBitMask&(1<<w) != 0 + } + + if w&1 == 0 { + return false // x is even + } + + // Trial division by the odd primes up to 53. The primes are split into + // two products that each fit in 32 bits, so a 32-bit build needs two modW + // calls; their combined product fits in 64 bits, so a 64-bit build needs one. + const primesA = 3 * 5 * 7 * 11 * 13 * 17 * 19 * 23 * 37 + const primesB = 29 * 31 * 41 * 43 * 47 * 53 + + var rA, rB uint32 + switch _W { + case 32: + rA = uint32(x.abs.modW(primesA)) + rB = uint32(x.abs.modW(primesB)) + case 64: + r := x.abs.modW((primesA * primesB) & _M) + rA = uint32(r % primesA) + rB = uint32(r % primesB) + default: + panic("math/big: invalid word size") + } + + if rA%3 == 0 || rA%5 == 0 || rA%7 == 0 || rA%11 == 0 || rA%13 == 0 || rA%17 == 0 || rA%19 == 0 || rA%23 == 0 || rA%37 == 0 || + rB%29 == 0 || rB%31 == 0 || rB%41 == 0 || rB%43 == 0 || rB%47 == 0 || rB%53 == 0 { + return false + } + + // Run n+1 Miller-Rabin rounds (one of them forced to base 2) and then + // the Lucas test. + return x.abs.probablyPrimeMillerRabin(n+1, true) && x.abs.probablyPrimeLucas() +} + +// probablyPrimeMillerRabin reports whether n passes reps rounds of the +// Miller-Rabin primality test, using pseudo-randomly chosen bases. +// If force2 is true, one of the rounds is forced to use base 2. +// See Handbook of Applied Cryptography, p. 139, Algorithm 4.24.
+// The number n is known to be non-zero. +func (n nat) probablyPrimeMillerRabin(reps int, force2 bool) bool { + nm1 := nat(nil).sub(n, natOne) + // Determine odd q and k such that nm1 = q << k. + k := nm1.trailingZeroBits() + q := nat(nil).shr(nm1, k) + + nm3 := nat(nil).sub(nm1, natTwo) + // Seed the generator from n's low word so the bases chosen are + // a deterministic function of n. + rand := rand.New(rand.NewSource(int64(n[0]))) + + var x, y, quotient nat + nm3Len := nm3.bitLen() + +NextRandom: + for i := 0; i < reps; i++ { + if i == reps-1 && force2 { + x = x.set(natTwo) + } else { + // Pick a base by adding 2 to a random value bounded by nm3 = n-3 + // (presumably uniform in [2, n-2]; confirm against nat.random). + x = x.random(rand, nm3, nm3Len) + x = x.add(x, natTwo) + } + // Per Miller-Rabin, y is x^q mod n; 1 or n-1 means this base + // does not witness compositeness. + y = y.expNN(x, q, n) + if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { + continue + } + // Square y up to k-1 times. Reaching n-1 passes this round; + // reaching 1 first means n is composite. + for j := uint(1); j < k; j++ { + y = y.sqr(y) + quotient, y = quotient.div(y, y, n) + if y.cmp(nm1) == 0 { + continue NextRandom + } + if y.cmp(natOne) == 0 { + return false + } + } + // Never reached n-1: x witnesses that n is composite. + return false + } + + return true +} + +// probablyPrimeLucas reports whether n passes the "almost extra strong" Lucas probable prime test, +// using Baillie-OEIS parameter selection. This corresponds to "AESLPSP" on Jacobsen's tables (link below). +// The combination of this test and a Miller-Rabin/Fermat test with base 2 gives a Baillie-PSW test. +// +// References: +// +// Baillie and Wagstaff, "Lucas Pseudoprimes", Mathematics of Computation 35(152), +// October 1980, pp. 1391-1417, especially page 1401. +// https://www.ams.org/journals/mcom/1980-35-152/S0025-5718-1980-0583518-6/S0025-5718-1980-0583518-6.pdf +// +// Grantham, "Frobenius Pseudoprimes", Mathematics of Computation 70(234), +// March 2000, pp. 873-891. +// https://www.ams.org/journals/mcom/2001-70-234/S0025-5718-00-01197-2/S0025-5718-00-01197-2.pdf +// +// Baillie, "Extra strong Lucas pseudoprimes", OEIS A217719, https://oeis.org/A217719. +// +// Jacobsen, "Pseudoprime Statistics, Tables, and Data", http://ntheory.org/pseudoprimes.html. +// +// Nicely, "The Baillie-PSW Primality Test", http://www.trnicely.net/misc/bpsw.html.
+// (Note that Nicely's definition of the "extra strong" test gives the wrong Jacobi condition, +// as pointed out by Jacobsen.) +// +// Crandall and Pomerance, Prime Numbers: A Computational Perspective, 2nd ed. +// Springer, 2005. +func (n nat) probablyPrimeLucas() bool { + // Discard 0, 1. + if len(n) == 0 || n.cmp(natOne) == 0 { + return false + } + // Two is the only even prime. + // Already checked by caller, but here to allow testing in isolation. + if n[0]&1 == 0 { + return n.cmp(natTwo) == 0 + } + + // Baillie-OEIS "method C" for choosing D, P, Q, + // as in https://oeis.org/A217719/a217719.txt: + // try increasing P ≥ 3 such that D = P² - 4 (so Q = 1) + // until Jacobi(D, n) = -1. + // The search is expected to succeed for non-square n after just a few trials. + // After more than expected failures, check whether n is square + // (which would cause Jacobi(D, n) = 1 for all D not dividing n). + p := Word(3) + d := nat{1} + t1 := nat(nil) // temp + intD := &Int{abs: d} + intN := &Int{abs: n} + for ; ; p++ { + if p > 10000 { + // This is widely believed to be impossible. + // If we get a report, we'll want the exact number n. + panic("math/big: internal error: cannot find (D/n) = -1 for " + intN.String()) + } + d[0] = p*p - 4 + j := Jacobi(intD, intN) + if j == -1 { + break + } + if j == 0 { + // d = p²-4 = (p-2)(p+2). + // If (d/n) == 0 then d shares a prime factor with n. + // Since the loop proceeds in increasing p and starts with p-2==1, + // the shared prime factor must be p+2. + // If p+2 == n, then n is prime; otherwise p+2 is a proper factor of n. + return len(n) == 1 && n[0] == p+2 + } + if p == 40 { + // We'll never find (d/n) = -1 if n is a square. + // If n is a non-square we expect to find a d in just a few attempts on average. + // After 40 attempts, take a moment to check if n is indeed a square. 
+ t1 = t1.sqrt(n) + t1 = t1.sqr(t1) + if t1.cmp(n) == 0 { + return false + } + } + } + + // Grantham definition of "extra strong Lucas pseudoprime", after Thm 2.3 on p. 876 + // (D, P, Q above have become Δ, b, 1): + // + // Let U_n = U_n(b, 1), V_n = V_n(b, 1), and Δ = b²-4. + // An extra strong Lucas pseudoprime to base b is a composite n = 2^r s + Jacobi(Δ, n), + // where s is odd and gcd(n, 2*Δ) = 1, such that either (i) U_s ≡ 0 mod n and V_s ≡ ±2 mod n, + // or (ii) V_{2^t s} ≡ 0 mod n for some 0 ≤ t < r-1. + // + // We know gcd(n, Δ) = 1 or else we'd have found Jacobi(d, n) == 0 above. + // We know gcd(n, 2) = 1 because n is odd. + // + // Arrange s = (n - Jacobi(Δ, n)) / 2^r = (n+1) / 2^r. + s := nat(nil).add(n, natOne) + r := int(s.trailingZeroBits()) + s = s.shr(s, uint(r)) + nm2 := nat(nil).sub(n, natTwo) // n-2 + + // We apply the "almost extra strong" test, which checks the above conditions + // except for U_s ≡ 0 mod n, which allows us to avoid computing any U_k values. + // Jacobsen points out that maybe we should just do the full extra strong test: + // "It is also possible to recover U_n using Crandall and Pomerance equation 3.13: + // U_n = D^-1 (2V_{n+1} - PV_n) allowing us to run the full extra-strong test + // at the cost of a single modular inversion. This computation is easy and fast in GMP, + // so we can get the full extra-strong test at essentially the same performance as the + // almost extra strong test." + + // Compute Lucas sequence V_s(b, 1), where: + // + // V(0) = 2 + // V(1) = P + // V(k) = P V(k-1) - Q V(k-2). + // + // (Remember that due to method C above, P = b, Q = 1.) + // + // In general V(k) = α^k + β^k, where α and β are roots of x² - Px + Q. + // Crandall and Pomerance (p.147) observe that for 0 ≤ j ≤ k, + // + // V(j+k) = V(j)V(k) - V(k-j). 
+ // + // So in particular, to quickly double the subscript: + // + // V(2k) = V(k)² - 2 + // V(2k+1) = V(k) V(k+1) - P + // + // We can therefore start with k=0 and build up to k=s in log₂(s) steps. + natP := nat(nil).setWord(p) + vk := nat(nil).setWord(2) + vk1 := nat(nil).setWord(p) + t2 := nat(nil) // temp + for i := int(s.bitLen()); i >= 0; i-- { + if s.bit(uint(i)) != 0 { + // k' = 2k+1 + // V(k') = V(2k+1) = V(k) V(k+1) - P. + t1 = t1.mul(vk, vk1) + t1 = t1.add(t1, n) + t1 = t1.sub(t1, natP) + t2, vk = t2.div(vk, t1, n) + // V(k'+1) = V(2k+2) = V(k+1)² - 2. + t1 = t1.sqr(vk1) + t1 = t1.add(t1, nm2) + t2, vk1 = t2.div(vk1, t1, n) + } else { + // k' = 2k + // V(k'+1) = V(2k+1) = V(k) V(k+1) - P. + t1 = t1.mul(vk, vk1) + t1 = t1.add(t1, n) + t1 = t1.sub(t1, natP) + t2, vk1 = t2.div(vk1, t1, n) + // V(k') = V(2k) = V(k)² - 2 + t1 = t1.sqr(vk) + t1 = t1.add(t1, nm2) + t2, vk = t2.div(vk, t1, n) + } + } + + // Now k=s, so vk = V(s). Check V(s) ≡ ±2 (mod n). + if vk.cmp(natTwo) == 0 || vk.cmp(nm2) == 0 { + // Check U(s) ≡ 0. + // As suggested by Jacobsen, apply Crandall and Pomerance equation 3.13: + // + // U(k) = D⁻¹ (2 V(k+1) - P V(k)) + // + // Since we are checking for U(k) == 0 it suffices to check 2 V(k+1) == P V(k) mod n, + // or P V(k) - 2 V(k+1) == 0 mod n. + t1 := t1.mul(vk, natP) + t2 := t2.shl(vk1, 1) + if t1.cmp(t2) < 0 { + t1, t2 = t2, t1 + } + t1 = t1.sub(t1, t2) + t3 := vk1 // steal vk1, no longer needed below + vk1 = nil + _ = vk1 + t2, t3 = t2.div(t3, t1, n) + if len(t3) == 0 { + return true + } + } + + // Check V(2^t s) ≡ 0 mod n for some 0 ≤ t < r-1. + for t := 0; t < r-1; t++ { + if len(vk) == 0 { // vk == 0 + return true + } + // Optimization: V(k) = 2 is a fixed point for V(k') = V(k)² - 2, + // so if V(k) = 2, we can stop: we will never find a future V(k) == 0. 
+ if len(vk) == 1 && vk[0] == 2 { // vk == 2 + return false + } + // k' = 2k + // V(k') = V(2k) = V(k)² - 2 + t1 = t1.sqr(vk) + t1 = t1.sub(t1, natTwo) + t2, vk = t2.div(vk, t1, n) + } + return false +} diff --git a/src/math/big/prime_test.go b/src/math/big/prime_test.go new file mode 100644 index 0000000..8596e33 --- /dev/null +++ b/src/math/big/prime_test.go @@ -0,0 +1,222 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "fmt" + "strings" + "testing" + "unicode" +) + +var primes = []string{ + "2", + "3", + "5", + "7", + "11", + + "13756265695458089029", + "13496181268022124907", + "10953742525620032441", + "17908251027575790097", + + // https://golang.org/issue/638 + "18699199384836356663", + + "98920366548084643601728869055592650835572950932266967461790948584315647051443", + "94560208308847015747498523884063394671606671904944666360068158221458669711639", + + // https://primes.utm.edu/lists/small/small3.html + "449417999055441493994709297093108513015373787049558499205492347871729927573118262811508386655998299074566974373711472560655026288668094291699357843464363003144674940345912431129144354948751003607115263071543163", + "230975859993204150666423538988557839555560243929065415434980904258310530753006723857139742334640122533598517597674807096648905501653461687601339782814316124971547968912893214002992086353183070342498989426570593", + "5521712099665906221540423207019333379125265462121169655563495403888449493493629943498064604536961775110765377745550377067893607246020694972959780839151452457728855382113555867743022746090187341871655890805971735385789993", + 
"203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", + + // ECC primes: https://tools.ietf.org/html/draft-ladd-safecurves-02 + "3618502788666131106986593281521497120414687020801267626233049500247285301239", // Curve1174: 2^251-9 + "57896044618658097711785492504343953926634992332820282019728792003956564819949", // Curve25519: 2^255-19 + "9850501549098619803069760025035903451269934817616361666987073351061430442874302652853566563721228910201656997576599", // E-382: 2^382-105 + "42307582002575910332922579714097346549017899709713998034217522897561970639123926132812109468141778230245837569601494931472367", // Curve41417: 2^414-17 + "6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", // E-521: 2^521-1 +} + +var composites = []string{ + "0", + "1", + "21284175091214687912771199898307297748211672914763848041968395774954376176754", + "6084766654921918907427900243509372380954290099172559290432744450051395395951", + "84594350493221918389213352992032324280367711247940675652888030554255915464401", + "82793403787388584738507275144194252681", + + // Arnault, "Rabin-Miller Primality Test: Composite Numbers Which Pass It", + // Mathematics of Computation, 64(209) (January 1995), pp. 335-361. 
+ "1195068768795265792518361315725116351898245581", // strong pseudoprime to prime bases 2 through 29 + // strong pseudoprime to all prime bases up to 200 + ` + 80383745745363949125707961434194210813883768828755814583748891752229 + 74273765333652186502336163960045457915042023603208766569966760987284 + 0439654082329287387918508691668573282677617710293896977394701670823 + 0428687109997439976544144845341155872450633409279022275296229414984 + 2306881685404326457534018329786111298960644845216191652872597534901`, + + // Extra-strong Lucas pseudoprimes. https://oeis.org/A217719 + "989", + "3239", + "5777", + "10877", + "27971", + "29681", + "30739", + "31631", + "39059", + "72389", + "73919", + "75077", + "100127", + "113573", + "125249", + "137549", + "137801", + "153931", + "155819", + "161027", + "162133", + "189419", + "218321", + "231703", + "249331", + "370229", + "429479", + "430127", + "459191", + "473891", + "480689", + "600059", + "621781", + "632249", + "635627", + + "3673744903", + "3281593591", + "2385076987", + "2738053141", + "2009621503", + "1502682721", + "255866131", + "117987841", + "587861", + + "6368689", + "8725753", + "80579735209", + "105919633", +} + +func cutSpace(r rune) rune { + if unicode.IsSpace(r) { + return -1 + } + return r +} + +func TestProbablyPrime(t *testing.T) { + nreps := 20 + if testing.Short() { + nreps = 1 + } + for i, s := range primes { + p, _ := new(Int).SetString(s, 10) + if !p.ProbablyPrime(nreps) || nreps != 1 && !p.ProbablyPrime(1) || !p.ProbablyPrime(0) { + t.Errorf("#%d prime found to be non-prime (%s)", i, s) + } + } + + for i, s := range composites { + s = strings.Map(cutSpace, s) + c, _ := new(Int).SetString(s, 10) + if c.ProbablyPrime(nreps) || nreps != 1 && c.ProbablyPrime(1) || c.ProbablyPrime(0) { + t.Errorf("#%d composite found to be prime (%s)", i, s) + } + } + + // check that ProbablyPrime panics if n < 0, and only then + c := NewInt(11) // a prime + for _, n := range []int{-1, 0, 1} { + func() { + defer func() { + if n < 0
&& recover() == nil { + t.Fatalf("expected panic from ProbablyPrime(%d)", n) + } + }() + if !c.ProbablyPrime(n) { + t.Fatalf("%v should be a prime", c) + } + }() + } +} + +func BenchmarkProbablyPrime(b *testing.B) { + p, _ := new(Int).SetString("203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", 10) + for _, n := range []int{0, 1, 5, 10, 20} { + b.Run(fmt.Sprintf("n=%d", n), func(b *testing.B) { + for i := 0; i < b.N; i++ { + p.ProbablyPrime(n) + } + }) + } + + b.Run("Lucas", func(b *testing.B) { + for i := 0; i < b.N; i++ { + p.abs.probablyPrimeLucas() + } + }) + b.Run("MillerRabinBase2", func(b *testing.B) { + for i := 0; i < b.N; i++ { + p.abs.probablyPrimeMillerRabin(1, true) + } + }) +} + +func TestMillerRabinPseudoprimes(t *testing.T) { + testPseudoprimes(t, "probablyPrimeMillerRabin", + func(n nat) bool { return n.probablyPrimeMillerRabin(1, true) && !n.probablyPrimeLucas() }, + // https://oeis.org/A001262 + []int{2047, 3277, 4033, 4681, 8321, 15841, 29341, 42799, 49141, 52633, 65281, 74665, 80581, 85489, 88357, 90751}) +} + +func TestLucasPseudoprimes(t *testing.T) { + testPseudoprimes(t, "probablyPrimeLucas", + func(n nat) bool { return n.probablyPrimeLucas() && !n.probablyPrimeMillerRabin(1, true) }, + // https://oeis.org/A217719 + []int{989, 3239, 5777, 10877, 27971, 29681, 30739, 31631, 39059, 72389, 73919, 75077}) +} + +func testPseudoprimes(t *testing.T, name string, cond func(nat) bool, want []int) { + n := nat{1} + for i := 3; i < 100000; i += 2 { + if testing.Short() { + if len(want) == 0 { + break + } + if i < want[0]-2 { + i = want[0] - 2 + } + } + n[0] = Word(i) + pseudo := cond(n) + if pseudo && (len(want) == 0 || i != want[0]) { + t.Errorf("%s(%v, base=2) = true, want false", name, i) + } else if 
!pseudo && len(want) >= 1 && i == want[0] { + t.Errorf("%s(%v, base=2) = false, want true", name, i) + } + if len(want) > 0 && i == want[0] { + want = want[1:] + } + } + if len(want) > 0 { + t.Fatalf("forgot to test %v", want) + } +} diff --git a/src/math/big/rat.go b/src/math/big/rat.go new file mode 100644 index 0000000..731a979 --- /dev/null +++ b/src/math/big/rat.go @@ -0,0 +1,544 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision rational numbers. + +package big + +import ( + "fmt" + "math" +) + +// A Rat represents a quotient a/b of arbitrary precision. +// The zero value for a Rat represents the value 0. +// +// Operations always take pointer arguments (*Rat) rather +// than Rat values, and each unique Rat value requires +// its own unique *Rat pointer. To "copy" a Rat value, +// an existing (or newly allocated) Rat must be set to +// a new value using the Rat.Set method; shallow copies +// of Rats are not supported and may lead to errors. +type Rat struct { + // To make zero values for Rat work w/o initialization, + // a zero value of b (len(b) == 0) acts like b == 1. At + // the earliest opportunity (when an assignment to the Rat + // is made), such uninitialized denominators are set to 1. + // a.neg determines the sign of the Rat, b.neg is ignored. + a, b Int +} + +// NewRat creates a new Rat with numerator a and denominator b. +func NewRat(a, b int64) *Rat { + return new(Rat).SetFrac64(a, b) +} + +// SetFloat64 sets z to exactly f and returns z. +// If f is not finite, SetFloat64 returns nil.
+func (z *Rat) SetFloat64(f float64) *Rat { + const expMask = 1<<11 - 1 + bits := math.Float64bits(f) + mantissa := bits & (1<<52 - 1) + exp := int((bits >> 52) & expMask) + switch exp { + case expMask: // non-finite + return nil + case 0: // denormal + exp -= 1022 + default: // normal + mantissa |= 1 << 52 + exp -= 1023 + } + + shift := 52 - exp + + // Optimization (?): partially pre-normalise. + for mantissa&1 == 0 && shift > 0 { + mantissa >>= 1 + shift-- + } + + z.a.SetUint64(mantissa) + z.a.neg = f < 0 + z.b.Set(intOne) + if shift > 0 { + z.b.Lsh(&z.b, uint(shift)) + } else { + z.a.Lsh(&z.a, uint(-shift)) + } + return z.norm() +} + +// quotToFloat32 returns the non-negative float32 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat32(a, b nat) (f float32, exact bool) { + const ( + // float size in bits + Fsize = 32 + + // mantissa + Msize = 23 + Msize1 = Msize + 1 // incl. implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1)) + // (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B). + // This is 2 or 3 more than the float32 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded.
+ exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low32(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) + } + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Msize1] + lostbits := mantissa & (1<<shift - 1) + haveRem = haveRem || lostbits != 0 + mantissa >>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even. + exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<Msize2 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1. + + f = float32(math.Ldexp(float64(mantissa), exp-Msize1)) + if math.IsInf(float64(f), 0) { + exact = false + } + return +} + +// quotToFloat64 returns the non-negative float64 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat64(a, b nat) (f float64, exact bool) { + const ( + // float size in bits + Fsize = 64 + + // mantissa + Msize = 52 + Msize1 = Msize + 1 // incl.
implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1)) + // (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B). + // This is 2 or 3 more than the float64 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low64(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) + } + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Msize1] + lostbits := mantissa & (1<<shift - 1) + haveRem = haveRem || lostbits != 0 + mantissa >>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even.
+ exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<Msize2 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1. + + f = math.Ldexp(float64(mantissa), exp-Msize1) + if math.IsInf(f, 0) { + exact = false + } + return +} + +// Float32 returns the nearest float32 value for x and a bool indicating +// whether f represents x exactly. If the magnitude of x is too large to +// be represented by a float32, f is an infinity and exact is false. +// The sign of f always matches the sign of x, even if f == 0. +func (x *Rat) Float32() (f float32, exact bool) { + b := x.b.abs + if len(b) == 0 { + b = natOne + } + f, exact = quotToFloat32(x.a.abs, b) + if x.a.neg { + f = -f + } + return +} + +// Float64 returns the nearest float64 value for x and a bool indicating +// whether f represents x exactly. If the magnitude of x is too large to +// be represented by a float64, f is an infinity and exact is false. +// The sign of f always matches the sign of x, even if f == 0. +func (x *Rat) Float64() (f float64, exact bool) { + b := x.b.abs + if len(b) == 0 { + b = natOne + } + f, exact = quotToFloat64(x.a.abs, b) + if x.a.neg { + f = -f + } + return +} + +// SetFrac sets z to a/b and returns z. +// If b == 0, SetFrac panics. +func (z *Rat) SetFrac(a, b *Int) *Rat { + z.a.neg = a.neg != b.neg + babs := b.abs + if len(babs) == 0 { + panic("division by zero") + } + if &z.a == b || alias(z.a.abs, babs) { + babs = nat(nil).set(babs) // make a copy + } + z.a.abs = z.a.abs.set(a.abs) + z.b.abs = z.b.abs.set(babs) + return z.norm() +} + +// SetFrac64 sets z to a/b and returns z. +// If b == 0, SetFrac64 panics. 
+func (z *Rat) SetFrac64(a, b int64) *Rat { + if b == 0 { + panic("division by zero") + } + z.a.SetInt64(a) + if b < 0 { + b = -b + z.a.neg = !z.a.neg + } + z.b.abs = z.b.abs.setUint64(uint64(b)) + return z.norm() +} + +// SetInt sets z to x (by making a copy of x) and returns z. +func (z *Rat) SetInt(x *Int) *Rat { + z.a.Set(x) + z.b.abs = z.b.abs.setWord(1) + return z +} + +// SetInt64 sets z to x and returns z. +func (z *Rat) SetInt64(x int64) *Rat { + z.a.SetInt64(x) + z.b.abs = z.b.abs.setWord(1) + return z +} + +// SetUint64 sets z to x and returns z. +func (z *Rat) SetUint64(x uint64) *Rat { + z.a.SetUint64(x) + z.b.abs = z.b.abs.setWord(1) + return z +} + +// Set sets z to x (by making a copy of x) and returns z. +func (z *Rat) Set(x *Rat) *Rat { + if z != x { + z.a.Set(&x.a) + z.b.Set(&x.b) + } + if len(z.b.abs) == 0 { + z.b.abs = z.b.abs.setWord(1) + } + return z +} + +// Abs sets z to |x| (the absolute value of x) and returns z. +func (z *Rat) Abs(x *Rat) *Rat { + z.Set(x) + z.a.neg = false + return z +} + +// Neg sets z to -x and returns z. +func (z *Rat) Neg(x *Rat) *Rat { + z.Set(x) + z.a.neg = len(z.a.abs) > 0 && !z.a.neg // 0 has no sign + return z +} + +// Inv sets z to 1/x and returns z. +// If x == 0, Inv panics. +func (z *Rat) Inv(x *Rat) *Rat { + if len(x.a.abs) == 0 { + panic("division by zero") + } + z.Set(x) + z.a.abs, z.b.abs = z.b.abs, z.a.abs + return z +} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x == 0 +// +1 if x > 0 +// +func (x *Rat) Sign() int { + return x.a.Sign() +} + +// IsInt reports whether the denominator of x is 1. +func (x *Rat) IsInt() bool { + return len(x.b.abs) == 0 || x.b.abs.cmp(natOne) == 0 +} + +// Num returns the numerator of x; it may be <= 0. +// The result is a reference to x's numerator; it +// may change if a new value is assigned to x, and vice versa. +// The sign of the numerator corresponds to the sign of x. 
+func (x *Rat) Num() *Int { + return &x.a +} + +// Denom returns the denominator of x; it is always > 0. +// The result is a reference to x's denominator, unless +// x is an uninitialized (zero value) Rat, in which case +// the result is a new Int of value 1. (To initialize x, +// any operation that sets x will do, including x.Set(x).) +// If the result is a reference to x's denominator it +// may change if a new value is assigned to x, and vice versa. +func (x *Rat) Denom() *Int { + // Note that x.b.neg is guaranteed false. + if len(x.b.abs) == 0 { + // Note: If this proves problematic, we could + // panic instead and require the Rat to + // be explicitly initialized. + return &Int{abs: nat{1}} + } + return &x.b +} + +func (z *Rat) norm() *Rat { + switch { + case len(z.a.abs) == 0: + // z == 0; normalize sign and denominator + z.a.neg = false + fallthrough + case len(z.b.abs) == 0: + // z is integer; normalize denominator + z.b.abs = z.b.abs.setWord(1) + default: + // z is fraction; normalize numerator and denominator + neg := z.a.neg + z.a.neg = false + z.b.neg = false + if f := NewInt(0).lehmerGCD(nil, nil, &z.a, &z.b); f.Cmp(intOne) != 0 { + z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs) + z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs) + } + z.a.neg = neg + } + return z +} + +// mulDenom sets z to the denominator product x*y (by taking into +// account that 0 values for x or y must be interpreted as 1) and +// returns z. +func mulDenom(z, x, y nat) nat { + switch { + case len(x) == 0 && len(y) == 0: + return z.setWord(1) + case len(x) == 0: + return z.set(y) + case len(y) == 0: + return z.set(x) + } + return z.mul(x, y) +} + +// scaleDenom sets z to the product x*f. +// If f == 0 (zero value of denominator), z is set to (a copy of) x. 
+func (z *Int) scaleDenom(x *Int, f nat) { + if len(f) == 0 { + z.Set(x) + return + } + z.abs = z.abs.mul(x.abs, f) + z.neg = x.neg +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func (x *Rat) Cmp(y *Rat) int { + var a, b Int + a.scaleDenom(&x.a, y.b.abs) + b.scaleDenom(&y.a, x.b.abs) + return a.Cmp(&b) +} + +// Add sets z to the sum x+y and returns z. +func (z *Rat) Add(x, y *Rat) *Rat { + var a1, a2 Int + a1.scaleDenom(&x.a, y.b.abs) + a2.scaleDenom(&y.a, x.b.abs) + z.a.Add(&a1, &a2) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Sub sets z to the difference x-y and returns z. +func (z *Rat) Sub(x, y *Rat) *Rat { + var a1, a2 Int + a1.scaleDenom(&x.a, y.b.abs) + a2.scaleDenom(&y.a, x.b.abs) + z.a.Sub(&a1, &a2) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Mul sets z to the product x*y and returns z. +func (z *Rat) Mul(x, y *Rat) *Rat { + if x == y { + // a squared Rat is positive and can't be reduced (no need to call norm()) + z.a.neg = false + z.a.abs = z.a.abs.sqr(x.a.abs) + if len(x.b.abs) == 0 { + z.b.abs = z.b.abs.setWord(1) + } else { + z.b.abs = z.b.abs.sqr(x.b.abs) + } + return z + } + z.a.Mul(&x.a, &y.a) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Quo sets z to the quotient x/y and returns z. +// If y == 0, Quo panics. +func (z *Rat) Quo(x, y *Rat) *Rat { + if len(y.a.abs) == 0 { + panic("division by zero") + } + var a, b Int + a.scaleDenom(&x.a, y.b.abs) + b.scaleDenom(&y.a, x.b.abs) + z.a.abs = a.abs + z.b.abs = b.abs + z.a.neg = a.neg != b.neg + return z.norm() +} diff --git a/src/math/big/rat_test.go b/src/math/big/rat_test.go new file mode 100644 index 0000000..d98c89b --- /dev/null +++ b/src/math/big/rat_test.go @@ -0,0 +1,746 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "math" + "testing" +) + +func TestZeroRat(t *testing.T) { + var x, y, z Rat + y.SetFrac64(0, 42) + + if x.Cmp(&y) != 0 { + t.Errorf("x and y should be both equal and zero") + } + + if s := x.String(); s != "0/1" { + t.Errorf("got x = %s, want 0/1", s) + } + + if s := x.RatString(); s != "0" { + t.Errorf("got x = %s, want 0", s) + } + + z.Add(&x, &y) + if s := z.RatString(); s != "0" { + t.Errorf("got x+y = %s, want 0", s) + } + + z.Sub(&x, &y) + if s := z.RatString(); s != "0" { + t.Errorf("got x-y = %s, want 0", s) + } + + z.Mul(&x, &y) + if s := z.RatString(); s != "0" { + t.Errorf("got x*y = %s, want 0", s) + } + + // check for division by zero + defer func() { + if s := recover(); s == nil || s.(string) != "division by zero" { + panic(s) + } + }() + z.Quo(&x, &y) +} + +func TestRatSign(t *testing.T) { + zero := NewRat(0, 1) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + s := x.Sign() + e := x.Cmp(zero) + if s != e { + t.Errorf("got %d; want %d for z = %v", s, e, &x) + } + } +} + +var ratCmpTests = []struct { + rat1, rat2 string + out int +}{ + {"0", "0/1", 0}, + {"1/1", "1", 0}, + {"-1", "-2/2", 0}, + {"1", "0", 1}, + {"0/1", "1/1", -1}, + {"-5/1434770811533343057144", "-5/1434770811533343057145", -1}, + {"49832350382626108453/8964749413", "49832350382626108454/8964749413", -1}, + {"-37414950961700930/7204075375675961", "37414950961700930/7204075375675961", -1}, + {"37414950961700930/7204075375675961", "74829901923401860/14408150751351922", 0}, +} + +func TestRatCmp(t *testing.T) { + for i, test := range ratCmpTests { + x, _ := new(Rat).SetString(test.rat1) + y, _ := new(Rat).SetString(test.rat2) + + out := x.Cmp(y) + if out != test.out { + t.Errorf("#%d got out = %v; want %v", i, out, test.out) + } + } +} + +func TestIsInt(t *testing.T) { + one := NewInt(1) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + i := x.IsInt() + e := 
x.Denom().Cmp(one) == 0 + if i != e { + t.Errorf("got IsInt(%v) == %v; want %v", x, i, e) + } + } +} + +func TestRatAbs(t *testing.T) { + zero := new(Rat) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + e := new(Rat).Set(x) + if e.Cmp(zero) < 0 { + e.Sub(zero, e) + } + z := new(Rat).Abs(x) + if z.Cmp(e) != 0 { + t.Errorf("got Abs(%v) = %v; want %v", x, z, e) + } + } +} + +func TestRatNeg(t *testing.T) { + zero := new(Rat) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + e := new(Rat).Sub(zero, x) + z := new(Rat).Neg(x) + if z.Cmp(e) != 0 { + t.Errorf("got Neg(%v) = %v; want %v", x, z, e) + } + } +} + +func TestRatInv(t *testing.T) { + zero := new(Rat) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + if x.Cmp(zero) == 0 { + continue // avoid division by zero + } + e := new(Rat).SetFrac(x.Denom(), x.Num()) + z := new(Rat).Inv(x) + if z.Cmp(e) != 0 { + t.Errorf("got Inv(%v) = %v; want %v", x, z, e) + } + } +} + +type ratBinFun func(z, x, y *Rat) *Rat +type ratBinArg struct { + x, y, z string +} + +func testRatBin(t *testing.T, i int, name string, f ratBinFun, a ratBinArg) { + x, _ := new(Rat).SetString(a.x) + y, _ := new(Rat).SetString(a.y) + z, _ := new(Rat).SetString(a.z) + out := f(new(Rat), x, y) + + if out.Cmp(z) != 0 { + t.Errorf("%s #%d got %s want %s", name, i, out, z) + } +} + +var ratBinTests = []struct { + x, y string + sum, prod string +}{ + {"0", "0", "0", "0"}, + {"0", "1", "1", "0"}, + {"-1", "0", "-1", "0"}, + {"-1", "1", "0", "-1"}, + {"1", "1", "2", "1"}, + {"1/2", "1/2", "1", "1/4"}, + {"1/4", "1/3", "7/12", "1/12"}, + {"2/5", "-14/3", "-64/15", "-28/15"}, + {"4707/49292519774798173060", "-3367/70976135186689855734", "84058377121001851123459/1749296273614329067191168098769082663020", "-1760941/388732505247628681598037355282018369560"}, + {"-61204110018146728334/3", "-31052192278051565633/2", 
"-215564796870448153567/6", "950260896245257153059642991192710872711/3"}, + {"-854857841473707320655/4237645934602118692642972629634714039", "-18/31750379913563777419", "-27/133467566250814981", "15387441146526731771790/134546868362786310073779084329032722548987800600710485341"}, + {"618575745270541348005638912139/19198433543745179392300736", "-19948846211000086/637313996471", "27674141753240653/30123979153216", "-6169936206128396568797607742807090270137721977/6117715203873571641674006593837351328"}, + {"-3/26206484091896184128", "5/2848423294177090248", "15310893822118706237/9330894968229805033368778458685147968", "-5/24882386581946146755650075889827061248"}, + {"26946729/330400702820", "41563965/225583428284", "1238218672302860271/4658307703098666660055", "224002580204097/14906584649915733312176"}, + {"-8259900599013409474/7", "-84829337473700364773/56707961321161574960", "-468402123685491748914621885145127724451/396955729248131024720", "350340947706464153265156004876107029701/198477864624065512360"}, + {"575775209696864/1320203974639986246357", "29/712593081308", "410331716733912717985762465/940768218243776489278275419794956", "808/45524274987585732633"}, + {"1786597389946320496771/2066653520653241", "6269770/1992362624741777", "3559549865190272133656109052308126637/4117523232840525481453983149257", "8967230/3296219033"}, + {"-36459180403360509753/32150500941194292113930", "9381566963714/9633539", "301622077145533298008420642898530153/309723104686531919656937098270", "-3784609207827/3426986245"}, +} + +func TestRatBin(t *testing.T) { + for i, test := range ratBinTests { + arg := ratBinArg{test.x, test.y, test.sum} + testRatBin(t, i, "Add", (*Rat).Add, arg) + + arg = ratBinArg{test.y, test.x, test.sum} + testRatBin(t, i, "Add symmetric", (*Rat).Add, arg) + + arg = ratBinArg{test.sum, test.x, test.y} + testRatBin(t, i, "Sub", (*Rat).Sub, arg) + + arg = ratBinArg{test.sum, test.y, test.x} + testRatBin(t, i, "Sub symmetric", (*Rat).Sub, arg) + + arg = 
ratBinArg{test.x, test.y, test.prod} + testRatBin(t, i, "Mul", (*Rat).Mul, arg) + + arg = ratBinArg{test.y, test.x, test.prod} + testRatBin(t, i, "Mul symmetric", (*Rat).Mul, arg) + + if test.x != "0" { + arg = ratBinArg{test.prod, test.x, test.y} + testRatBin(t, i, "Quo", (*Rat).Quo, arg) + } + + if test.y != "0" { + arg = ratBinArg{test.prod, test.y, test.x} + testRatBin(t, i, "Quo symmetric", (*Rat).Quo, arg) + } + } +} + +func TestIssue820(t *testing.T) { + x := NewRat(3, 1) + y := NewRat(2, 1) + z := y.Quo(x, y) + q := NewRat(3, 2) + if z.Cmp(q) != 0 { + t.Errorf("got %s want %s", z, q) + } + + y = NewRat(3, 1) + x = NewRat(2, 1) + z = y.Quo(x, y) + q = NewRat(2, 3) + if z.Cmp(q) != 0 { + t.Errorf("got %s want %s", z, q) + } + + x = NewRat(3, 1) + z = x.Quo(x, x) + q = NewRat(3, 3) + if z.Cmp(q) != 0 { + t.Errorf("got %s want %s", z, q) + } +} + +var setFrac64Tests = []struct { + a, b int64 + out string +}{ + {0, 1, "0"}, + {0, -1, "0"}, + {1, 1, "1"}, + {-1, 1, "-1"}, + {1, -1, "-1"}, + {-1, -1, "1"}, + {-9223372036854775808, -9223372036854775808, "1"}, +} + +func TestRatSetFrac64Rat(t *testing.T) { + for i, test := range setFrac64Tests { + x := new(Rat).SetFrac64(test.a, test.b) + if x.RatString() != test.out { + t.Errorf("#%d got %s want %s", i, x.RatString(), test.out) + } + } +} + +func TestIssue2379(t *testing.T) { + // 1) no aliasing + q := NewRat(3, 2) + x := new(Rat) + x.SetFrac(NewInt(3), NewInt(2)) + if x.Cmp(q) != 0 { + t.Errorf("1) got %s want %s", x, q) + } + + // 2) aliasing of numerator + x = NewRat(2, 3) + x.SetFrac(NewInt(3), x.Num()) + if x.Cmp(q) != 0 { + t.Errorf("2) got %s want %s", x, q) + } + + // 3) aliasing of denominator + x = NewRat(2, 3) + x.SetFrac(x.Denom(), NewInt(2)) + if x.Cmp(q) != 0 { + t.Errorf("3) got %s want %s", x, q) + } + + // 4) aliasing of numerator and denominator + x = NewRat(2, 3) + x.SetFrac(x.Denom(), x.Num()) + if x.Cmp(q) != 0 { + t.Errorf("4) got %s want %s", x, q) + } + + // 5) numerator and denominator are 
the same + q = NewRat(1, 1) + x = new(Rat) + n := NewInt(7) + x.SetFrac(n, n) + if x.Cmp(q) != 0 { + t.Errorf("5) got %s want %s", x, q) + } +} + +func TestIssue3521(t *testing.T) { + a := new(Int) + b := new(Int) + a.SetString("64375784358435883458348587", 0) + b.SetString("4789759874531", 0) + + // 0) a raw zero value has 1 as denominator + zero := new(Rat) + one := NewInt(1) + if zero.Denom().Cmp(one) != 0 { + t.Errorf("0) got %s want %s", zero.Denom(), one) + } + + // 1a) the denominator of an (uninitialized) zero value is not shared with the value + s := &zero.b + d := zero.Denom() + if d == s { + t.Errorf("1a) got %s (%p) == %s (%p) want different *Int values", d, d, s, s) + } + + // 1b) the denominator of an (uninitialized) value is a new 1 each time + d1 := zero.Denom() + d2 := zero.Denom() + if d1 == d2 { + t.Errorf("1b) got %s (%p) == %s (%p) want different *Int values", d1, d1, d2, d2) + } + + // 1c) the denominator of an initialized zero value is shared with the value + x := new(Rat) + x.Set(x) // initialize x (any operation that sets x explicitly will do) + s = &x.b + d = x.Denom() + if d != s { + t.Errorf("1c) got %s (%p) != %s (%p) want identical *Int values", d, d, s, s) + } + + // 1d) a zero value remains zero independent of denominator + x.Denom().Set(new(Int).Neg(b)) + if x.Cmp(zero) != 0 { + t.Errorf("1d) got %s want %s", x, zero) + } + + // 1e) a zero value may have a denominator != 0 and != 1 + x.Num().Set(a) + qab := new(Rat).SetFrac(a, b) + if x.Cmp(qab) != 0 { + t.Errorf("1e) got %s want %s", x, qab) + } + + // 2a) an integral value becomes a fraction depending on denominator + x.SetFrac64(10, 2) + x.Denom().SetInt64(3) + q53 := NewRat(5, 3) + if x.Cmp(q53) != 0 { + t.Errorf("2a) got %s want %s", x, q53) + } + + // 2b) an integral value becomes a fraction depending on denominator + x = NewRat(10, 2) + x.Denom().SetInt64(3) + if x.Cmp(q53) != 0 { + t.Errorf("2b) got %s want %s", x, q53) + } + + // 3) changing the numerator/denominator of a 
Rat changes the Rat + x.SetFrac(a, b) + a = x.Num() + b = x.Denom() + a.SetInt64(5) + b.SetInt64(3) + if x.Cmp(q53) != 0 { + t.Errorf("3) got %s want %s", x, q53) + } +} + +func TestFloat32Distribution(t *testing.T) { + // Generate a distribution of (sign, mantissa, exp) values + // broader than the float32 range, and check Rat.Float32() + // always picks the closest float32 approximation. + var add = []int64{ + 0, + 1, + 3, + 5, + 7, + 9, + 11, + } + var winc, einc = uint64(5), 15 // quick test (~60ms on x86-64) + if *long { + winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64) + } + + for _, sign := range "+-" { + for _, a := range add { + for wid := uint64(0); wid < 30; wid += winc { + b := 1<<wid + a + if sign == '-' { + b = -b + } + for exp := -150; exp < 150; exp += einc { + num, den := NewInt(b), NewInt(1) + if exp > 0 { + num.Lsh(num, uint(exp)) + } else { + den.Lsh(den, uint(-exp)) + } + r := new(Rat).SetFrac(num, den) + f, _ := r.Float32() + + if !checkIsBestApprox32(t, f, r) { + // Append context information. + t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)", + b, exp, f, f, math.Ldexp(float64(b), exp), r) + } + + checkNonLossyRoundtrip32(t, f) + } + } + } + } +} + +func TestFloat64Distribution(t *testing.T) { + // Generate a distribution of (sign, mantissa, exp) values + // broader than the float64 range, and check Rat.Float64() + // always picks the closest float64 approximation. 
+ var add = []int64{ + 0, + 1, + 3, + 5, + 7, + 9, + 11, + } + var winc, einc = uint64(10), 500 // quick test (~12ms on x86-64) + if *long { + winc, einc = uint64(1), 1 // soak test (~75s on x86-64) + } + + for _, sign := range "+-" { + for _, a := range add { + for wid := uint64(0); wid < 60; wid += winc { + b := 1<<wid + a + if sign == '-' { + b = -b + } + for exp := -1100; exp < 1100; exp += einc { + num, den := NewInt(b), NewInt(1) + if exp > 0 { + num.Lsh(num, uint(exp)) + } else { + den.Lsh(den, uint(-exp)) + } + r := new(Rat).SetFrac(num, den) + f, _ := r.Float64() + + if !checkIsBestApprox64(t, f, r) { + // Append context information. + t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)", + b, exp, f, f, math.Ldexp(float64(b), exp), r) + } + + checkNonLossyRoundtrip64(t, f) + } + } + } + } +} + +// TestSetFloat64NonFinite checks that SetFloat64 of a non-finite value +// returns nil. +func TestSetFloat64NonFinite(t *testing.T) { + for _, f := range []float64{math.NaN(), math.Inf(+1), math.Inf(-1)} { + var r Rat + if r2 := r.SetFloat64(f); r2 != nil { + t.Errorf("SetFloat64(%g) was %v, want nil", f, r2) + } + } +} + +// checkNonLossyRoundtrip32 checks that a float->Rat->float roundtrip is +// non-lossy for finite f. +func checkNonLossyRoundtrip32(t *testing.T, f float32) { + if !isFinite(float64(f)) { + return + } + r := new(Rat).SetFloat64(float64(f)) + if r == nil { + t.Errorf("Rat.SetFloat64(float64(%g) (%b)) == nil", f, f) + return + } + f2, exact := r.Float32() + if f != f2 || !exact { + t.Errorf("Rat.SetFloat64(float64(%g)).Float32() = %g (%b), %v, want %g (%b), %v; delta = %b", + f, f2, f2, exact, f, f, true, f2-f) + } +} + +// checkNonLossyRoundtrip64 checks that a float->Rat->float roundtrip is +// non-lossy for finite f. 
+func checkNonLossyRoundtrip64(t *testing.T, f float64) { + if !isFinite(f) { + return + } + r := new(Rat).SetFloat64(f) + if r == nil { + t.Errorf("Rat.SetFloat64(%g (%b)) == nil", f, f) + return + } + f2, exact := r.Float64() + if f != f2 || !exact { + t.Errorf("Rat.SetFloat64(%g).Float64() = %g (%b), %v, want %g (%b), %v; delta = %b", + f, f2, f2, exact, f, f, true, f2-f) + } +} + +// delta returns the absolute difference between r and f. +func delta(r *Rat, f float64) *Rat { + d := new(Rat).Sub(r, new(Rat).SetFloat64(f)) + return d.Abs(d) +} + +// checkIsBestApprox32 checks that f is the best possible float32 +// approximation of r. +// Returns true on success. +func checkIsBestApprox32(t *testing.T, f float32, r *Rat) bool { + if math.Abs(float64(f)) >= math.MaxFloat32 { + // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat32). + // But we have tests for these special cases. + return true + } + + // r must be strictly between f0 and f1, the floats bracketing f. + f0 := math.Nextafter32(f, float32(math.Inf(-1))) + f1 := math.Nextafter32(f, float32(math.Inf(+1))) + + // For f to be correct, r must be closer to f than to f0 or f1. + df := delta(r, float64(f)) + df0 := delta(r, float64(f0)) + df1 := delta(r, float64(f1)) + if df.Cmp(df0) > 0 { + t.Errorf("Rat(%v).Float32() = %g (%b), but previous float32 %g (%b) is closer", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) > 0 { + t.Errorf("Rat(%v).Float32() = %g (%b), but next float32 %g (%b) is closer", r, f, f, f1, f1) + return false + } + if df.Cmp(df0) == 0 && !isEven32(f) { + t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) == 0 && !isEven32(f) { + t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1) + return false + } + return true +} + +// checkIsBestApprox64 checks that f is the best possible float64 +// approximation of r. 
// Returns true on success.
func checkIsBestApprox64(t *testing.T, f float64, r *Rat) bool {
	if math.Abs(f) >= math.MaxFloat64 {
		// Cannot check +Inf, -Inf, nor the float next to them (MaxFloat64).
		// But we have tests for these special cases.
		return true
	}

	// r must be strictly between f0 and f1, the floats bracketing f.
	f0 := math.Nextafter(f, math.Inf(-1))
	f1 := math.Nextafter(f, math.Inf(+1))

	// For f to be correct, r must be closer to f than to f0 or f1.
	df := delta(r, f)
	df0 := delta(r, f0)
	df1 := delta(r, f1)
	if df.Cmp(df0) > 0 {
		t.Errorf("Rat(%v).Float64() = %g (%b), but previous float64 %g (%b) is closer", r, f, f, f0, f0)
		return false
	}
	if df.Cmp(df1) > 0 {
		t.Errorf("Rat(%v).Float64() = %g (%b), but next float64 %g (%b) is closer", r, f, f, f1, f1)
		return false
	}
	// An exact tie with a neighbor is only acceptable when f itself is the
	// "even" candidate (lowest bit of its representation is zero).
	if df.Cmp(df0) == 0 && !isEven64(f) {
		t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0)
		return false
	}
	if df.Cmp(df1) == 0 && !isEven64(f) {
		t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1)
		return false
	}
	return true
}

// isEven32 reports whether the lowest bit of f's math.Float32bits value is 0.
func isEven32(f float32) bool { return math.Float32bits(f)&1 == 0 }

// isEven64 reports whether the lowest bit of f's math.Float64bits value is 0.
func isEven64(f float64) bool { return math.Float64bits(f)&1 == 0 }

// TestIsFinite checks that isFinite accepts ordinary and extreme finite
// float64 values and rejects NaN and both infinities.
func TestIsFinite(t *testing.T) {
	finites := []float64{
		1.0 / 3,
		4891559871276714924261e+222,
		math.MaxFloat64,
		math.SmallestNonzeroFloat64,
		-math.MaxFloat64,
		-math.SmallestNonzeroFloat64,
	}
	for _, f := range finites {
		if !isFinite(f) {
			t.Errorf("!IsFinite(%g (%b))", f, f)
		}
	}
	nonfinites := []float64{
		math.NaN(),
		math.Inf(-1),
		math.Inf(+1),
	}
	for _, f := range nonfinites {
		if isFinite(f) {
			t.Errorf("IsFinite(%g, (%b))", f, f)
		}
	}
}

// TestRatSetInt64 checks that SetInt64 yields an integral Rat whose
// numerator fits in an int64 and equals the value that was set, including
// the int64 extremes. A single Rat is reused across all cases.
func TestRatSetInt64(t *testing.T) {
	var testCases = []int64{
		0,
		1,
		-1,
		12345,
		-98765,
		math.MaxInt64,
		math.MinInt64,
	}
	var r = new(Rat)
	for i, want := range testCases {
		r.SetInt64(want)
		if !r.IsInt() {
			t.Errorf("#%d: Rat.SetInt64(%d) is not an integer", i, want)
		}
		num := r.Num()
		if !num.IsInt64() {
			t.Errorf("#%d: Rat.SetInt64(%d) numerator is not an int64", i, want)
		}
		got := num.Int64()
		if got != want {
			t.Errorf("#%d: Rat.SetInt64(%d) = %d, but expected %d", i, want, got, want)
		}
	}
}

// TestRatSetUint64 is the unsigned counterpart of TestRatSetInt64; the
// largest case is the maximum uint64 value (^uint64(0)).
func TestRatSetUint64(t *testing.T) {
	var testCases = []uint64{
		0,
		1,
		12345,
		^uint64(0),
	}
	var r = new(Rat)
	for i, want := range testCases {
		r.SetUint64(want)
		if !r.IsInt() {
			t.Errorf("#%d: Rat.SetUint64(%d) is not an integer", i, want)
		}
		num := r.Num()
		if !num.IsUint64() {
			t.Errorf("#%d: Rat.SetUint64(%d) numerator is not a uint64", i, want)
		}
		got := num.Uint64()
		if got != want {
			t.Errorf("#%d: Rat.SetUint64(%d) = %d, but expected %d", i, want, got, want)
		}
	}
}

// BenchmarkRatCmp measures Rat.Cmp on the fixed operands 4/1 and 7/2.
func BenchmarkRatCmp(b *testing.B) {
	x, y := NewRat(4, 1), NewRat(7, 2)
	for i := 0; i < b.N; i++ {
		x.Cmp(y)
	}
}

// TestIssue34919 verifies that a Rat's denominator is not modified
// when simply accessing the Rat value.
func TestIssue34919(t *testing.T) {
	for _, acc := range []struct {
		name string
		f    func(*Rat)
	}{
		{"Float32", func(x *Rat) { x.Float32() }},
		{"Float64", func(x *Rat) { x.Float64() }},
		{"Inv", func(x *Rat) { new(Rat).Inv(x) }},
		{"Sign", func(x *Rat) { x.Sign() }},
		{"IsInt", func(x *Rat) { x.IsInt() }},
		{"Num", func(x *Rat) { x.Num() }},
		// {"Denom", func(x *Rat) { x.Denom() }}, TODO(gri) should we change the API? See issue #33792.
	} {
		// A denominator of length 0 is interpreted as 1. Make sure that
		// "materialization" of the denominator doesn't lead to setting
		// the underlying array element 0 to 1.
		r := &Rat{Int{abs: nat{991}}, Int{abs: make(nat, 0, 1)}}
		acc.f(r)
		// Re-slice to length 1 to inspect the spare capacity element that
		// an accessor would have overwritten.
		if d := r.b.abs[:1][0]; d != 0 {
			t.Errorf("%s modified denominator: got %d, want 0", acc.name, d)
		}
	}
}

// TestDenomRace calls Denom and Float.SetRat on one shared Rat from N
// goroutines and waits for all of them; it is meaningful when run under
// the race detector.
func TestDenomRace(t *testing.T) {
	x := NewRat(1, 2)
	const N = 3
	c := make(chan bool, N) // buffered so no goroutine blocks on completion
	for i := 0; i < N; i++ {
		go func() {
			// Denom (also used by Float.SetRat) used to mutate x unnecessarily,
			// provoking race reports when run in the race detector.
			x.Denom()
			new(Float).SetRat(x)
			c <- true
		}()
	}
	for i := 0; i < N; i++ {
		<-c
	}
}
diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go new file mode 100644 index 0000000..90053a9 --- /dev/null +++ b/src/math/big/ratconv.go @@ -0,0 +1,380 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file implements rat-to-string conversion functions.

package big

import (
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// ratTok reports whether ch may be part of a token consumed by Rat.Scan:
// a sign, the fraction slash, a decimal digit, the radix point, or a
// decimal exponent marker.
func ratTok(ch rune) bool {
	return strings.ContainsRune("+-/0123456789.eE", ch)
}

var ratZero Rat
var _ fmt.Scanner = &ratZero // *Rat must implement fmt.Scanner

// Scan is a support routine for fmt.Scanner. It accepts the formats
// 'e', 'E', 'f', 'F', 'g', 'G', and 'v'. All formats are equivalent.
func (z *Rat) Scan(s fmt.ScanState, ch rune) error {
	// The token is read (skipping leading space) before the verb is
	// validated, so input is consumed even when the verb is rejected.
	tok, err := s.Token(true, ratTok)
	if err != nil {
		return err
	}
	if !strings.ContainsRune("efgEFGv", ch) {
		return errors.New("Rat.Scan: invalid verb")
	}
	if _, ok := z.SetString(string(tok)); !ok {
		return errors.New("Rat.Scan: invalid syntax")
	}
	return nil
}

// SetString sets z to the value of s and returns z and a boolean indicating
// success. s can be given as a (possibly signed) fraction "a/b", or as a
// floating-point number optionally followed by an exponent.
+// If a fraction is provided, both the dividend and the divisor may be a +// decimal integer or independently use a prefix of ``0b'', ``0'' or ``0o'', +// or ``0x'' (or their upper-case variants) to denote a binary, octal, or +// hexadecimal integer, respectively. The divisor may not be signed. +// If a floating-point number is provided, it may be in decimal form or +// use any of the same prefixes as above but for ``0'' to denote a non-decimal +// mantissa. A leading ``0'' is considered a decimal leading 0; it does not +// indicate octal representation in this case. +// An optional base-10 ``e'' or base-2 ``p'' (or their upper-case variants) +// exponent may be provided as well, except for hexadecimal floats which +// only accept an (optional) ``p'' exponent (because an ``e'' or ``E'' cannot +// be distinguished from a mantissa digit). If the exponent's absolute value +// is too large, the operation may fail. +// The entire string, not just a prefix, must be valid for success. If the +// operation failed, the value of z is undefined but the returned value is nil. 
+func (z *Rat) SetString(s string) (*Rat, bool) { + if len(s) == 0 { + return nil, false + } + // len(s) > 0 + + // parse fraction a/b, if any + if sep := strings.Index(s, "/"); sep >= 0 { + if _, ok := z.a.SetString(s[:sep], 0); !ok { + return nil, false + } + r := strings.NewReader(s[sep+1:]) + var err error + if z.b.abs, _, _, err = z.b.abs.scan(r, 0, false); err != nil { + return nil, false + } + // entire string must have been consumed + if _, err = r.ReadByte(); err != io.EOF { + return nil, false + } + if len(z.b.abs) == 0 { + return nil, false + } + return z.norm(), true + } + + // parse floating-point number + r := strings.NewReader(s) + + // sign + neg, err := scanSign(r) + if err != nil { + return nil, false + } + + // mantissa + var base int + var fcount int // fractional digit count; valid if <= 0 + z.a.abs, base, fcount, err = z.a.abs.scan(r, 0, true) + if err != nil { + return nil, false + } + + // exponent + var exp int64 + var ebase int + exp, ebase, err = scanExponent(r, true, true) + if err != nil { + return nil, false + } + + // there should be no unread characters left + if _, err = r.ReadByte(); err != io.EOF { + return nil, false + } + + // special-case 0 (see also issue #16176) + if len(z.a.abs) == 0 { + return z, true + } + // len(z.a.abs) > 0 + + // The mantissa may have a radix point (fcount <= 0) and there + // may be a nonzero exponent exp. The radix point amounts to a + // division by base**(-fcount), which equals a multiplication by + // base**fcount. An exponent means multiplication by ebase**exp. + // Multiplications are commutative, so we can apply them in any + // order. We only have powers of 2 and 10, and we split powers + // of 10 into the product of the same powers of 2 and 5. This + // may reduce the size of shift/multiplication factors or + // divisors required to create the final fraction, depending + // on the actual floating-point value. 
+ + // determine binary or decimal exponent contribution of radix point + var exp2, exp5 int64 + if fcount < 0 { + // The mantissa has a radix point ddd.dddd; and + // -fcount is the number of digits to the right + // of '.'. Adjust relevant exponent accordingly. + d := int64(fcount) + switch base { + case 10: + exp5 = d + fallthrough // 10**e == 5**e * 2**e + case 2: + exp2 = d + case 8: + exp2 = d * 3 // octal digits are 3 bits each + case 16: + exp2 = d * 4 // hexadecimal digits are 4 bits each + default: + panic("unexpected mantissa base") + } + // fcount consumed - not needed anymore + } + + // take actual exponent into account + switch ebase { + case 10: + exp5 += exp + fallthrough // see fallthrough above + case 2: + exp2 += exp + default: + panic("unexpected exponent base") + } + // exp consumed - not needed anymore + + // apply exp5 contributions + // (start with exp5 so the numbers to multiply are smaller) + if exp5 != 0 { + n := exp5 + if n < 0 { + n = -n + if n < 0 { + // This can occur if -n overflows. -(-1 << 63) would become + // -1 << 63, which is still negative. + return nil, false + } + } + if n > 1e6 { + return nil, false // avoid excessively large exponents + } + pow5 := z.b.abs.expNN(natFive, nat(nil).setWord(Word(n)), nil) // use underlying array of z.b.abs + if exp5 > 0 { + z.a.abs = z.a.abs.mul(z.a.abs, pow5) + z.b.abs = z.b.abs.setWord(1) + } else { + z.b.abs = pow5 + } + } else { + z.b.abs = z.b.abs.setWord(1) + } + + // apply exp2 contributions + if exp2 < -1e7 || exp2 > 1e7 { + return nil, false // avoid excessively large exponents + } + if exp2 > 0 { + z.a.abs = z.a.abs.shl(z.a.abs, uint(exp2)) + } else if exp2 < 0 { + z.b.abs = z.b.abs.shl(z.b.abs, uint(-exp2)) + } + + z.a.neg = neg && len(z.a.abs) > 0 // 0 has no sign + + return z.norm(), true +} + +// scanExponent scans the longest possible prefix of r representing a base 10 +// (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. 
It returns the +// exponent, the exponent base (10 or 2), or a read or syntax error, if any. +// +// If sepOk is set, an underscore character ``_'' may appear between successive +// exponent digits; such underscores do not change the value of the exponent. +// Incorrect placement of underscores is reported as an error if there are no +// other errors. If sepOk is not set, underscores are not recognized and thus +// terminate scanning like any other character that is not a valid digit. +// +// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . +// sign = "+" | "-" . +// digits = digit { [ '_' ] digit } . +// digit = "0" ... "9" . +// +// A base 2 exponent is only permitted if base2ok is set. +func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) { + // one char look-ahead + ch, err := r.ReadByte() + if err != nil { + if err == io.EOF { + err = nil + } + return 0, 10, err + } + + // exponent char + switch ch { + case 'e', 'E': + base = 10 + case 'p', 'P': + if base2ok { + base = 2 + break // ok + } + fallthrough // binary exponent not permitted + default: + r.UnreadByte() // ch does not belong to exponent anymore + return 0, 10, nil + } + + // sign + var digits []byte + ch, err = r.ReadByte() + if err == nil && (ch == '+' || ch == '-') { + if ch == '-' { + digits = append(digits, '-') + } + ch, err = r.ReadByte() + } + + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit. + prev := '.' 
+ invalSep := false + + // exponent value + hasDigits := false + for err == nil { + if '0' <= ch && ch <= '9' { + digits = append(digits, ch) + prev = '0' + hasDigits = true + } else if ch == '_' && sepOk { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + r.UnreadByte() // ch does not belong to number anymore + break + } + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + if err == nil && !hasDigits { + err = errNoDigits + } + if err == nil { + exp, err = strconv.ParseInt(string(digits), 10, 64) + } + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep + } + + return +} + +// String returns a string representation of x in the form "a/b" (even if b == 1). +func (x *Rat) String() string { + return string(x.marshal()) +} + +// marshal implements String returning a slice of bytes +func (x *Rat) marshal() []byte { + var buf []byte + buf = x.a.Append(buf, 10) + buf = append(buf, '/') + if len(x.b.abs) != 0 { + buf = x.b.Append(buf, 10) + } else { + buf = append(buf, '1') + } + return buf +} + +// RatString returns a string representation of x in the form "a/b" if b != 1, +// and in the form "a" if b == 1. +func (x *Rat) RatString() string { + if x.IsInt() { + return x.a.String() + } + return x.String() +} + +// FloatString returns a string representation of x in decimal form with prec +// digits of precision after the radix point. The last digit is rounded to +// nearest, with halves rounded away from zero. 
func (x *Rat) FloatString(prec int) string {
	var buf []byte

	// Integral values need no division: print the numerator and, if
	// requested, prec zeros after the radix point.
	if x.IsInt() {
		buf = x.a.Append(buf, 10)
		if prec > 0 {
			buf = append(buf, '.')
			for i := prec; i > 0; i-- {
				buf = append(buf, '0')
			}
		}
		return string(buf)
	}
	// x.b.abs != 0

	// q is the integer part of |x|, r the remainder (operates on
	// magnitudes only; the sign is handled separately below).
	q, r := nat(nil).div(nat(nil), x.a.abs, x.b.abs)

	// p = 10**prec scales the remainder to prec fractional digits.
	// For prec <= 0 it stays 1, so a negative prec behaves like 0.
	p := natOne
	if prec > 0 {
		p = nat(nil).expNN(natTen, nat(nil).setUint64(uint64(prec)), nil)
	}

	// r becomes the fractional digits as an integer, r2 what is left over.
	r = r.mul(r, p)
	r, r2 := r.div(nat(nil), r, x.b.abs)

	// see if we need to round up
	// (round up when 2*r2 >= denominator, i.e. halves go away from zero)
	r2 = r2.add(r2, r2)
	if x.b.abs.cmp(r2) <= 0 {
		r = r.add(r, natOne)
		if r.cmp(p) >= 0 {
			// the fractional digits overflowed: carry into the integer part
			q = nat(nil).add(q, natOne)
			r = nat(nil).sub(r, p)
		}
	}

	// NOTE(review): the '-' is emitted whenever x is negative, even if all
	// printed digits round to zero - confirm this is intended.
	if x.a.neg {
		buf = append(buf, '-')
	}
	buf = append(buf, q.utoa(10)...) // q is a magnitude; utoa takes no sign, it was emitted above

	if prec > 0 {
		buf = append(buf, '.')
		rs := r.utoa(10)
		// left-pad the fractional digits with zeros to exactly prec digits
		for i := prec - len(rs); i > 0; i-- {
			buf = append(buf, '0')
		}
		buf = append(buf, rs...)
	}

	return string(buf)
}
diff --git a/src/math/big/ratconv_test.go b/src/math/big/ratconv_test.go new file mode 100644 index 0000000..e55e655 --- /dev/null +++ b/src/math/big/ratconv_test.go @@ -0,0 +1,617 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+ +package big + +import ( + "bytes" + "fmt" + "io" + "math" + "strconv" + "strings" + "testing" +) + +var exponentTests = []struct { + s string // string to be scanned + base2ok bool // true if 'p'/'P' exponents are accepted + sepOk bool // true if '_' separators are accepted + x int64 // expected exponent + b int // expected exponent base + err error // expected error + next rune // next character (or 0, if at EOF) +}{ + // valid, without separators + {"", false, false, 0, 10, nil, 0}, + {"1", false, false, 0, 10, nil, '1'}, + {"e0", false, false, 0, 10, nil, 0}, + {"E1", false, false, 1, 10, nil, 0}, + {"e+10", false, false, 10, 10, nil, 0}, + {"e-10", false, false, -10, 10, nil, 0}, + {"e123456789a", false, false, 123456789, 10, nil, 'a'}, + {"p", false, false, 0, 10, nil, 'p'}, + {"P+100", false, false, 0, 10, nil, 'P'}, + {"p0", true, false, 0, 2, nil, 0}, + {"P-123", true, false, -123, 2, nil, 0}, + {"p+0a", true, false, 0, 2, nil, 'a'}, + {"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore + + // valid, with separators + {"e+1_0", false, true, 10, 10, nil, 0}, + {"e-1_0", false, true, -10, 10, nil, 0}, + {"e123_456_789a", false, true, 123456789, 10, nil, 'a'}, + {"P+1_00", false, true, 0, 10, nil, 'P'}, + {"p-1_2_3", true, true, -123, 2, nil, 0}, + + // invalid: no digits + {"e", false, false, 0, 10, errNoDigits, 0}, + {"ef", false, false, 0, 10, errNoDigits, 'f'}, + {"e+", false, false, 0, 10, errNoDigits, 0}, + {"E-x", false, false, 0, 10, errNoDigits, 'x'}, + {"p", true, false, 0, 2, errNoDigits, 0}, + {"P-", true, false, 0, 2, errNoDigits, 0}, + {"p+e", true, false, 0, 2, errNoDigits, 'e'}, + {"e+_x", false, true, 0, 10, errNoDigits, 'x'}, + + // invalid: incorrect use of separator + {"e0_", false, true, 0, 10, errInvalSep, 0}, + {"e_0", false, true, 0, 10, errInvalSep, 0}, + {"e-1_2__3", false, true, -123, 10, errInvalSep, 0}, +} + +func TestScanExponent(t *testing.T) { + for _, a := range exponentTests { + r := 
strings.NewReader(a.s) + x, b, err := scanExponent(r, a.base2ok, a.sepOk) + if err != a.err { + t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", a, err, a.err) + } + if x != a.x { + t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != a.b { + t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", a, b, a.b) + } + next, _, err := r.ReadRune() + if err == io.EOF { + next = 0 + err = nil + } + if err == nil && next != a.next { + t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", a, next, a.next) + } + } +} + +type StringTest struct { + in, out string + ok bool +} + +var setStringTests = []StringTest{ + // invalid + {in: "1e"}, + {in: "1.e"}, + {in: "1e+14e-5"}, + {in: "1e4.5"}, + {in: "r"}, + {in: "a/b"}, + {in: "a.b"}, + {in: "1/0"}, + {in: "4/3/2"}, // issue 17001 + {in: "4/3/"}, + {in: "4/3."}, + {in: "4/"}, + {in: "13e-9223372036854775808"}, // CVE-2022-23772 + + // valid + {"0", "0", true}, + {"-0", "0", true}, + {"1", "1", true}, + {"-1", "-1", true}, + {"1.", "1", true}, + {"1e0", "1", true}, + {"1.e1", "10", true}, + {"-0.1", "-1/10", true}, + {"-.1", "-1/10", true}, + {"2/4", "1/2", true}, + {".25", "1/4", true}, + {"-1/5", "-1/5", true}, + {"8129567.7690E14", "812956776900000000000", true}, + {"78189e+4", "781890000", true}, + {"553019.8935e+8", "55301989350000", true}, + {"98765432109876543210987654321e-10", "98765432109876543210987654321/10000000000", true}, + {"9877861857500000E-7", "3951144743/4", true}, + {"2169378.417e-3", "2169378417/1000000", true}, + {"884243222337379604041632732738665534", "884243222337379604041632732738665534", true}, + {"53/70893980658822810696", "53/70893980658822810696", true}, + {"106/141787961317645621392", "53/70893980658822810696", true}, + {"204211327800791583.81095", "4084226556015831676219/20000", true}, + {"0e9999999999", "0", true}, // issue #16176 +} + +// These are not supported by fmt.Fscanf. 
+var setStringTests2 = []StringTest{ + // invalid + {in: "4/3x"}, + {in: "0/-1"}, + {in: "-1/-1"}, + + // invalid with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {in: "10_/1"}, + {in: "_10/1"}, + {in: "1/1__0"}, + + // valid + {"0b1000/3", "8/3", true}, + {"0B1000/0x8", "1", true}, + {"-010/1", "-8", true}, // 0-prefix indicates octal in this case + {"-010.0", "-10", true}, + {"-0o10/1", "-8", true}, + {"0x10/1", "16", true}, + {"0x10/0x20", "1/2", true}, + + {"0010", "10", true}, // 0-prefix is ignored in this case (not a fraction) + {"0x10.0", "16", true}, + {"0x1.8", "3/2", true}, + {"0X1.8p4", "24", true}, + {"0x1.1E2", "2289/2048", true}, // E is part of hex mantissa, not exponent + {"0b1.1E2", "150", true}, + {"0B1.1P3", "12", true}, + {"0o10e-2", "2/25", true}, + {"0O10p-3", "1", true}, + + // valid with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {"0b_1000/3", "8/3", true}, + {"0B_10_00/0x8", "1", true}, + {"0xdead/0B1101_1110_1010_1101", "1", true}, + {"0B1101_1110_1010_1101/0XD_E_A_D", "1", true}, + {"1_000.0", "1000", true}, + + {"0x_10.0", "16", true}, + {"0x1_0.0", "16", true}, + {"0x1.8_0", "3/2", true}, + {"0X1.8p0_4", "24", true}, + {"0b1.1_0E2", "150", true}, + {"0o1_0e-2", "2/25", true}, + {"0O_10p-3", "1", true}, +} + +func TestRatSetString(t *testing.T) { + var tests []StringTest + tests = append(tests, setStringTests...) + tests = append(tests, setStringTests2...) 
+ + for i, test := range tests { + x, ok := new(Rat).SetString(test.in) + + if ok { + if !test.ok { + t.Errorf("#%d SetString(%q) expected failure", i, test.in) + } else if x.RatString() != test.out { + t.Errorf("#%d SetString(%q) got %s want %s", i, test.in, x.RatString(), test.out) + } + } else { + if test.ok { + t.Errorf("#%d SetString(%q) expected success", i, test.in) + } else if x != nil { + t.Errorf("#%d SetString(%q) got %p want nil", i, test.in, x) + } + } + } +} + +func TestRatScan(t *testing.T) { + var buf bytes.Buffer + for i, test := range setStringTests { + x := new(Rat) + buf.Reset() + buf.WriteString(test.in) + + _, err := fmt.Fscanf(&buf, "%v", x) + if err == nil != test.ok { + if test.ok { + t.Errorf("#%d (%s) error: %s", i, test.in, err) + } else { + t.Errorf("#%d (%s) expected error", i, test.in) + } + continue + } + if err == nil && x.RatString() != test.out { + t.Errorf("#%d got %s want %s", i, x.RatString(), test.out) + } + } +} + +var floatStringTests = []struct { + in string + prec int + out string +}{ + {"0", 0, "0"}, + {"0", 4, "0.0000"}, + {"1", 0, "1"}, + {"1", 2, "1.00"}, + {"-1", 0, "-1"}, + {"0.05", 1, "0.1"}, + {"-0.05", 1, "-0.1"}, + {".25", 2, "0.25"}, + {".25", 1, "0.3"}, + {".25", 3, "0.250"}, + {"-1/3", 3, "-0.333"}, + {"-2/3", 4, "-0.6667"}, + {"0.96", 1, "1.0"}, + {"0.999", 2, "1.00"}, + {"0.9", 0, "1"}, + {".25", -1, "0"}, + {".55", -1, "1"}, +} + +func TestFloatString(t *testing.T) { + for i, test := range floatStringTests { + x, _ := new(Rat).SetString(test.in) + + if x.FloatString(test.prec) != test.out { + t.Errorf("#%d got %s want %s", i, x.FloatString(test.prec), test.out) + } + } +} + +// Test inputs to Rat.SetString. The prefix "long:" causes the test +// to be skipped except in -long mode. (The threshold is about 500us.) +var float64inputs = []string{ + // Constants plundered from strconv/testfp.txt. 
+ + // Table 1: Stress Inputs for Conversion to 53-bit Binary, < 1/2 ULP + "5e+125", + "69e+267", + "999e-026", + "7861e-034", + "75569e-254", + "928609e-261", + "9210917e+080", + "84863171e+114", + "653777767e+273", + "5232604057e-298", + "27235667517e-109", + "653532977297e-123", + "3142213164987e-294", + "46202199371337e-072", + "231010996856685e-073", + "9324754620109615e+212", + "78459735791271921e+049", + "272104041512242479e+200", + "6802601037806061975e+198", + "20505426358836677347e-221", + "836168422905420598437e-234", + "4891559871276714924261e+222", + + // Table 2: Stress Inputs for Conversion to 53-bit Binary, > 1/2 ULP + "9e-265", + "85e-037", + "623e+100", + "3571e+263", + "81661e+153", + "920657e-023", + "4603285e-024", + "87575437e-309", + "245540327e+122", + "6138508175e+120", + "83356057653e+193", + "619534293513e+124", + "2335141086879e+218", + "36167929443327e-159", + "609610927149051e-255", + "3743626360493413e-165", + "94080055902682397e-242", + "899810892172646163e+283", + "7120190517612959703e+120", + "25188282901709339043e-252", + "308984926168550152811e-052", + "6372891218502368041059e+064", + + // Table 14: Stress Inputs for Conversion to 24-bit Binary, <1/2 ULP + "5e-20", + "67e+14", + "985e+15", + "7693e-42", + "55895e-16", + "996622e-44", + "7038531e-32", + "60419369e-46", + "702990899e-20", + "6930161142e-48", + "25933168707e+13", + "596428896559e+20", + + // Table 15: Stress Inputs for Conversion to 24-bit Binary, >1/2 ULP + "3e-23", + "57e+18", + "789e-35", + "2539e-18", + "76173e+28", + "887745e-11", + "5382571e-37", + "82381273e-35", + "750486563e-38", + "3752432815e-39", + "75224575729e-45", + "459926601011e+15", + + // Constants plundered from strconv/atof_test.go. 
+ + "0", + "1", + "+1", + "1e23", + "1E23", + "100000000000000000000000", + "1e-100", + "123456700", + "99999999999999974834176", + "100000000000000000000001", + "100000000000000008388608", + "100000000000000016777215", + "100000000000000016777216", + "-1", + "-0.1", + "-0", // NB: exception made for this input + "1e-20", + "625e-3", + + // largest float64 + "1.7976931348623157e308", + "-1.7976931348623157e308", + // next float64 - too large + "1.7976931348623159e308", + "-1.7976931348623159e308", + // the border is ...158079 + // borderline - okay + "1.7976931348623158e308", + "-1.7976931348623158e308", + // borderline - too large + "1.797693134862315808e308", + "-1.797693134862315808e308", + + // a little too large + "1e308", + "2e308", + "1e309", + + // way too large + "1e310", + "-1e310", + "1e400", + "-1e400", + "long:1e400000", + "long:-1e400000", + + // denormalized + "1e-305", + "1e-306", + "1e-307", + "1e-308", + "1e-309", + "1e-310", + "1e-322", + // smallest denormal + "5e-324", + "4e-324", + "3e-324", + // too small + "2e-324", + // way too small + "1e-350", + "long:1e-400000", + // way too small, negative + "-1e-350", + "long:-1e-400000", + + // try to overflow exponent + // [Disabled: too slow and memory-hungry with rationals.] + // "1e-4294967296", + // "1e+4294967296", + // "1e-18446744073709551616", + // "1e+18446744073709551616", + + // https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/ + "2.2250738585072012e-308", + // https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ + "2.2250738585072011e-308", + + // A very large number (initially wrongly parsed by the fast algorithm). + "4.630813248087435e+307", + + // A different kind of very large number. + "22.222222222222222", + "long:2." + strings.Repeat("2", 4000) + "e+1", + + // Exactly halfway between 1 and math.Nextafter(1, 2). + // Round to even (down). 
+ "1.00000000000000011102230246251565404236316680908203125", + // Slightly lower; still round down. + "1.00000000000000011102230246251565404236316680908203124", + // Slightly higher; round up. + "1.00000000000000011102230246251565404236316680908203126", + // Slightly higher, but you have to read all the way to the end. + "long:1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", + + // Smallest denormal, 2^(-1022-52) + "4.940656458412465441765687928682213723651e-324", + // Half of smallest denormal, 2^(-1022-53) + "2.470328229206232720882843964341106861825e-324", + // A little more than the exact half of smallest denormal + // 2^-1075 + 2^-1100. (Rounds to 1p-1074.) + "2.470328302827751011111470718709768633275e-324", + // The exact halfway between smallest normal and largest denormal: + // 2^-1022 - 2^-1075. (Rounds to 2^-1022.) + "2.225073858507201136057409796709131975935e-308", + + "1152921504606846975", // 1<<60 - 1 + "-1152921504606846975", // -(1<<60 - 1) + "1152921504606846977", // 1<<60 + 1 + "-1152921504606846977", // -(1<<60 + 1) + + "1/3", +} + +// isFinite reports whether f represents a finite rational value. +// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0). +func isFinite(f float64) bool { + return math.Abs(f) <= math.MaxFloat64 +} + +func TestFloat32SpecialCases(t *testing.T) { + for _, input := range float64inputs { + if strings.HasPrefix(input, "long:") { + if !*long { + continue + } + input = input[len("long:"):] + } + + r, ok := new(Rat).SetString(input) + if !ok { + t.Errorf("Rat.SetString(%q) failed", input) + continue + } + f, exact := r.Float32() + + // 1. Check string -> Rat -> float32 conversions are + // consistent with strconv.ParseFloat. + // Skip this check if the input uses "a/b" rational syntax. 
+ if !strings.Contains(input, "/") { + e64, _ := strconv.ParseFloat(input, 32) + e := float32(e64) + + // Careful: negative Rats too small for + // float64 become -0, but Rat obviously cannot + // preserve the sign from SetString("-0"). + switch { + case math.Float32bits(e) == math.Float32bits(f): + // Ok: bitwise equal. + case f == 0 && r.Num().BitLen() == 0: + // Ok: Rat(0) is equivalent to both +/- float64(0). + default: + t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e) + } + } + + if !isFinite(float64(f)) { + continue + } + + // 2. Check f is best approximation to r. + if !checkIsBestApprox32(t, f, r) { + // Append context information. + t.Errorf("(input was %q)", input) + } + + // 3. Check f->R->f roundtrip is non-lossy. + checkNonLossyRoundtrip32(t, f) + + // 4. Check exactness using slow algorithm. + if wasExact := new(Rat).SetFloat64(float64(f)).Cmp(r) == 0; wasExact != exact { + t.Errorf("Rat.SetString(%q).Float32().exact = %t, want %t", input, exact, wasExact) + } + } +} + +func TestFloat64SpecialCases(t *testing.T) { + for _, input := range float64inputs { + if strings.HasPrefix(input, "long:") { + if !*long { + continue + } + input = input[len("long:"):] + } + + r, ok := new(Rat).SetString(input) + if !ok { + t.Errorf("Rat.SetString(%q) failed", input) + continue + } + f, exact := r.Float64() + + // 1. Check string -> Rat -> float64 conversions are + // consistent with strconv.ParseFloat. + // Skip this check if the input uses "a/b" rational syntax. + if !strings.Contains(input, "/") { + e, _ := strconv.ParseFloat(input, 64) + + // Careful: negative Rats too small for + // float64 become -0, but Rat obviously cannot + // preserve the sign from SetString("-0"). + switch { + case math.Float64bits(e) == math.Float64bits(f): + // Ok: bitwise equal. + case f == 0 && r.Num().BitLen() == 0: + // Ok: Rat(0) is equivalent to both +/- float64(0). 
+ default: + t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e) + } + } + + if !isFinite(f) { + continue + } + + // 2. Check f is best approximation to r. + if !checkIsBestApprox64(t, f, r) { + // Append context information. + t.Errorf("(input was %q)", input) + } + + // 3. Check f->R->f roundtrip is non-lossy. + checkNonLossyRoundtrip64(t, f) + + // 4. Check exactness using slow algorithm. + if wasExact := new(Rat).SetFloat64(f).Cmp(r) == 0; wasExact != exact { + t.Errorf("Rat.SetString(%q).Float64().exact = %t, want %t", input, exact, wasExact) + } + } +} + +func TestIssue31184(t *testing.T) { + var x Rat + for _, want := range []string{ + "-213.090", + "8.192", + "16.000", + } { + x.SetString(want) + got := x.FloatString(3) + if got != want { + t.Errorf("got %s, want %s", got, want) + } + } +} + +func TestIssue45910(t *testing.T) { + var x Rat + for _, test := range []struct { + input string + want bool + }{ + {"1e-1000001", false}, + {"1e-1000000", true}, + {"1e+1000000", true}, + {"1e+1000001", false}, + + {"0p1000000000000", true}, + {"1p-10000001", false}, + {"1p-10000000", true}, + {"1p+10000000", true}, + {"1p+10000001", false}, + {"1.770p02041010010011001001", false}, // test case from issue + } { + _, got := x.SetString(test.input) + if got != test.want { + t.Errorf("SetString(%s) got ok = %v; want %v", test.input, got, test.want) + } + } +} diff --git a/src/math/big/ratmarsh.go b/src/math/big/ratmarsh.go new file mode 100644 index 0000000..56102e8 --- /dev/null +++ b/src/math/big/ratmarsh.go @@ -0,0 +1,81 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements encoding/decoding of Rats. + +package big + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// Gob codec version. Permits backward-compatible changes to the encoding. 
+const ratGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +func (x *Rat) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4) + i := x.b.abs.bytes(buf) + j := x.a.abs.bytes(buf[:i]) + n := i - j + if int(uint32(n)) != n { + // this should never happen + return nil, errors.New("Rat.GobEncode: numerator too large") + } + binary.BigEndian.PutUint32(buf[j-4:j], uint32(n)) + j -= 1 + 4 + b := ratGobVersion << 1 // make space for sign bit + if x.a.neg { + b |= 1 + } + buf[j] = b + return buf[j:], nil +} + +// GobDecode implements the gob.GobDecoder interface. +func (z *Rat) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. + *z = Rat{} + return nil + } + if len(buf) < 5 { + return errors.New("Rat.GobDecode: buffer too small") + } + b := buf[0] + if b>>1 != ratGobVersion { + return fmt.Errorf("Rat.GobDecode: encoding version %d not supported", b>>1) + } + const j = 1 + 4 + i := j + binary.BigEndian.Uint32(buf[j-4:j]) + if len(buf) < int(i) { + return errors.New("Rat.GobDecode: buffer too small") + } + z.a.neg = b&1 != 0 + z.a.abs = z.a.abs.setBytes(buf[j:i]) + z.b.abs = z.b.abs.setBytes(buf[i:]) + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +func (x *Rat) MarshalText() (text []byte, err error) { + if x.IsInt() { + return x.a.MarshalText() + } + return x.marshal(), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. 
+func (z *Rat) UnmarshalText(text []byte) error { + // TODO(gri): get rid of the []byte/string conversion + if _, ok := z.SetString(string(text)); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Rat", text) + } + return nil +} diff --git a/src/math/big/ratmarsh_test.go b/src/math/big/ratmarsh_test.go new file mode 100644 index 0000000..55a9878 --- /dev/null +++ b/src/math/big/ratmarsh_test.go @@ -0,0 +1,137 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "bytes" + "encoding/gob" + "encoding/json" + "encoding/xml" + "testing" +) + +func TestRatGobEncoding(t *testing.T) { + var medium bytes.Buffer + enc := gob.NewEncoder(&medium) + dec := gob.NewDecoder(&medium) + for _, test := range encodingTests { + medium.Reset() // empty buffer for each test case (in case of failures) + var tx Rat + tx.SetString(test + ".14159265") + if err := enc.Encode(&tx); err != nil { + t.Errorf("encoding of %s failed: %s", &tx, err) + continue + } + var rx Rat + if err := dec.Decode(&rx); err != nil { + t.Errorf("decoding of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx) + } + } +} + +// Sending a nil Rat pointer (inside a slice) on a round trip through gob should yield a zero. +// TODO: top-level nils. 
+func TestGobEncodingNilRatInSlice(t *testing.T) { + buf := new(bytes.Buffer) + enc := gob.NewEncoder(buf) + dec := gob.NewDecoder(buf) + + var in = make([]*Rat, 1) + err := enc.Encode(&in) + if err != nil { + t.Errorf("gob encode failed: %q", err) + } + var out []*Rat + err = dec.Decode(&out) + if err != nil { + t.Fatalf("gob decode failed: %q", err) + } + if len(out) != 1 { + t.Fatalf("wrong len; want 1 got %d", len(out)) + } + var zero Rat + if out[0].Cmp(&zero) != 0 { + t.Fatalf("transmission of (*Int)(nil) failed: got %s want 0", out) + } +} + +var ratNums = []string{ + "-141592653589793238462643383279502884197169399375105820974944592307816406286", + "-1415926535897932384626433832795028841971", + "-141592653589793", + "-1", + "0", + "1", + "141592653589793", + "1415926535897932384626433832795028841971", + "141592653589793238462643383279502884197169399375105820974944592307816406286", +} + +var ratDenoms = []string{ + "1", + "718281828459045", + "7182818284590452353602874713526624977572", + "718281828459045235360287471352662497757247093699959574966967627724076630353", +} + +func TestRatJSONEncoding(t *testing.T) { + for _, num := range ratNums { + for _, denom := range ratDenoms { + var tx Rat + tx.SetString(num + "/" + denom) + b, err := json.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Rat + if err := json.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} + +func TestRatXMLEncoding(t *testing.T) { + for _, num := range ratNums { + for _, denom := range ratDenoms { + var tx Rat + tx.SetString(num + "/" + denom) + b, err := xml.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Rat + if err := xml.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + 
continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} + +func TestRatGobDecodeShortBuffer(t *testing.T) { + for _, tc := range [][]byte{ + []byte{0x2}, + []byte{0x2, 0x0, 0x0, 0x0, 0xff}, + } { + err := NewRat(1, 2).GobDecode(tc) + if err == nil { + t.Error("expected GobDecode to return error for malformed input") + } + } +} diff --git a/src/math/big/roundingmode_string.go b/src/math/big/roundingmode_string.go new file mode 100644 index 0000000..c7629eb --- /dev/null +++ b/src/math/big/roundingmode_string.go @@ -0,0 +1,16 @@ +// Code generated by "stringer -type=RoundingMode"; DO NOT EDIT. + +package big + +import "strconv" + +const _RoundingMode_name = "ToNearestEvenToNearestAwayToZeroAwayFromZeroToNegativeInfToPositiveInf" + +var _RoundingMode_index = [...]uint8{0, 13, 26, 32, 44, 57, 70} + +func (i RoundingMode) String() string { + if i >= RoundingMode(len(_RoundingMode_index)-1) { + return "RoundingMode(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _RoundingMode_name[_RoundingMode_index[i]:_RoundingMode_index[i+1]] +} diff --git a/src/math/big/sqrt.go b/src/math/big/sqrt.go new file mode 100644 index 0000000..0d50164 --- /dev/null +++ b/src/math/big/sqrt.go @@ -0,0 +1,128 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "math" + "sync" +) + +var threeOnce struct { + sync.Once + v *Float +} + +func three() *Float { + threeOnce.Do(func() { + threeOnce.v = NewFloat(3.0) + }) + return threeOnce.v +} + +// Sqrt sets z to the rounded square root of x, and returns it. +// +// If z's precision is 0, it is changed to x's precision before the +// operation. Rounding is performed according to z's precision and +// rounding mode, but z's accuracy is not computed. Specifically, the +// result of z.Acc() is undefined. +// +// The function panics if x < 0.
The value of z is undefined in that +// case. +func (z *Float) Sqrt(x *Float) *Float { + if debugFloat { + x.validate() + } + + if z.prec == 0 { + z.prec = x.prec + } + + if x.Sign() == -1 { + // following IEEE754-2008 (section 7.2) + panic(ErrNaN{"square root of negative operand"}) + } + + // handle ±0 and +∞ + if x.form != finite { + z.acc = Exact + z.form = x.form + z.neg = x.neg // IEEE754-2008 requires √±0 = ±0 + return z + } + + // MantExp sets the argument's precision to the receiver's, and + // when z.prec > x.prec this will lower z.prec. Restore it after + // the MantExp call. + prec := z.prec + b := x.MantExp(z) + z.prec = prec + + // Compute √(z·2**b) as + // √( z)·2**(½b) if b is even + // √(2z)·2**(⌊½b⌋) if b > 0 is odd + // √(½z)·2**(⌈½b⌉) if b < 0 is odd + switch b % 2 { + case 0: + // nothing to do + case 1: + z.exp++ + case -1: + z.exp-- + } + // 0.25 <= z < 2.0 + + // Solving 1/x² - z = 0 avoids Quo calls and is faster, especially + // for high precisions. + z.sqrtInverse(z) + + // re-attach halved exponent + return z.SetMantExp(z, b/2) +} + +// Compute √x (to z.prec precision) by solving +// 1/t² - x = 0 +// for t (using Newton's method), and then inverting. +func (z *Float) sqrtInverse(x *Float) { + // let + // f(t) = 1/t² - x + // then + // g(t) = f(t)/f'(t) = -½t(1 - xt²) + // and the next guess is given by + // t2 = t - g(t) = ½t(3 - xt²) + u := newFloat(z.prec) + v := newFloat(z.prec) + three := three() + ng := func(t *Float) *Float { + u.prec = t.prec + v.prec = t.prec + u.Mul(t, t) // u = t² + u.Mul(x, u) // = xt² + v.Sub(three, u) // v = 3 - xt² + u.Mul(t, v) // u = t(3 - xt²) + u.exp-- // = ½t(3 - xt²) + return t.Set(u) + } + + xf, _ := x.Float64() + sqi := newFloat(z.prec) + sqi.SetFloat64(1 / math.Sqrt(xf)) + for prec := z.prec + 32; sqi.prec < prec; { + sqi.prec *= 2 + sqi = ng(sqi) + } + // sqi = 1/√x + + // x/√x = √x + z.Mul(x, sqi) +} + +// newFloat returns a new *Float with space for twice the given +// precision. 
+func newFloat(prec2 uint32) *Float { + z := new(Float) + // nat.make ensures the slice length is > 0 + z.mant = z.mant.make(int(prec2/_W) * 2) + return z +} diff --git a/src/math/big/sqrt_test.go b/src/math/big/sqrt_test.go new file mode 100644 index 0000000..d314711 --- /dev/null +++ b/src/math/big/sqrt_test.go @@ -0,0 +1,126 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "fmt" + "math" + "math/rand" + "testing" +) + +// TestFloatSqrt64 tests that Float.Sqrt of numbers with 53bit mantissa +// behaves like float math.Sqrt. +func TestFloatSqrt64(t *testing.T) { + for i := 0; i < 1e5; i++ { + if i == 1e2 && testing.Short() { + break + } + r := rand.Float64() + + got := new(Float).SetPrec(53) + got.Sqrt(NewFloat(r)) + want := NewFloat(math.Sqrt(r)) + if got.Cmp(want) != 0 { + t.Fatalf("Sqrt(%g) =\n got %g;\nwant %g", r, got, want) + } + } +} + +func TestFloatSqrt(t *testing.T) { + for _, test := range []struct { + x string + want string + }{ + // Test values were generated on Wolfram Alpha using query + // 'sqrt(N) to 350 digits' + // 350 decimal digits give up to 1000 binary digits. 
+ {"0.03125", "0.17677669529663688110021109052621225982120898442211850914708496724884155980776337985629844179095519659187673077886403712811560450698134215158051518713749197892665283324093819909447499381264409775757143376369499645074628431682460775184106467733011114982619404115381053858929018135497032545349940642599871090667456829147610370507757690729404938184321879"}, + {"0.125", "0.35355339059327376220042218105242451964241796884423701829416993449768311961552675971259688358191039318375346155772807425623120901396268430316103037427498395785330566648187639818894998762528819551514286752738999290149256863364921550368212935466022229965238808230762107717858036270994065090699881285199742181334913658295220741015515381458809876368643757"}, + {"0.5", "0.70710678118654752440084436210484903928483593768847403658833986899536623923105351942519376716382078636750692311545614851246241802792536860632206074854996791570661133296375279637789997525057639103028573505477998580298513726729843100736425870932044459930477616461524215435716072541988130181399762570399484362669827316590441482031030762917619752737287514"}, + {"2.0", "1.4142135623730950488016887242096980785696718753769480731766797379907324784621070388503875343276415727350138462309122970249248360558507372126441214970999358314132226659275055927557999505011527820605714701095599716059702745345968620147285174186408891986095523292304843087143214508397626036279952514079896872533965463318088296406206152583523950547457503"}, + {"3.0", "1.7320508075688772935274463415058723669428052538103806280558069794519330169088000370811461867572485756756261414154067030299699450949989524788116555120943736485280932319023055820679748201010846749232650153123432669033228866506722546689218379712270471316603678615880190499865373798593894676503475065760507566183481296061009476021871903250831458295239598"}, + {"4.0", "2.0"}, + + {"1p512", "1p256"}, + {"4p1024", "2p512"}, + {"9p2048", "3p1024"}, + + {"1p-1024", "1p-512"}, + {"4p-2048", "2p-1024"}, + {"9p-4096", 
"3p-2048"}, + } { + for _, prec := range []uint{24, 53, 64, 65, 100, 128, 129, 200, 256, 400, 600, 800, 1000} { + x := new(Float).SetPrec(prec) + x.Parse(test.x, 10) + + got := new(Float).SetPrec(prec).Sqrt(x) + want := new(Float).SetPrec(prec) + want.Parse(test.want, 10) + if got.Cmp(want) != 0 { + t.Errorf("prec = %d, Sqrt(%v) =\ngot %g;\nwant %g", + prec, test.x, got, want) + } + + // Square test. + // If got holds the square root of x to precision p, then + // got = √x + k + // for some k such that |k| < 2**(-p). Thus, + // got² = (√x + k)² = x + 2k√n + k² + // and the error must satisfy + // err = |got² - x| ≈ | 2k√n | < 2**(-p+1)*√n + // Ignoring the k² term for simplicity. + + // err = |got² - x| + // (but do intermediate steps with 32 guard digits to + // avoid introducing spurious rounding-related errors) + sq := new(Float).SetPrec(prec+32).Mul(got, got) + diff := new(Float).Sub(sq, x) + err := diff.Abs(diff).SetPrec(prec) + + // maxErr = 2**(-p+1)*√x + one := new(Float).SetPrec(prec).SetInt64(1) + maxErr := new(Float).Mul(new(Float).SetMantExp(one, -int(prec)+1), got) + + if err.Cmp(maxErr) >= 0 { + t.Errorf("prec = %d, Sqrt(%v) =\ngot err %g;\nwant maxErr %g", + prec, test.x, err, maxErr) + } + } + } +} + +func TestFloatSqrtSpecial(t *testing.T) { + for _, test := range []struct { + x *Float + want *Float + }{ + {NewFloat(+0), NewFloat(+0)}, + {NewFloat(-0), NewFloat(-0)}, + {NewFloat(math.Inf(+1)), NewFloat(math.Inf(+1))}, + } { + got := new(Float).Sqrt(test.x) + if got.neg != test.want.neg || got.form != test.want.form { + t.Errorf("Sqrt(%v) = %v (neg: %v); want %v (neg: %v)", + test.x, got, got.neg, test.want, test.want.neg) + } + } + +} + +// Benchmarks + +func BenchmarkFloatSqrt(b *testing.B) { + for _, prec := range []uint{64, 128, 256, 1e3, 1e4, 1e5, 1e6} { + x := NewFloat(2) + z := new(Float).SetPrec(prec) + b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) { + b.ReportAllocs() + for n := 0; n < b.N; n++ { + z.Sqrt(x) + } + }) + } +} diff --git 
a/src/math/bits.go b/src/math/bits.go new file mode 100644 index 0000000..77bcdbe --- /dev/null +++ b/src/math/bits.go @@ -0,0 +1,62 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +const ( + uvnan = 0x7FF8000000000001 + uvinf = 0x7FF0000000000000 + uvneginf = 0xFFF0000000000000 + uvone = 0x3FF0000000000000 + mask = 0x7FF + shift = 64 - 11 - 1 + bias = 1023 + signMask = 1 << 63 + fracMask = 1<<shift - 1 +) + +// Inf returns positive infinity if sign >= 0, negative infinity if sign < 0. +func Inf(sign int) float64 { + var v uint64 + if sign >= 0 { + v = uvinf + } else { + v = uvneginf + } + return Float64frombits(v) +} + +// NaN returns an IEEE 754 ``not-a-number'' value. +func NaN() float64 { return Float64frombits(uvnan) } + +// IsNaN reports whether f is an IEEE 754 ``not-a-number'' value. +func IsNaN(f float64) (is bool) { + // IEEE 754 says that only NaNs satisfy f != f. + // To avoid the floating-point hardware, could use: + // x := Float64bits(f); + // return uint32(x>>shift)&mask == mask && x != uvinf && x != uvneginf + return f != f +} + +// IsInf reports whether f is an infinity, according to sign. +// If sign > 0, IsInf reports whether f is positive infinity. +// If sign < 0, IsInf reports whether f is negative infinity. +// If sign == 0, IsInf reports whether f is either infinity. +func IsInf(f float64, sign int) bool { + // Test for infinity by comparing against maximum float. + // To avoid the floating-point hardware, could use: + // x := Float64bits(f); + // return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf; + return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64 +} + +// normalize returns a normal number y and exponent exp +// satisfying x == y × 2**exp. It assumes x is finite and non-zero. 
+func normalize(x float64) (y float64, exp int) { + const SmallestNormal = 2.2250738585072014e-308 // 2**-1022 + if Abs(x) < SmallestNormal { + return x * (1 << 52), -52 + } + return x, 0 +} diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go new file mode 100644 index 0000000..65452fe --- /dev/null +++ b/src/math/bits/bits.go @@ -0,0 +1,588 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run make_tables.go + +// Package bits implements bit counting and manipulation +// functions for the predeclared unsigned integer types. +package bits + +const uintSize = 32 << (^uint(0) >> 63) // 32 or 64 + +// UintSize is the size of a uint in bits. +const UintSize = uintSize + +// --- LeadingZeros --- + +// LeadingZeros returns the number of leading zero bits in x; the result is UintSize for x == 0. +func LeadingZeros(x uint) int { return UintSize - Len(x) } + +// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0. +func LeadingZeros8(x uint8) int { return 8 - Len8(x) } + +// LeadingZeros16 returns the number of leading zero bits in x; the result is 16 for x == 0. +func LeadingZeros16(x uint16) int { return 16 - Len16(x) } + +// LeadingZeros32 returns the number of leading zero bits in x; the result is 32 for x == 0. +func LeadingZeros32(x uint32) int { return 32 - Len32(x) } + +// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0. 
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }

// --- TrailingZeros ---

// See http://supertech.csail.mit.edu/papers/debruijn.pdf
const deBruijn32 = 0x077CB531

// deBruijn32tab is indexed by the top 5 bits of (x&-x)*deBruijn32 and gives
// the number of trailing zero bits of x; see the comment in TrailingZeros64.
var deBruijn32tab = [32]byte{
	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}

const deBruijn64 = 0x03f79d71b4ca8b09

// deBruijn64tab is the 64-bit counterpart of deBruijn32tab, indexed by the
// top 6 bits of (x&-x)*deBruijn64.
var deBruijn64tab = [64]byte{
	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}

// TrailingZeros returns the number of trailing zero bits in x; the result is UintSize for x == 0.
func TrailingZeros(x uint) int {
	// Dispatch on the platform's uint width.
	if UintSize == 32 {
		return TrailingZeros32(uint32(x))
	}
	return TrailingZeros64(uint64(x))
}

// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
func TrailingZeros8(x uint8) int {
	// ntz8tab is declared elsewhere in the package; presumably it holds the
	// trailing-zero count for every byte value.
	return int(ntz8tab[x])
}

// TrailingZeros16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
func TrailingZeros16(x uint16) int {
	if x == 0 {
		return 16
	}
	// see comment in TrailingZeros64
	// The isolated low bit is widened to uint32 so that the 32-bit de Bruijn
	// constant and table can be reused.
	return int(deBruijn32tab[uint32(x&-x)*deBruijn32>>(32-5)])
}

// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
func TrailingZeros32(x uint32) int {
	if x == 0 {
		return 32
	}
	// see comment in TrailingZeros64
	return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
}

// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
func TrailingZeros64(x uint64) int {
	if x == 0 {
		return 64
	}
	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
	//
	// x & -x leaves only the right-most bit set in the word. Let k be the
	// index of that bit. Since only a single bit is set, the value is two
	// to the power of k. Multiplying by a power of two is equivalent to
	// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
	// is such that all six bit, consecutive substrings are distinct.
	// Therefore, if we have a left shifted version of this constant we can
	// find by how many bits it was shifted by looking at which six bit
	// substring ended up at the top of the word.
	// (Knuth, volume 4, section 7.3.1)
	return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
}

// --- OnesCount ---

// Alternating bit masks of doubling run length, shared by the OnesCount,
// Reverse and ReverseBytes implementations.
const m0 = 0x5555555555555555 // 01010101 ...
const m1 = 0x3333333333333333 // 00110011 ...
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
const m3 = 0x00ff00ff00ff00ff // etc.
const m4 = 0x0000ffff0000ffff

// OnesCount returns the number of one bits ("population count") in x.
func OnesCount(x uint) int {
	// Dispatch on the platform's uint width.
	if UintSize == 32 {
		return OnesCount32(uint32(x))
	}
	return OnesCount64(uint64(x))
}

// OnesCount8 returns the number of one bits ("population count") in x.
func OnesCount8(x uint8) int {
	// pop8tab is declared elsewhere in the package; presumably it holds the
	// population count for every byte value.
	return int(pop8tab[x])
}

// OnesCount16 returns the number of one bits ("population count") in x.
func OnesCount16(x uint16) int {
	// Sum of the table entries for the high and the low byte.
	return int(pop8tab[x>>8] + pop8tab[x&0xff])
}

// OnesCount32 returns the number of one bits ("population count") in x.
func OnesCount32(x uint32) int {
	// Sum of the table entries for each of the four bytes.
	return int(pop8tab[x>>24] + pop8tab[x>>16&0xff] + pop8tab[x>>8&0xff] + pop8tab[x&0xff])
}

// OnesCount64 returns the number of one bits ("population count") in x.
func OnesCount64(x uint64) int {
	// Implementation: Parallel summing of adjacent bits.
	// See "Hacker's Delight", Chap. 5: Counting Bits.
	// The following pattern shows the general approach:
	//
	//   x = x>>1&(m0&m) + x&(m0&m)
	//   x = x>>2&(m1&m) + x&(m1&m)
	//   x = x>>4&(m2&m) + x&(m2&m)
	//   x = x>>8&(m3&m) + x&(m3&m)
	//   x = x>>16&(m4&m) + x&(m4&m)
	//   x = x>>32&(m5&m) + x&(m5&m)
	//   return int(x)
	//
	// Masking (& operations) can be left away when there's no
	// danger that a field's sum will carry over into the next
	// field: Since the result cannot be > 64, 8 bits is enough
	// and we can ignore the masks for the shifts by 8 and up.
	// Per "Hacker's Delight", the first line can be simplified
	// more, but it saves at best one instruction, so we leave
	// it alone for clarity.
	const m = 1<<64 - 1
	x = x>>1&(m0&m) + x&(m0&m)
	x = x>>2&(m1&m) + x&(m1&m)
	x = (x>>4 + x) & (m2 & m)
	x += x >> 8
	x += x >> 16
	x += x >> 32
	// The unmasked additions above leave partial sums in the higher bytes;
	// the count itself is at most 64 and sits in the low 7 bits.
	return int(x) & (1<<7 - 1)
}

// --- RotateLeft ---

// RotateLeft returns the value of x rotated left by (k mod UintSize) bits.
// To rotate x right by k bits, call RotateLeft(x, -k).
//
// This function's execution time does not depend on the inputs.
func RotateLeft(x uint, k int) uint {
	// Dispatch on the platform's uint width.
	if UintSize == 32 {
		return uint(RotateLeft32(uint32(x), k))
	}
	return uint(RotateLeft64(uint64(x), k))
}

// RotateLeft8 returns the value of x rotated left by (k mod 8) bits.
// To rotate x right by k bits, call RotateLeft8(x, -k).
//
// This function's execution time does not depend on the inputs.
func RotateLeft8(x uint8, k int) uint8 {
	const n = 8
	// n is a power of two, so masking with n-1 reduces k modulo n, also
	// for negative k.
	s := uint(k) & (n - 1)
	// For s == 0 the right shift is by the full width and yields 0.
	return x<<s | x>>(n-s)
}

// RotateLeft16 returns the value of x rotated left by (k mod 16) bits.
// To rotate x right by k bits, call RotateLeft16(x, -k).
//
// This function's execution time does not depend on the inputs.
func RotateLeft16(x uint16, k int) uint16 {
	const n = 16
	// Same reduction and shift pair as in RotateLeft8.
	s := uint(k) & (n - 1)
	return x<<s | x>>(n-s)
}

// RotateLeft32 returns the value of x rotated left by (k mod 32) bits.
+// To rotate x right by k bits, call RotateLeft32(x, -k). +// +// This function's execution time does not depend on the inputs. +func RotateLeft32(x uint32, k int) uint32 { + const n = 32 + s := uint(k) & (n - 1) + return x<<s | x>>(n-s) +} + +// RotateLeft64 returns the value of x rotated left by (k mod 64) bits. +// To rotate x right by k bits, call RotateLeft64(x, -k). +// +// This function's execution time does not depend on the inputs. +func RotateLeft64(x uint64, k int) uint64 { + const n = 64 + s := uint(k) & (n - 1) + return x<<s | x>>(n-s) +} + +// --- Reverse --- + +// Reverse returns the value of x with its bits in reversed order. +func Reverse(x uint) uint { + if UintSize == 32 { + return uint(Reverse32(uint32(x))) + } + return uint(Reverse64(uint64(x))) +} + +// Reverse8 returns the value of x with its bits in reversed order. +func Reverse8(x uint8) uint8 { + return rev8tab[x] +} + +// Reverse16 returns the value of x with its bits in reversed order. +func Reverse16(x uint16) uint16 { + return uint16(rev8tab[x>>8]) | uint16(rev8tab[x&0xff])<<8 +} + +// Reverse32 returns the value of x with its bits in reversed order. +func Reverse32(x uint32) uint32 { + const m = 1<<32 - 1 + x = x>>1&(m0&m) | x&(m0&m)<<1 + x = x>>2&(m1&m) | x&(m1&m)<<2 + x = x>>4&(m2&m) | x&(m2&m)<<4 + return ReverseBytes32(x) +} + +// Reverse64 returns the value of x with its bits in reversed order. +func Reverse64(x uint64) uint64 { + const m = 1<<64 - 1 + x = x>>1&(m0&m) | x&(m0&m)<<1 + x = x>>2&(m1&m) | x&(m1&m)<<2 + x = x>>4&(m2&m) | x&(m2&m)<<4 + return ReverseBytes64(x) +} + +// --- ReverseBytes --- + +// ReverseBytes returns the value of x with its bytes in reversed order. +// +// This function's execution time does not depend on the inputs. +func ReverseBytes(x uint) uint { + if UintSize == 32 { + return uint(ReverseBytes32(uint32(x))) + } + return uint(ReverseBytes64(uint64(x))) +} + +// ReverseBytes16 returns the value of x with its bytes in reversed order. 
+// +// This function's execution time does not depend on the inputs. +func ReverseBytes16(x uint16) uint16 { + return x>>8 | x<<8 +} + +// ReverseBytes32 returns the value of x with its bytes in reversed order. +// +// This function's execution time does not depend on the inputs. +func ReverseBytes32(x uint32) uint32 { + const m = 1<<32 - 1 + x = x>>8&(m3&m) | x&(m3&m)<<8 + return x>>16 | x<<16 +} + +// ReverseBytes64 returns the value of x with its bytes in reversed order. +// +// This function's execution time does not depend on the inputs. +func ReverseBytes64(x uint64) uint64 { + const m = 1<<64 - 1 + x = x>>8&(m3&m) | x&(m3&m)<<8 + x = x>>16&(m4&m) | x&(m4&m)<<16 + return x>>32 | x<<32 +} + +// --- Len --- + +// Len returns the minimum number of bits required to represent x; the result is 0 for x == 0. +func Len(x uint) int { + if UintSize == 32 { + return Len32(uint32(x)) + } + return Len64(uint64(x)) +} + +// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0. +func Len8(x uint8) int { + return int(len8tab[x]) +} + +// Len16 returns the minimum number of bits required to represent x; the result is 0 for x == 0. +func Len16(x uint16) (n int) { + if x >= 1<<8 { + x >>= 8 + n = 8 + } + return n + int(len8tab[x]) +} + +// Len32 returns the minimum number of bits required to represent x; the result is 0 for x == 0. +func Len32(x uint32) (n int) { + if x >= 1<<16 { + x >>= 16 + n = 16 + } + if x >= 1<<8 { + x >>= 8 + n += 8 + } + return n + int(len8tab[x]) +} + +// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0. +func Len64(x uint64) (n int) { + if x >= 1<<32 { + x >>= 32 + n = 32 + } + if x >= 1<<16 { + x >>= 16 + n += 16 + } + if x >= 1<<8 { + x >>= 8 + n += 8 + } + return n + int(len8tab[x]) +} + +// --- Add with carry --- + +// Add returns the sum with carry of x, y and carry: sum = x + y + carry. +// The carry input must be 0 or 1; otherwise the behavior is undefined. 
+// The carryOut output is guaranteed to be 0 or 1. +// +// This function's execution time does not depend on the inputs. +func Add(x, y, carry uint) (sum, carryOut uint) { + if UintSize == 32 { + s32, c32 := Add32(uint32(x), uint32(y), uint32(carry)) + return uint(s32), uint(c32) + } + s64, c64 := Add64(uint64(x), uint64(y), uint64(carry)) + return uint(s64), uint(c64) +} + +// Add32 returns the sum with carry of x, y and carry: sum = x + y + carry. +// The carry input must be 0 or 1; otherwise the behavior is undefined. +// The carryOut output is guaranteed to be 0 or 1. +// +// This function's execution time does not depend on the inputs. +func Add32(x, y, carry uint32) (sum, carryOut uint32) { + sum64 := uint64(x) + uint64(y) + uint64(carry) + sum = uint32(sum64) + carryOut = uint32(sum64 >> 32) + return +} + +// Add64 returns the sum with carry of x, y and carry: sum = x + y + carry. +// The carry input must be 0 or 1; otherwise the behavior is undefined. +// The carryOut output is guaranteed to be 0 or 1. +// +// This function's execution time does not depend on the inputs. +func Add64(x, y, carry uint64) (sum, carryOut uint64) { + sum = x + y + carry + // The sum will overflow if both top bits are set (x & y) or if one of them + // is (x | y), and a carry from the lower place happened. If such a carry + // happens, the top bit will be 1 + 0 + 1 = 0 (&^ sum). + carryOut = ((x & y) | ((x | y) &^ sum)) >> 63 + return +} + +// --- Subtract with borrow --- + +// Sub returns the difference of x, y and borrow: diff = x - y - borrow. +// The borrow input must be 0 or 1; otherwise the behavior is undefined. +// The borrowOut output is guaranteed to be 0 or 1. +// +// This function's execution time does not depend on the inputs. 
+func Sub(x, y, borrow uint) (diff, borrowOut uint) { + if UintSize == 32 { + d32, b32 := Sub32(uint32(x), uint32(y), uint32(borrow)) + return uint(d32), uint(b32) + } + d64, b64 := Sub64(uint64(x), uint64(y), uint64(borrow)) + return uint(d64), uint(b64) +} + +// Sub32 returns the difference of x, y and borrow, diff = x - y - borrow. +// The borrow input must be 0 or 1; otherwise the behavior is undefined. +// The borrowOut output is guaranteed to be 0 or 1. +// +// This function's execution time does not depend on the inputs. +func Sub32(x, y, borrow uint32) (diff, borrowOut uint32) { + diff = x - y - borrow + // The difference will underflow if the top bit of x is not set and the top + // bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a borrow + // from the lower place happens. If that borrow happens, the result will be + // 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff). + borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 31 + return +} + +// Sub64 returns the difference of x, y and borrow: diff = x - y - borrow. +// The borrow input must be 0 or 1; otherwise the behavior is undefined. +// The borrowOut output is guaranteed to be 0 or 1. +// +// This function's execution time does not depend on the inputs. +func Sub64(x, y, borrow uint64) (diff, borrowOut uint64) { + diff = x - y - borrow + // See Sub32 for the bit logic. + borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63 + return +} + +// --- Full-width multiply --- + +// Mul returns the full-width product of x and y: (hi, lo) = x * y +// with the product bits' upper half returned in hi and the lower +// half returned in lo. +// +// This function's execution time does not depend on the inputs. 
+func Mul(x, y uint) (hi, lo uint) { + if UintSize == 32 { + h, l := Mul32(uint32(x), uint32(y)) + return uint(h), uint(l) + } + h, l := Mul64(uint64(x), uint64(y)) + return uint(h), uint(l) +} + +// Mul32 returns the 64-bit product of x and y: (hi, lo) = x * y +// with the product bits' upper half returned in hi and the lower +// half returned in lo. +// +// This function's execution time does not depend on the inputs. +func Mul32(x, y uint32) (hi, lo uint32) { + tmp := uint64(x) * uint64(y) + hi, lo = uint32(tmp>>32), uint32(tmp) + return +} + +// Mul64 returns the 128-bit product of x and y: (hi, lo) = x * y +// with the product bits' upper half returned in hi and the lower +// half returned in lo. +// +// This function's execution time does not depend on the inputs. +func Mul64(x, y uint64) (hi, lo uint64) { + const mask32 = 1<<32 - 1 + x0 := x & mask32 + x1 := x >> 32 + y0 := y & mask32 + y1 := y >> 32 + w0 := x0 * y0 + t := x1*y0 + w0>>32 + w1 := t & mask32 + w2 := t >> 32 + w1 += x0 * y1 + hi = x1*y1 + w2 + w1>>32 + lo = x * y + return +} + +// --- Full-width divide --- + +// Div returns the quotient and remainder of (hi, lo) divided by y: +// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper +// half in parameter hi and the lower half in parameter lo. +// Div panics for y == 0 (division by zero) or y <= hi (quotient overflow). +func Div(hi, lo, y uint) (quo, rem uint) { + if UintSize == 32 { + q, r := Div32(uint32(hi), uint32(lo), uint32(y)) + return uint(q), uint(r) + } + q, r := Div64(uint64(hi), uint64(lo), uint64(y)) + return uint(q), uint(r) +} + +// Div32 returns the quotient and remainder of (hi, lo) divided by y: +// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper +// half in parameter hi and the lower half in parameter lo. +// Div32 panics for y == 0 (division by zero) or y <= hi (quotient overflow). 
func Div32(hi, lo, y uint32) (quo, rem uint32) {
	// y == 0 is deliberately excluded here so that the division below
	// raises the runtime's divide-by-zero panic instead of an overflow panic.
	if y != 0 && y <= hi {
		panic(overflowError)
	}
	// Assemble the 64-bit dividend and let native 64-bit division do the work.
	z := uint64(hi)<<32 | uint64(lo)
	quo, rem = uint32(z/uint64(y)), uint32(z%uint64(y))
	return
}

// Div64 returns the quotient and remainder of (hi, lo) divided by y:
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
// half in parameter hi and the lower half in parameter lo.
// Div64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
//
// NOTE(review): the body reads like schoolbook long division on 32-bit
// digits with estimate-and-correct quotient digits (cf. Knuth, TAOCP vol. 2,
// Algorithm D) — confirm against the upstream change description.
func Div64(hi, lo, y uint64) (quo, rem uint64) {
	const (
		two32  = 1 << 32
		mask32 = two32 - 1
	)
	if y == 0 {
		panic(divideError)
	}
	if y <= hi {
		panic(overflowError)
	}

	// Normalize: shift the divisor left until its top bit is set. The
	// dividend is shifted by the same amount below, and the remainder is
	// shifted back before returning.
	s := uint(LeadingZeros64(y))
	y <<= s

	// 32-bit halves of the normalized divisor.
	yn1 := y >> 32
	yn0 := y & mask32
	// Shifted dividend: un32 is its upper 64 bits, un10 the lower 64 bits,
	// the latter split into the 32-bit digits un1 and un0. For s == 0 the
	// shift lo>>(64-s) contributes nothing.
	un32 := hi<<s | lo>>(64-s)
	un10 := lo << s
	un1 := un10 >> 32
	un0 := un10 & mask32
	// Estimate the high quotient digit from the divisor's high half;
	// rhat is the remainder that goes with that estimate.
	q1 := un32 / yn1
	rhat := un32 - q1*yn1

	// Lower the estimate while it is too large; stop once rhat no longer
	// fits in 32 bits.
	for q1 >= two32 || q1*yn0 > two32*rhat+un1 {
		q1--
		rhat += yn1
		if rhat >= two32 {
			break
		}
	}

	// Subtract q1*y from the running dividend (wrapping arithmetic) and
	// repeat the estimate/correct step for the low quotient digit.
	un21 := un32*two32 + un1 - q1*y
	q0 := un21 / yn1
	rhat = un21 - q0*yn1

	for q0 >= two32 || q0*yn0 > two32*rhat+un0 {
		q0--
		rhat += yn1
		if rhat >= two32 {
			break
		}
	}

	// Combine the two digits into the quotient; shift the remainder right
	// by s to undo the normalization.
	return q1*two32 + q0, (un21*two32 + un0 - q0*y) >> s
}

// Rem returns the remainder of (hi, lo) divided by y. Rem panics for
// y == 0 (division by zero) but, unlike Div, it doesn't panic on a
// quotient overflow.
func Rem(hi, lo, y uint) uint {
	// Dispatch on the platform width of uint.
	if UintSize == 32 {
		return uint(Rem32(uint32(hi), uint32(lo), uint32(y)))
	}
	return uint(Rem64(uint64(hi), uint64(lo), uint64(y)))
}

// Rem32 returns the remainder of (hi, lo) divided by y. Rem32 panics
// for y == 0 (division by zero) but, unlike Div32, it doesn't panic
// on a quotient overflow.
func Rem32(hi, lo, y uint32) uint32 {
	// The 64-bit modulo has no quotient to overflow; y == 0 panics in the
	// % operation itself.
	return uint32((uint64(hi)<<32 | uint64(lo)) % uint64(y))
}

// Rem64 returns the remainder of (hi, lo) divided by y.
Rem64 panics +// for y == 0 (division by zero) but, unlike Div64, it doesn't panic +// on a quotient overflow. +func Rem64(hi, lo, y uint64) uint64 { + // We scale down hi so that hi < y, then use Div64 to compute the + // rem with the guarantee that it won't panic on quotient overflow. + // Given that + // hi ≡ hi%y (mod y) + // we have + // hi<<64 + lo ≡ (hi%y)<<64 + lo (mod y) + _, rem := Div64(hi%y, lo, y) + return rem +} diff --git a/src/math/bits/bits_errors.go b/src/math/bits/bits_errors.go new file mode 100644 index 0000000..61cb5c9 --- /dev/null +++ b/src/math/bits/bits_errors.go @@ -0,0 +1,16 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !compiler_bootstrap +// +build !compiler_bootstrap + +package bits + +import _ "unsafe" + +//go:linkname overflowError runtime.overflowError +var overflowError error + +//go:linkname divideError runtime.divideError +var divideError error diff --git a/src/math/bits/bits_errors_bootstrap.go b/src/math/bits/bits_errors_bootstrap.go new file mode 100644 index 0000000..4d610d3 --- /dev/null +++ b/src/math/bits/bits_errors_bootstrap.go @@ -0,0 +1,23 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build compiler_bootstrap +// +build compiler_bootstrap + +// This version used only for bootstrap (on this path we want +// to avoid use of go:linkname as applied to variables). 
+ +package bits + +type errorString string + +func (e errorString) RuntimeError() {} + +func (e errorString) Error() string { + return "runtime error: " + string(e) +} + +var overflowError = error(errorString("integer overflow")) + +var divideError = error(errorString("integer divide by zero")) diff --git a/src/math/bits/bits_tables.go b/src/math/bits/bits_tables.go new file mode 100644 index 0000000..f869b8d --- /dev/null +++ b/src/math/bits/bits_tables.go @@ -0,0 +1,79 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by go run make_tables.go. DO NOT EDIT. + +package bits + +const ntz8tab = "" + + "\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + +const pop8tab = "" + + "\x00\x01\x01\x02\x01\x02\x02\x03\x01\x02\x02\x03\x02\x03\x03\x04" + + 
"\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08" + +const rev8tab = "" + + "\x00\x80\x40\xc0\x20\xa0\x60\xe0\x10\x90\x50\xd0\x30\xb0\x70\xf0" + + "\x08\x88\x48\xc8\x28\xa8\x68\xe8\x18\x98\x58\xd8\x38\xb8\x78\xf8" + + "\x04\x84\x44\xc4\x24\xa4\x64\xe4\x14\x94\x54\xd4\x34\xb4\x74\xf4" + + "\x0c\x8c\x4c\xcc\x2c\xac\x6c\xec\x1c\x9c\x5c\xdc\x3c\xbc\x7c\xfc" + + "\x02\x82\x42\xc2\x22\xa2\x62\xe2\x12\x92\x52\xd2\x32\xb2\x72\xf2" + + "\x0a\x8a\x4a\xca\x2a\xaa\x6a\xea\x1a\x9a\x5a\xda\x3a\xba\x7a\xfa" + + "\x06\x86\x46\xc6\x26\xa6\x66\xe6\x16\x96\x56\xd6\x36\xb6\x76\xf6" + + "\x0e\x8e\x4e\xce\x2e\xae\x6e\xee\x1e\x9e\x5e\xde\x3e\xbe\x7e\xfe" + + "\x01\x81\x41\xc1\x21\xa1\x61\xe1\x11\x91\x51\xd1\x31\xb1\x71\xf1" + + "\x09\x89\x49\xc9\x29\xa9\x69\xe9\x19\x99\x59\xd9\x39\xb9\x79\xf9" + + "\x05\x85\x45\xc5\x25\xa5\x65\xe5\x15\x95\x55\xd5\x35\xb5\x75\xf5" + + "\x0d\x8d\x4d\xcd\x2d\xad\x6d\xed\x1d\x9d\x5d\xdd\x3d\xbd\x7d\xfd" + + 
"\x03\x83\x43\xc3\x23\xa3\x63\xe3\x13\x93\x53\xd3\x33\xb3\x73\xf3" + + "\x0b\x8b\x4b\xcb\x2b\xab\x6b\xeb\x1b\x9b\x5b\xdb\x3b\xbb\x7b\xfb" + + "\x07\x87\x47\xc7\x27\xa7\x67\xe7\x17\x97\x57\xd7\x37\xb7\x77\xf7" + + "\x0f\x8f\x4f\xcf\x2f\xaf\x6f\xef\x1f\x9f\x5f\xdf\x3f\xbf\x7f\xff" + +const len8tab = "" + + "\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" + + "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" + + "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" + + "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" diff --git a/src/math/bits/bits_test.go b/src/math/bits/bits_test.go new file mode 100644 index 0000000..23b4539 --- /dev/null +++ b/src/math/bits/bits_test.go @@ -0,0 +1,1347 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bits_test + +import ( + . 
"math/bits" + "runtime" + "testing" + "unsafe" +) + +func TestUintSize(t *testing.T) { + var x uint + if want := unsafe.Sizeof(x) * 8; UintSize != want { + t.Fatalf("UintSize = %d; want %d", UintSize, want) + } +} + +func TestLeadingZeros(t *testing.T) { + for i := 0; i < 256; i++ { + nlz := tab[i].nlz + for k := 0; k < 64-8; k++ { + x := uint64(i) << uint(k) + if x <= 1<<8-1 { + got := LeadingZeros8(uint8(x)) + want := nlz - k + (8 - 8) + if x == 0 { + want = 8 + } + if got != want { + t.Fatalf("LeadingZeros8(%#02x) == %d; want %d", x, got, want) + } + } + + if x <= 1<<16-1 { + got := LeadingZeros16(uint16(x)) + want := nlz - k + (16 - 8) + if x == 0 { + want = 16 + } + if got != want { + t.Fatalf("LeadingZeros16(%#04x) == %d; want %d", x, got, want) + } + } + + if x <= 1<<32-1 { + got := LeadingZeros32(uint32(x)) + want := nlz - k + (32 - 8) + if x == 0 { + want = 32 + } + if got != want { + t.Fatalf("LeadingZeros32(%#08x) == %d; want %d", x, got, want) + } + if UintSize == 32 { + got = LeadingZeros(uint(x)) + if got != want { + t.Fatalf("LeadingZeros(%#08x) == %d; want %d", x, got, want) + } + } + } + + if x <= 1<<64-1 { + got := LeadingZeros64(uint64(x)) + want := nlz - k + (64 - 8) + if x == 0 { + want = 64 + } + if got != want { + t.Fatalf("LeadingZeros64(%#016x) == %d; want %d", x, got, want) + } + if UintSize == 64 { + got = LeadingZeros(uint(x)) + if got != want { + t.Fatalf("LeadingZeros(%#016x) == %d; want %d", x, got, want) + } + } + } + } + } +} + +// Exported (global) variable serving as input for some +// of the benchmarks to ensure side-effect free calls +// are not optimized away. +var Input uint64 = DeBruijn64 + +// Exported (global) variable to store function results +// during benchmarking to ensure side-effect free calls +// are not optimized away. 
+var Output int + +func BenchmarkLeadingZeros(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += LeadingZeros(uint(Input) >> (uint(i) % UintSize)) + } + Output = s +} + +func BenchmarkLeadingZeros8(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += LeadingZeros8(uint8(Input) >> (uint(i) % 8)) + } + Output = s +} + +func BenchmarkLeadingZeros16(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += LeadingZeros16(uint16(Input) >> (uint(i) % 16)) + } + Output = s +} + +func BenchmarkLeadingZeros32(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += LeadingZeros32(uint32(Input) >> (uint(i) % 32)) + } + Output = s +} + +func BenchmarkLeadingZeros64(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += LeadingZeros64(uint64(Input) >> (uint(i) % 64)) + } + Output = s +} + +func TestTrailingZeros(t *testing.T) { + for i := 0; i < 256; i++ { + ntz := tab[i].ntz + for k := 0; k < 64-8; k++ { + x := uint64(i) << uint(k) + want := ntz + k + if x <= 1<<8-1 { + got := TrailingZeros8(uint8(x)) + if x == 0 { + want = 8 + } + if got != want { + t.Fatalf("TrailingZeros8(%#02x) == %d; want %d", x, got, want) + } + } + + if x <= 1<<16-1 { + got := TrailingZeros16(uint16(x)) + if x == 0 { + want = 16 + } + if got != want { + t.Fatalf("TrailingZeros16(%#04x) == %d; want %d", x, got, want) + } + } + + if x <= 1<<32-1 { + got := TrailingZeros32(uint32(x)) + if x == 0 { + want = 32 + } + if got != want { + t.Fatalf("TrailingZeros32(%#08x) == %d; want %d", x, got, want) + } + if UintSize == 32 { + got = TrailingZeros(uint(x)) + if got != want { + t.Fatalf("TrailingZeros(%#08x) == %d; want %d", x, got, want) + } + } + } + + if x <= 1<<64-1 { + got := TrailingZeros64(uint64(x)) + if x == 0 { + want = 64 + } + if got != want { + t.Fatalf("TrailingZeros64(%#016x) == %d; want %d", x, got, want) + } + if UintSize == 64 { + got = TrailingZeros(uint(x)) + if got != want { + t.Fatalf("TrailingZeros(%#016x) == %d; want %d", x, got, want) + } + 
} + } + } + } +} + +func BenchmarkTrailingZeros(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += TrailingZeros(uint(Input) << (uint(i) % UintSize)) + } + Output = s +} + +func BenchmarkTrailingZeros8(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += TrailingZeros8(uint8(Input) << (uint(i) % 8)) + } + Output = s +} + +func BenchmarkTrailingZeros16(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += TrailingZeros16(uint16(Input) << (uint(i) % 16)) + } + Output = s +} + +func BenchmarkTrailingZeros32(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += TrailingZeros32(uint32(Input) << (uint(i) % 32)) + } + Output = s +} + +func BenchmarkTrailingZeros64(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += TrailingZeros64(uint64(Input) << (uint(i) % 64)) + } + Output = s +} + +func TestOnesCount(t *testing.T) { + var x uint64 + for i := 0; i <= 64; i++ { + testOnesCount(t, x, i) + x = x<<1 | 1 + } + + for i := 64; i >= 0; i-- { + testOnesCount(t, x, i) + x = x << 1 + } + + for i := 0; i < 256; i++ { + for k := 0; k < 64-8; k++ { + testOnesCount(t, uint64(i)<<uint(k), tab[i].pop) + } + } +} + +func testOnesCount(t *testing.T, x uint64, want int) { + if x <= 1<<8-1 { + got := OnesCount8(uint8(x)) + if got != want { + t.Fatalf("OnesCount8(%#02x) == %d; want %d", uint8(x), got, want) + } + } + + if x <= 1<<16-1 { + got := OnesCount16(uint16(x)) + if got != want { + t.Fatalf("OnesCount16(%#04x) == %d; want %d", uint16(x), got, want) + } + } + + if x <= 1<<32-1 { + got := OnesCount32(uint32(x)) + if got != want { + t.Fatalf("OnesCount32(%#08x) == %d; want %d", uint32(x), got, want) + } + if UintSize == 32 { + got = OnesCount(uint(x)) + if got != want { + t.Fatalf("OnesCount(%#08x) == %d; want %d", uint32(x), got, want) + } + } + } + + if x <= 1<<64-1 { + got := OnesCount64(uint64(x)) + if got != want { + t.Fatalf("OnesCount64(%#016x) == %d; want %d", x, got, want) + } + if UintSize == 64 { + got = 
OnesCount(uint(x)) + if got != want { + t.Fatalf("OnesCount(%#016x) == %d; want %d", x, got, want) + } + } + } +} + +func BenchmarkOnesCount(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount(uint(Input)) + } + Output = s +} + +func BenchmarkOnesCount8(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount8(uint8(Input)) + } + Output = s +} + +func BenchmarkOnesCount16(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount16(uint16(Input)) + } + Output = s +} + +func BenchmarkOnesCount32(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount32(uint32(Input)) + } + Output = s +} + +func BenchmarkOnesCount64(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount64(uint64(Input)) + } + Output = s +} + +func TestRotateLeft(t *testing.T) { + var m uint64 = DeBruijn64 + + for k := uint(0); k < 128; k++ { + x8 := uint8(m) + got8 := RotateLeft8(x8, int(k)) + want8 := x8<<(k&0x7) | x8>>(8-k&0x7) + if got8 != want8 { + t.Fatalf("RotateLeft8(%#02x, %d) == %#02x; want %#02x", x8, k, got8, want8) + } + got8 = RotateLeft8(want8, -int(k)) + if got8 != x8 { + t.Fatalf("RotateLeft8(%#02x, -%d) == %#02x; want %#02x", want8, k, got8, x8) + } + + x16 := uint16(m) + got16 := RotateLeft16(x16, int(k)) + want16 := x16<<(k&0xf) | x16>>(16-k&0xf) + if got16 != want16 { + t.Fatalf("RotateLeft16(%#04x, %d) == %#04x; want %#04x", x16, k, got16, want16) + } + got16 = RotateLeft16(want16, -int(k)) + if got16 != x16 { + t.Fatalf("RotateLeft16(%#04x, -%d) == %#04x; want %#04x", want16, k, got16, x16) + } + + x32 := uint32(m) + got32 := RotateLeft32(x32, int(k)) + want32 := x32<<(k&0x1f) | x32>>(32-k&0x1f) + if got32 != want32 { + t.Fatalf("RotateLeft32(%#08x, %d) == %#08x; want %#08x", x32, k, got32, want32) + } + got32 = RotateLeft32(want32, -int(k)) + if got32 != x32 { + t.Fatalf("RotateLeft32(%#08x, -%d) == %#08x; want %#08x", want32, k, got32, x32) + } + if UintSize == 32 { + x := uint(m) + got 
:= RotateLeft(x, int(k)) + want := x<<(k&0x1f) | x>>(32-k&0x1f) + if got != want { + t.Fatalf("RotateLeft(%#08x, %d) == %#08x; want %#08x", x, k, got, want) + } + got = RotateLeft(want, -int(k)) + if got != x { + t.Fatalf("RotateLeft(%#08x, -%d) == %#08x; want %#08x", want, k, got, x) + } + } + + x64 := uint64(m) + got64 := RotateLeft64(x64, int(k)) + want64 := x64<<(k&0x3f) | x64>>(64-k&0x3f) + if got64 != want64 { + t.Fatalf("RotateLeft64(%#016x, %d) == %#016x; want %#016x", x64, k, got64, want64) + } + got64 = RotateLeft64(want64, -int(k)) + if got64 != x64 { + t.Fatalf("RotateLeft64(%#016x, -%d) == %#016x; want %#016x", want64, k, got64, x64) + } + if UintSize == 64 { + x := uint(m) + got := RotateLeft(x, int(k)) + want := x<<(k&0x3f) | x>>(64-k&0x3f) + if got != want { + t.Fatalf("RotateLeft(%#016x, %d) == %#016x; want %#016x", x, k, got, want) + } + got = RotateLeft(want, -int(k)) + if got != x { + t.Fatalf("RotateLeft(%#08x, -%d) == %#08x; want %#08x", want, k, got, x) + } + } + } +} + +func BenchmarkRotateLeft(b *testing.B) { + var s uint + for i := 0; i < b.N; i++ { + s += RotateLeft(uint(Input), i) + } + Output = int(s) +} + +func BenchmarkRotateLeft8(b *testing.B) { + var s uint8 + for i := 0; i < b.N; i++ { + s += RotateLeft8(uint8(Input), i) + } + Output = int(s) +} + +func BenchmarkRotateLeft16(b *testing.B) { + var s uint16 + for i := 0; i < b.N; i++ { + s += RotateLeft16(uint16(Input), i) + } + Output = int(s) +} + +func BenchmarkRotateLeft32(b *testing.B) { + var s uint32 + for i := 0; i < b.N; i++ { + s += RotateLeft32(uint32(Input), i) + } + Output = int(s) +} + +func BenchmarkRotateLeft64(b *testing.B) { + var s uint64 + for i := 0; i < b.N; i++ { + s += RotateLeft64(uint64(Input), i) + } + Output = int(s) +} + +func TestReverse(t *testing.T) { + // test each bit + for i := uint(0); i < 64; i++ { + testReverse(t, uint64(1)<<i, uint64(1)<<(63-i)) + } + + // test a few patterns + for _, test := range []struct { + x, r uint64 + }{ + {0, 0}, + {0x1, 
0x8 << 60}, + {0x2, 0x4 << 60}, + {0x3, 0xc << 60}, + {0x4, 0x2 << 60}, + {0x5, 0xa << 60}, + {0x6, 0x6 << 60}, + {0x7, 0xe << 60}, + {0x8, 0x1 << 60}, + {0x9, 0x9 << 60}, + {0xa, 0x5 << 60}, + {0xb, 0xd << 60}, + {0xc, 0x3 << 60}, + {0xd, 0xb << 60}, + {0xe, 0x7 << 60}, + {0xf, 0xf << 60}, + {0x5686487, 0xe12616a000000000}, + {0x0123456789abcdef, 0xf7b3d591e6a2c480}, + } { + testReverse(t, test.x, test.r) + testReverse(t, test.r, test.x) + } +} + +func testReverse(t *testing.T, x64, want64 uint64) { + x8 := uint8(x64) + got8 := Reverse8(x8) + want8 := uint8(want64 >> (64 - 8)) + if got8 != want8 { + t.Fatalf("Reverse8(%#02x) == %#02x; want %#02x", x8, got8, want8) + } + + x16 := uint16(x64) + got16 := Reverse16(x16) + want16 := uint16(want64 >> (64 - 16)) + if got16 != want16 { + t.Fatalf("Reverse16(%#04x) == %#04x; want %#04x", x16, got16, want16) + } + + x32 := uint32(x64) + got32 := Reverse32(x32) + want32 := uint32(want64 >> (64 - 32)) + if got32 != want32 { + t.Fatalf("Reverse32(%#08x) == %#08x; want %#08x", x32, got32, want32) + } + if UintSize == 32 { + x := uint(x32) + got := Reverse(x) + want := uint(want32) + if got != want { + t.Fatalf("Reverse(%#08x) == %#08x; want %#08x", x, got, want) + } + } + + got64 := Reverse64(x64) + if got64 != want64 { + t.Fatalf("Reverse64(%#016x) == %#016x; want %#016x", x64, got64, want64) + } + if UintSize == 64 { + x := uint(x64) + got := Reverse(x) + want := uint(want64) + if got != want { + t.Fatalf("Reverse(%#08x) == %#016x; want %#016x", x, got, want) + } + } +} + +func BenchmarkReverse(b *testing.B) { + var s uint + for i := 0; i < b.N; i++ { + s += Reverse(uint(i)) + } + Output = int(s) +} + +func BenchmarkReverse8(b *testing.B) { + var s uint8 + for i := 0; i < b.N; i++ { + s += Reverse8(uint8(i)) + } + Output = int(s) +} + +func BenchmarkReverse16(b *testing.B) { + var s uint16 + for i := 0; i < b.N; i++ { + s += Reverse16(uint16(i)) + } + Output = int(s) +} + +func BenchmarkReverse32(b *testing.B) { + var s 
uint32 + for i := 0; i < b.N; i++ { + s += Reverse32(uint32(i)) + } + Output = int(s) +} + +func BenchmarkReverse64(b *testing.B) { + var s uint64 + for i := 0; i < b.N; i++ { + s += Reverse64(uint64(i)) + } + Output = int(s) +} + +func TestReverseBytes(t *testing.T) { + for _, test := range []struct { + x, r uint64 + }{ + {0, 0}, + {0x01, 0x01 << 56}, + {0x0123, 0x2301 << 48}, + {0x012345, 0x452301 << 40}, + {0x01234567, 0x67452301 << 32}, + {0x0123456789, 0x8967452301 << 24}, + {0x0123456789ab, 0xab8967452301 << 16}, + {0x0123456789abcd, 0xcdab8967452301 << 8}, + {0x0123456789abcdef, 0xefcdab8967452301 << 0}, + } { + testReverseBytes(t, test.x, test.r) + testReverseBytes(t, test.r, test.x) + } +} + +func testReverseBytes(t *testing.T, x64, want64 uint64) { + x16 := uint16(x64) + got16 := ReverseBytes16(x16) + want16 := uint16(want64 >> (64 - 16)) + if got16 != want16 { + t.Fatalf("ReverseBytes16(%#04x) == %#04x; want %#04x", x16, got16, want16) + } + + x32 := uint32(x64) + got32 := ReverseBytes32(x32) + want32 := uint32(want64 >> (64 - 32)) + if got32 != want32 { + t.Fatalf("ReverseBytes32(%#08x) == %#08x; want %#08x", x32, got32, want32) + } + if UintSize == 32 { + x := uint(x32) + got := ReverseBytes(x) + want := uint(want32) + if got != want { + t.Fatalf("ReverseBytes(%#08x) == %#08x; want %#08x", x, got, want) + } + } + + got64 := ReverseBytes64(x64) + if got64 != want64 { + t.Fatalf("ReverseBytes64(%#016x) == %#016x; want %#016x", x64, got64, want64) + } + if UintSize == 64 { + x := uint(x64) + got := ReverseBytes(x) + want := uint(want64) + if got != want { + t.Fatalf("ReverseBytes(%#016x) == %#016x; want %#016x", x, got, want) + } + } +} + +func BenchmarkReverseBytes(b *testing.B) { + var s uint + for i := 0; i < b.N; i++ { + s += ReverseBytes(uint(i)) + } + Output = int(s) +} + +func BenchmarkReverseBytes16(b *testing.B) { + var s uint16 + for i := 0; i < b.N; i++ { + s += ReverseBytes16(uint16(i)) + } + Output = int(s) +} + +func 
BenchmarkReverseBytes32(b *testing.B) { + var s uint32 + for i := 0; i < b.N; i++ { + s += ReverseBytes32(uint32(i)) + } + Output = int(s) +} + +func BenchmarkReverseBytes64(b *testing.B) { + var s uint64 + for i := 0; i < b.N; i++ { + s += ReverseBytes64(uint64(i)) + } + Output = int(s) +} + +func TestLen(t *testing.T) { + for i := 0; i < 256; i++ { + len := 8 - tab[i].nlz + for k := 0; k < 64-8; k++ { + x := uint64(i) << uint(k) + want := 0 + if x != 0 { + want = len + k + } + if x <= 1<<8-1 { + got := Len8(uint8(x)) + if got != want { + t.Fatalf("Len8(%#02x) == %d; want %d", x, got, want) + } + } + + if x <= 1<<16-1 { + got := Len16(uint16(x)) + if got != want { + t.Fatalf("Len16(%#04x) == %d; want %d", x, got, want) + } + } + + if x <= 1<<32-1 { + got := Len32(uint32(x)) + if got != want { + t.Fatalf("Len32(%#08x) == %d; want %d", x, got, want) + } + if UintSize == 32 { + got := Len(uint(x)) + if got != want { + t.Fatalf("Len(%#08x) == %d; want %d", x, got, want) + } + } + } + + if x <= 1<<64-1 { + got := Len64(uint64(x)) + if got != want { + t.Fatalf("Len64(%#016x) == %d; want %d", x, got, want) + } + if UintSize == 64 { + got := Len(uint(x)) + if got != want { + t.Fatalf("Len(%#016x) == %d; want %d", x, got, want) + } + } + } + } + } +} + +const ( + _M = 1<<UintSize - 1 + _M32 = 1<<32 - 1 + _M64 = 1<<64 - 1 +) + +func TestAddSubUint(t *testing.T) { + test := func(msg string, f func(x, y, c uint) (z, cout uint), x, y, c, z, cout uint) { + z1, cout1 := f(x, y, c) + if z1 != z || cout1 != cout { + t.Errorf("%s: got z:cout = %#x:%#x; want %#x:%#x", msg, z1, cout1, z, cout) + } + } + for _, a := range []struct{ x, y, c, z, cout uint }{ + {0, 0, 0, 0, 0}, + {0, 1, 0, 1, 0}, + {0, 0, 1, 1, 0}, + {0, 1, 1, 2, 0}, + {12345, 67890, 0, 80235, 0}, + {12345, 67890, 1, 80236, 0}, + {_M, 1, 0, 0, 1}, + {_M, 0, 1, 0, 1}, + {_M, 1, 1, 1, 1}, + {_M, _M, 0, _M - 1, 1}, + {_M, _M, 1, _M, 1}, + } { + test("Add", Add, a.x, a.y, a.c, a.z, a.cout) + test("Add symmetric", Add, a.y, 
a.x, a.c, a.z, a.cout) + test("Sub", Sub, a.z, a.x, a.c, a.y, a.cout) + test("Sub symmetric", Sub, a.z, a.y, a.c, a.x, a.cout) + // The above code can't test intrinsic implementation, because the passed function is not called directly. + // The following code uses a closure to test the intrinsic version in case the function is intrinsified. + test("Add intrinsic", func(x, y, c uint) (uint, uint) { return Add(x, y, c) }, a.x, a.y, a.c, a.z, a.cout) + test("Add intrinsic symmetric", func(x, y, c uint) (uint, uint) { return Add(x, y, c) }, a.y, a.x, a.c, a.z, a.cout) + test("Sub intrinsic", func(x, y, c uint) (uint, uint) { return Sub(x, y, c) }, a.z, a.x, a.c, a.y, a.cout) + test("Sub intrinsic symmetric", func(x, y, c uint) (uint, uint) { return Sub(x, y, c) }, a.z, a.y, a.c, a.x, a.cout) + + } +} + +func TestAddSubUint32(t *testing.T) { + test := func(msg string, f func(x, y, c uint32) (z, cout uint32), x, y, c, z, cout uint32) { + z1, cout1 := f(x, y, c) + if z1 != z || cout1 != cout { + t.Errorf("%s: got z:cout = %#x:%#x; want %#x:%#x", msg, z1, cout1, z, cout) + } + } + for _, a := range []struct{ x, y, c, z, cout uint32 }{ + {0, 0, 0, 0, 0}, + {0, 1, 0, 1, 0}, + {0, 0, 1, 1, 0}, + {0, 1, 1, 2, 0}, + {12345, 67890, 0, 80235, 0}, + {12345, 67890, 1, 80236, 0}, + {_M32, 1, 0, 0, 1}, + {_M32, 0, 1, 0, 1}, + {_M32, 1, 1, 1, 1}, + {_M32, _M32, 0, _M32 - 1, 1}, + {_M32, _M32, 1, _M32, 1}, + } { + test("Add32", Add32, a.x, a.y, a.c, a.z, a.cout) + test("Add32 symmetric", Add32, a.y, a.x, a.c, a.z, a.cout) + test("Sub32", Sub32, a.z, a.x, a.c, a.y, a.cout) + test("Sub32 symmetric", Sub32, a.z, a.y, a.c, a.x, a.cout) + } +} + +func TestAddSubUint64(t *testing.T) { + test := func(msg string, f func(x, y, c uint64) (z, cout uint64), x, y, c, z, cout uint64) { + z1, cout1 := f(x, y, c) + if z1 != z || cout1 != cout { + t.Errorf("%s: got z:cout = %#x:%#x; want %#x:%#x", msg, z1, cout1, z, cout) + } + } + for _, a := range []struct{ x, y, c, z, cout uint64 }{ + {0, 0, 0, 0, 
0}, + {0, 1, 0, 1, 0}, + {0, 0, 1, 1, 0}, + {0, 1, 1, 2, 0}, + {12345, 67890, 0, 80235, 0}, + {12345, 67890, 1, 80236, 0}, + {_M64, 1, 0, 0, 1}, + {_M64, 0, 1, 0, 1}, + {_M64, 1, 1, 1, 1}, + {_M64, _M64, 0, _M64 - 1, 1}, + {_M64, _M64, 1, _M64, 1}, + } { + test("Add64", Add64, a.x, a.y, a.c, a.z, a.cout) + test("Add64 symmetric", Add64, a.y, a.x, a.c, a.z, a.cout) + test("Sub64", Sub64, a.z, a.x, a.c, a.y, a.cout) + test("Sub64 symmetric", Sub64, a.z, a.y, a.c, a.x, a.cout) + // The above code can't test intrinsic implementation, because the passed function is not called directly. + // The following code uses a closure to test the intrinsic version in case the function is intrinsified. + test("Add64 intrinsic", func(x, y, c uint64) (uint64, uint64) { return Add64(x, y, c) }, a.x, a.y, a.c, a.z, a.cout) + test("Add64 intrinsic symmetric", func(x, y, c uint64) (uint64, uint64) { return Add64(x, y, c) }, a.y, a.x, a.c, a.z, a.cout) + test("Sub64 intrinsic", func(x, y, c uint64) (uint64, uint64) { return Sub64(x, y, c) }, a.z, a.x, a.c, a.y, a.cout) + test("Sub64 intrinsic symmetric", func(x, y, c uint64) (uint64, uint64) { return Sub64(x, y, c) }, a.z, a.y, a.c, a.x, a.cout) + } +} + +func TestAdd64OverflowPanic(t *testing.T) { + // Test that 64-bit overflow panics fire correctly. + // These are designed to improve coverage of compiler intrinsics. 
+ tests := []func(uint64, uint64) uint64{ + func(a, b uint64) uint64 { + x, c := Add64(a, b, 0) + if c > 0 { + panic("overflow") + } + return x + }, + func(a, b uint64) uint64 { + x, c := Add64(a, b, 0) + if c != 0 { + panic("overflow") + } + return x + }, + func(a, b uint64) uint64 { + x, c := Add64(a, b, 0) + if c == 1 { + panic("overflow") + } + return x + }, + func(a, b uint64) uint64 { + x, c := Add64(a, b, 0) + if c != 1 { + return x + } + panic("overflow") + }, + func(a, b uint64) uint64 { + x, c := Add64(a, b, 0) + if c == 0 { + return x + } + panic("overflow") + }, + } + for _, test := range tests { + shouldPanic := func(f func()) { + defer func() { + if err := recover(); err == nil { + t.Fatalf("expected panic") + } + }() + f() + } + + // overflow + shouldPanic(func() { test(_M64, 1) }) + shouldPanic(func() { test(1, _M64) }) + shouldPanic(func() { test(_M64, _M64) }) + + // no overflow + test(_M64, 0) + test(0, 0) + test(1, 1) + } +} + +func TestSub64OverflowPanic(t *testing.T) { + // Test that 64-bit overflow panics fire correctly. + // These are designed to improve coverage of compiler intrinsics. 
+ tests := []func(uint64, uint64) uint64{ + func(a, b uint64) uint64 { + x, c := Sub64(a, b, 0) + if c > 0 { + panic("overflow") + } + return x + }, + func(a, b uint64) uint64 { + x, c := Sub64(a, b, 0) + if c != 0 { + panic("overflow") + } + return x + }, + func(a, b uint64) uint64 { + x, c := Sub64(a, b, 0) + if c == 1 { + panic("overflow") + } + return x + }, + func(a, b uint64) uint64 { + x, c := Sub64(a, b, 0) + if c != 1 { + return x + } + panic("overflow") + }, + func(a, b uint64) uint64 { + x, c := Sub64(a, b, 0) + if c == 0 { + return x + } + panic("overflow") + }, + } + for _, test := range tests { + shouldPanic := func(f func()) { + defer func() { + if err := recover(); err == nil { + t.Fatalf("expected panic") + } + }() + f() + } + + // overflow + shouldPanic(func() { test(0, 1) }) + shouldPanic(func() { test(1, _M64) }) + shouldPanic(func() { test(_M64-1, _M64) }) + + // no overflow + test(_M64, 0) + test(0, 0) + test(1, 1) + } +} + +func TestMulDiv(t *testing.T) { + testMul := func(msg string, f func(x, y uint) (hi, lo uint), x, y, hi, lo uint) { + hi1, lo1 := f(x, y) + if hi1 != hi || lo1 != lo { + t.Errorf("%s: got hi:lo = %#x:%#x; want %#x:%#x", msg, hi1, lo1, hi, lo) + } + } + testDiv := func(msg string, f func(hi, lo, y uint) (q, r uint), hi, lo, y, q, r uint) { + q1, r1 := f(hi, lo, y) + if q1 != q || r1 != r { + t.Errorf("%s: got q:r = %#x:%#x; want %#x:%#x", msg, q1, r1, q, r) + } + } + for _, a := range []struct { + x, y uint + hi, lo, r uint + }{ + {1 << (UintSize - 1), 2, 1, 0, 1}, + {_M, _M, _M - 1, 1, 42}, + } { + testMul("Mul", Mul, a.x, a.y, a.hi, a.lo) + testMul("Mul symmetric", Mul, a.y, a.x, a.hi, a.lo) + testDiv("Div", Div, a.hi, a.lo+a.r, a.y, a.x, a.r) + testDiv("Div symmetric", Div, a.hi, a.lo+a.r, a.x, a.y, a.r) + // The above code can't test intrinsic implementation, because the passed function is not called directly. + // The following code uses a closure to test the intrinsic version in case the function is intrinsified. 
+ testMul("Mul intrinsic", func(x, y uint) (uint, uint) { return Mul(x, y) }, a.x, a.y, a.hi, a.lo) + testMul("Mul intrinsic symmetric", func(x, y uint) (uint, uint) { return Mul(x, y) }, a.y, a.x, a.hi, a.lo) + testDiv("Div intrinsic", func(hi, lo, y uint) (uint, uint) { return Div(hi, lo, y) }, a.hi, a.lo+a.r, a.y, a.x, a.r) + testDiv("Div intrinsic symmetric", func(hi, lo, y uint) (uint, uint) { return Div(hi, lo, y) }, a.hi, a.lo+a.r, a.x, a.y, a.r) + } +} + +func TestMulDiv32(t *testing.T) { + testMul := func(msg string, f func(x, y uint32) (hi, lo uint32), x, y, hi, lo uint32) { + hi1, lo1 := f(x, y) + if hi1 != hi || lo1 != lo { + t.Errorf("%s: got hi:lo = %#x:%#x; want %#x:%#x", msg, hi1, lo1, hi, lo) + } + } + testDiv := func(msg string, f func(hi, lo, y uint32) (q, r uint32), hi, lo, y, q, r uint32) { + q1, r1 := f(hi, lo, y) + if q1 != q || r1 != r { + t.Errorf("%s: got q:r = %#x:%#x; want %#x:%#x", msg, q1, r1, q, r) + } + } + for _, a := range []struct { + x, y uint32 + hi, lo, r uint32 + }{ + {1 << 31, 2, 1, 0, 1}, + {0xc47dfa8c, 50911, 0x98a4, 0x998587f4, 13}, + {_M32, _M32, _M32 - 1, 1, 42}, + } { + testMul("Mul32", Mul32, a.x, a.y, a.hi, a.lo) + testMul("Mul32 symmetric", Mul32, a.y, a.x, a.hi, a.lo) + testDiv("Div32", Div32, a.hi, a.lo+a.r, a.y, a.x, a.r) + testDiv("Div32 symmetric", Div32, a.hi, a.lo+a.r, a.x, a.y, a.r) + } +} + +func TestMulDiv64(t *testing.T) { + testMul := func(msg string, f func(x, y uint64) (hi, lo uint64), x, y, hi, lo uint64) { + hi1, lo1 := f(x, y) + if hi1 != hi || lo1 != lo { + t.Errorf("%s: got hi:lo = %#x:%#x; want %#x:%#x", msg, hi1, lo1, hi, lo) + } + } + testDiv := func(msg string, f func(hi, lo, y uint64) (q, r uint64), hi, lo, y, q, r uint64) { + q1, r1 := f(hi, lo, y) + if q1 != q || r1 != r { + t.Errorf("%s: got q:r = %#x:%#x; want %#x:%#x", msg, q1, r1, q, r) + } + } + for _, a := range []struct { + x, y uint64 + hi, lo, r uint64 + }{ + {1 << 63, 2, 1, 0, 1}, + {0x3626229738a3b9, 0xd8988a9f1cc4a61, 
0x2dd0712657fe8, 0x9dd6a3364c358319, 13}, + {_M64, _M64, _M64 - 1, 1, 42}, + } { + testMul("Mul64", Mul64, a.x, a.y, a.hi, a.lo) + testMul("Mul64 symmetric", Mul64, a.y, a.x, a.hi, a.lo) + testDiv("Div64", Div64, a.hi, a.lo+a.r, a.y, a.x, a.r) + testDiv("Div64 symmetric", Div64, a.hi, a.lo+a.r, a.x, a.y, a.r) + // The above code can't test intrinsic implementation, because the passed function is not called directly. + // The following code uses a closure to test the intrinsic version in case the function is intrinsified. + testMul("Mul64 intrinsic", func(x, y uint64) (uint64, uint64) { return Mul64(x, y) }, a.x, a.y, a.hi, a.lo) + testMul("Mul64 intrinsic symmetric", func(x, y uint64) (uint64, uint64) { return Mul64(x, y) }, a.y, a.x, a.hi, a.lo) + testDiv("Div64 intrinsic", func(hi, lo, y uint64) (uint64, uint64) { return Div64(hi, lo, y) }, a.hi, a.lo+a.r, a.y, a.x, a.r) + testDiv("Div64 intrinsic symmetric", func(hi, lo, y uint64) (uint64, uint64) { return Div64(hi, lo, y) }, a.hi, a.lo+a.r, a.x, a.y, a.r) + } +} + +const ( + divZeroError = "runtime error: integer divide by zero" + overflowError = "runtime error: integer overflow" +) + +func TestDivPanicOverflow(t *testing.T) { + // Expect a panic + defer func() { + if err := recover(); err == nil { + t.Error("Div should have panicked when y<=hi") + } else if e, ok := err.(runtime.Error); !ok || e.Error() != overflowError { + t.Errorf("Div expected panic: %q, got: %q ", overflowError, e.Error()) + } + }() + q, r := Div(1, 0, 1) + t.Errorf("undefined q, r = %v, %v calculated when Div should have panicked", q, r) +} + +func TestDiv32PanicOverflow(t *testing.T) { + // Expect a panic + defer func() { + if err := recover(); err == nil { + t.Error("Div32 should have panicked when y<=hi") + } else if e, ok := err.(runtime.Error); !ok || e.Error() != overflowError { + t.Errorf("Div32 expected panic: %q, got: %q ", overflowError, e.Error()) + } + }() + q, r := Div32(1, 0, 1) + t.Errorf("undefined q, r = %v, %v calculated 
when Div32 should have panicked", q, r) +} + +func TestDiv64PanicOverflow(t *testing.T) { + // Expect a panic + defer func() { + if err := recover(); err == nil { + t.Error("Div64 should have panicked when y<=hi") + } else if e, ok := err.(runtime.Error); !ok || e.Error() != overflowError { + t.Errorf("Div64 expected panic: %q, got: %q ", overflowError, e.Error()) + } + }() + q, r := Div64(1, 0, 1) + t.Errorf("undefined q, r = %v, %v calculated when Div64 should have panicked", q, r) +} + +func TestDivPanicZero(t *testing.T) { + // Expect a panic + defer func() { + if err := recover(); err == nil { + t.Error("Div should have panicked when y==0") + } else if e, ok := err.(runtime.Error); !ok || e.Error() != divZeroError { + t.Errorf("Div expected panic: %q, got: %q ", divZeroError, e.Error()) + } + }() + q, r := Div(1, 1, 0) + t.Errorf("undefined q, r = %v, %v calculated when Div should have panicked", q, r) +} + +func TestDiv32PanicZero(t *testing.T) { + // Expect a panic + defer func() { + if err := recover(); err == nil { + t.Error("Div32 should have panicked when y==0") + } else if e, ok := err.(runtime.Error); !ok || e.Error() != divZeroError { + t.Errorf("Div32 expected panic: %q, got: %q ", divZeroError, e.Error()) + } + }() + q, r := Div32(1, 1, 0) + t.Errorf("undefined q, r = %v, %v calculated when Div32 should have panicked", q, r) +} + +func TestDiv64PanicZero(t *testing.T) { + // Expect a panic + defer func() { + if err := recover(); err == nil { + t.Error("Div64 should have panicked when y==0") + } else if e, ok := err.(runtime.Error); !ok || e.Error() != divZeroError { + t.Errorf("Div64 expected panic: %q, got: %q ", divZeroError, e.Error()) + } + }() + q, r := Div64(1, 1, 0) + t.Errorf("undefined q, r = %v, %v calculated when Div64 should have panicked", q, r) +} + +func TestRem32(t *testing.T) { + // Sanity check: for non-overflowing dividends, the result is the + // same as the rem returned by Div32 + hi, lo, y := uint32(510510), uint32(9699690), 
uint32(510510+1) // ensure hi < y + for i := 0; i < 1000; i++ { + r := Rem32(hi, lo, y) + _, r2 := Div32(hi, lo, y) + if r != r2 { + t.Errorf("Rem32(%v, %v, %v) returned %v, but Div32 returned rem %v", hi, lo, y, r, r2) + } + y += 13 + } +} + +func TestRem32Overflow(t *testing.T) { + // To trigger a quotient overflow, we need y <= hi + hi, lo, y := uint32(510510), uint32(9699690), uint32(7) + for i := 0; i < 1000; i++ { + r := Rem32(hi, lo, y) + _, r2 := Div64(0, uint64(hi)<<32|uint64(lo), uint64(y)) + if r != uint32(r2) { + t.Errorf("Rem32(%v, %v, %v) returned %v, but Div64 returned rem %v", hi, lo, y, r, r2) + } + y += 13 + } +} + +func TestRem64(t *testing.T) { + // Sanity check: for non-overflowing dividends, the result is the + // same as the rem returned by Div64 + hi, lo, y := uint64(510510), uint64(9699690), uint64(510510+1) // ensure hi < y + for i := 0; i < 1000; i++ { + r := Rem64(hi, lo, y) + _, r2 := Div64(hi, lo, y) + if r != r2 { + t.Errorf("Rem64(%v, %v, %v) returned %v, but Div64 returned rem %v", hi, lo, y, r, r2) + } + y += 13 + } +} + +func TestRem64Overflow(t *testing.T) { + Rem64Tests := []struct { + hi, lo, y uint64 + rem uint64 + }{ + // Testcases computed using Python 3, as: + // >>> hi = 42; lo = 1119; y = 42 + // >>> ((hi<<64)+lo) % y + {42, 1119, 42, 27}, + {42, 1119, 38, 9}, + {42, 1119, 26, 23}, + {469, 0, 467, 271}, + {469, 0, 113, 58}, + {111111, 111111, 1171, 803}, + {3968194946088682615, 3192705705065114702, 1000037, 56067}, + } + + for _, rt := range Rem64Tests { + if rt.hi < rt.y { + t.Fatalf("Rem64(%v, %v, %v) is not a test with quo overflow", rt.hi, rt.lo, rt.y) + } + rem := Rem64(rt.hi, rt.lo, rt.y) + if rem != rt.rem { + t.Errorf("Rem64(%v, %v, %v) returned %v, wanted %v", + rt.hi, rt.lo, rt.y, rem, rt.rem) + } + } +} + +func BenchmarkAdd(b *testing.B) { + var z, c uint + for i := 0; i < b.N; i++ { + z, c = Add(uint(Input), uint(i), c) + } + Output = int(z + c) +} + +func BenchmarkAdd32(b *testing.B) { + var z, c uint32 + for 
i := 0; i < b.N; i++ { + z, c = Add32(uint32(Input), uint32(i), c) + } + Output = int(z + c) +} + +func BenchmarkAdd64(b *testing.B) { + var z, c uint64 + for i := 0; i < b.N; i++ { + z, c = Add64(uint64(Input), uint64(i), c) + } + Output = int(z + c) +} + +func BenchmarkAdd64multiple(b *testing.B) { + var z0 = uint64(Input) + var z1 = uint64(Input) + var z2 = uint64(Input) + var z3 = uint64(Input) + for i := 0; i < b.N; i++ { + var c uint64 + z0, c = Add64(z0, uint64(i), c) + z1, c = Add64(z1, uint64(i), c) + z2, c = Add64(z2, uint64(i), c) + z3, _ = Add64(z3, uint64(i), c) + } + Output = int(z0 + z1 + z2 + z3) +} + +func BenchmarkSub(b *testing.B) { + var z, c uint + for i := 0; i < b.N; i++ { + z, c = Sub(uint(Input), uint(i), c) + } + Output = int(z + c) +} + +func BenchmarkSub32(b *testing.B) { + var z, c uint32 + for i := 0; i < b.N; i++ { + z, c = Sub32(uint32(Input), uint32(i), c) + } + Output = int(z + c) +} + +func BenchmarkSub64(b *testing.B) { + var z, c uint64 + for i := 0; i < b.N; i++ { + z, c = Sub64(uint64(Input), uint64(i), c) + } + Output = int(z + c) +} + +func BenchmarkSub64multiple(b *testing.B) { + var z0 = uint64(Input) + var z1 = uint64(Input) + var z2 = uint64(Input) + var z3 = uint64(Input) + for i := 0; i < b.N; i++ { + var c uint64 + z0, c = Sub64(z0, uint64(i), c) + z1, c = Sub64(z1, uint64(i), c) + z2, c = Sub64(z2, uint64(i), c) + z3, _ = Sub64(z3, uint64(i), c) + } + Output = int(z0 + z1 + z2 + z3) +} + +func BenchmarkMul(b *testing.B) { + var hi, lo uint + for i := 0; i < b.N; i++ { + hi, lo = Mul(uint(Input), uint(i)) + } + Output = int(hi + lo) +} + +func BenchmarkMul32(b *testing.B) { + var hi, lo uint32 + for i := 0; i < b.N; i++ { + hi, lo = Mul32(uint32(Input), uint32(i)) + } + Output = int(hi + lo) +} + +func BenchmarkMul64(b *testing.B) { + var hi, lo uint64 + for i := 0; i < b.N; i++ { + hi, lo = Mul64(uint64(Input), uint64(i)) + } + Output = int(hi + lo) +} + +func BenchmarkDiv(b *testing.B) { + var q, r uint + for i := 
0; i < b.N; i++ { + q, r = Div(1, uint(i), uint(Input)) + } + Output = int(q + r) +} + +func BenchmarkDiv32(b *testing.B) { + var q, r uint32 + for i := 0; i < b.N; i++ { + q, r = Div32(1, uint32(i), uint32(Input)) + } + Output = int(q + r) +} + +func BenchmarkDiv64(b *testing.B) { + var q, r uint64 + for i := 0; i < b.N; i++ { + q, r = Div64(1, uint64(i), uint64(Input)) + } + Output = int(q + r) +} + +// ---------------------------------------------------------------------------- +// Testing support + +type entry = struct { + nlz, ntz, pop int +} + +// tab contains results for all uint8 values +var tab [256]entry + +func init() { + tab[0] = entry{8, 8, 0} + for i := 1; i < len(tab); i++ { + // nlz + x := i // x != 0 + n := 0 + for x&0x80 == 0 { + n++ + x <<= 1 + } + tab[i].nlz = n + + // ntz + x = i // x != 0 + n = 0 + for x&1 == 0 { + n++ + x >>= 1 + } + tab[i].ntz = n + + // pop + x = i // x != 0 + n = 0 + for x != 0 { + n += int(x & 1) + x >>= 1 + } + tab[i].pop = n + } +} diff --git a/src/math/bits/example_math_test.go b/src/math/bits/example_math_test.go new file mode 100644 index 0000000..4bb466f --- /dev/null +++ b/src/math/bits/example_math_test.go @@ -0,0 +1,202 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bits_test + +import ( + "fmt" + "math/bits" +) + +func ExampleAdd32() { + // First number is 33<<32 + 12 + n1 := []uint32{33, 12} + // Second number is 21<<32 + 23 + n2 := []uint32{21, 23} + // Add them together without producing carry. + d1, carry := bits.Add32(n1[1], n2[1], 0) + d0, _ := bits.Add32(n1[0], n2[0], carry) + nsum := []uint32{d0, d1} + fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + + // First number is 1<<32 + 2147483648 + n1 = []uint32{1, 0x80000000} + // Second number is 1<<32 + 2147483648 + n2 = []uint32{1, 0x80000000} + // Add them together producing carry. 
+ d1, carry = bits.Add32(n1[1], n2[1], 0) + d0, _ = bits.Add32(n1[0], n2[0], carry) + nsum = []uint32{d0, d1} + fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + // Output: + // [33 12] + [21 23] = [54 35] (carry bit was 0) + // [1 2147483648] + [1 2147483648] = [3 0] (carry bit was 1) +} + +func ExampleAdd64() { + // First number is 33<<64 + 12 + n1 := []uint64{33, 12} + // Second number is 21<<64 + 23 + n2 := []uint64{21, 23} + // Add them together without producing carry. + d1, carry := bits.Add64(n1[1], n2[1], 0) + d0, _ := bits.Add64(n1[0], n2[0], carry) + nsum := []uint64{d0, d1} + fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + + // First number is 1<<64 + 9223372036854775808 + n1 = []uint64{1, 0x8000000000000000} + // Second number is 1<<64 + 9223372036854775808 + n2 = []uint64{1, 0x8000000000000000} + // Add them together producing carry. + d1, carry = bits.Add64(n1[1], n2[1], 0) + d0, _ = bits.Add64(n1[0], n2[0], carry) + nsum = []uint64{d0, d1} + fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + // Output: + // [33 12] + [21 23] = [54 35] (carry bit was 0) + // [1 9223372036854775808] + [1 9223372036854775808] = [3 0] (carry bit was 1) +} + +func ExampleSub32() { + // First number is 33<<32 + 23 + n1 := []uint32{33, 23} + // Second number is 21<<32 + 12 + n2 := []uint32{21, 12} + // Sub them together without producing carry. + d1, carry := bits.Sub32(n1[1], n2[1], 0) + d0, _ := bits.Sub32(n1[0], n2[0], carry) + nsum := []uint32{d0, d1} + fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + + // First number is 3<<32 + 2147483647 + n1 = []uint32{3, 0x7fffffff} + // Second number is 1<<32 + 2147483648 + n2 = []uint32{1, 0x80000000} + // Sub them together producing carry. 
+ d1, carry = bits.Sub32(n1[1], n2[1], 0) + d0, _ = bits.Sub32(n1[0], n2[0], carry) + nsum = []uint32{d0, d1} + fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + // Output: + // [33 23] - [21 12] = [12 11] (carry bit was 0) + // [3 2147483647] - [1 2147483648] = [1 4294967295] (carry bit was 1) +} + +func ExampleSub64() { + // First number is 33<<64 + 23 + n1 := []uint64{33, 23} + // Second number is 21<<64 + 12 + n2 := []uint64{21, 12} + // Sub them together without producing carry. + d1, carry := bits.Sub64(n1[1], n2[1], 0) + d0, _ := bits.Sub64(n1[0], n2[0], carry) + nsum := []uint64{d0, d1} + fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + + // First number is 3<<64 + 9223372036854775807 + n1 = []uint64{3, 0x7fffffffffffffff} + // Second number is 1<<64 + 9223372036854775808 + n2 = []uint64{1, 0x8000000000000000} + // Sub them together producing carry. + d1, carry = bits.Sub64(n1[1], n2[1], 0) + d0, _ = bits.Sub64(n1[0], n2[0], carry) + nsum = []uint64{d0, d1} + fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry) + // Output: + // [33 23] - [21 12] = [12 11] (carry bit was 0) + // [3 9223372036854775807] - [1 9223372036854775808] = [1 18446744073709551615] (carry bit was 1) +} + +func ExampleMul32() { + // First number is 0<<32 + 12 + n1 := []uint32{0, 12} + // Second number is 0<<32 + 12 + n2 := []uint32{0, 12} + // Multiply them together without producing overflow. + hi, lo := bits.Mul32(n1[1], n2[1]) + nsum := []uint32{hi, lo} + fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum) + + // First number is 0<<32 + 2147483648 + n1 = []uint32{0, 0x80000000} + // Second number is 0<<32 + 2 + n2 = []uint32{0, 2} + // Multiply them together producing overflow. 
+ hi, lo = bits.Mul32(n1[1], n2[1]) + nsum = []uint32{hi, lo} + fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum) + // Output: + // 12 * 12 = [0 144] + // 2147483648 * 2 = [1 0] +} + +func ExampleMul64() { + // First number is 0<<64 + 12 + n1 := []uint64{0, 12} + // Second number is 0<<64 + 12 + n2 := []uint64{0, 12} + // Multiply them together without producing overflow. + hi, lo := bits.Mul64(n1[1], n2[1]) + nsum := []uint64{hi, lo} + fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum) + + // First number is 0<<64 + 9223372036854775808 + n1 = []uint64{0, 0x8000000000000000} + // Second number is 0<<64 + 2 + n2 = []uint64{0, 2} + // Multiply them together producing overflow. + hi, lo = bits.Mul64(n1[1], n2[1]) + nsum = []uint64{hi, lo} + fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum) + // Output: + // 12 * 12 = [0 144] + // 9223372036854775808 * 2 = [1 0] +} + +func ExampleDiv32() { + // First number is 0<<32 + 6 + n1 := []uint32{0, 6} + // Second number is 0<<32 + 3 + n2 := []uint32{0, 3} + // Divide them together. + quo, rem := bits.Div32(n1[0], n1[1], n2[1]) + nsum := []uint32{quo, rem} + fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum) + + // First number is 2<<32 + 2147483648 + n1 = []uint32{2, 0x80000000} + // Second number is 0<<32 + 2147483648 + n2 = []uint32{0, 0x80000000} + // Divide them together. + quo, rem = bits.Div32(n1[0], n1[1], n2[1]) + nsum = []uint32{quo, rem} + fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum) + // Output: + // [0 6] / 3 = [2 0] + // [2 2147483648] / 2147483648 = [5 0] +} + +func ExampleDiv64() { + // First number is 0<<64 + 6 + n1 := []uint64{0, 6} + // Second number is 0<<64 + 3 + n2 := []uint64{0, 3} + // Divide them together. 
+ quo, rem := bits.Div64(n1[0], n1[1], n2[1]) + nsum := []uint64{quo, rem} + fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum) + + // First number is 2<<64 + 9223372036854775808 + n1 = []uint64{2, 0x8000000000000000} + // Second number is 0<<64 + 9223372036854775808 + n2 = []uint64{0, 0x8000000000000000} + // Divide them together. + quo, rem = bits.Div64(n1[0], n1[1], n2[1]) + nsum = []uint64{quo, rem} + fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum) + // Output: + // [0 6] / 3 = [2 0] + // [2 9223372036854775808] / 9223372036854775808 = [5 0] +} diff --git a/src/math/bits/example_test.go b/src/math/bits/example_test.go new file mode 100644 index 0000000..b2ed2cb --- /dev/null +++ b/src/math/bits/example_test.go @@ -0,0 +1,210 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by go run make_examples.go. DO NOT EDIT. + +package bits_test + +import ( + "fmt" + "math/bits" +) + +func ExampleLeadingZeros8() { + fmt.Printf("LeadingZeros8(%08b) = %d\n", 1, bits.LeadingZeros8(1)) + // Output: + // LeadingZeros8(00000001) = 7 +} + +func ExampleLeadingZeros16() { + fmt.Printf("LeadingZeros16(%016b) = %d\n", 1, bits.LeadingZeros16(1)) + // Output: + // LeadingZeros16(0000000000000001) = 15 +} + +func ExampleLeadingZeros32() { + fmt.Printf("LeadingZeros32(%032b) = %d\n", 1, bits.LeadingZeros32(1)) + // Output: + // LeadingZeros32(00000000000000000000000000000001) = 31 +} + +func ExampleLeadingZeros64() { + fmt.Printf("LeadingZeros64(%064b) = %d\n", 1, bits.LeadingZeros64(1)) + // Output: + // LeadingZeros64(0000000000000000000000000000000000000000000000000000000000000001) = 63 +} + +func ExampleTrailingZeros8() { + fmt.Printf("TrailingZeros8(%08b) = %d\n", 14, bits.TrailingZeros8(14)) + // Output: + // TrailingZeros8(00001110) = 1 +} + +func ExampleTrailingZeros16() { + fmt.Printf("TrailingZeros16(%016b) = %d\n", 14, 
bits.TrailingZeros16(14)) + // Output: + // TrailingZeros16(0000000000001110) = 1 +} + +func ExampleTrailingZeros32() { + fmt.Printf("TrailingZeros32(%032b) = %d\n", 14, bits.TrailingZeros32(14)) + // Output: + // TrailingZeros32(00000000000000000000000000001110) = 1 +} + +func ExampleTrailingZeros64() { + fmt.Printf("TrailingZeros64(%064b) = %d\n", 14, bits.TrailingZeros64(14)) + // Output: + // TrailingZeros64(0000000000000000000000000000000000000000000000000000000000001110) = 1 +} + +func ExampleOnesCount() { + fmt.Printf("OnesCount(%b) = %d\n", 14, bits.OnesCount(14)) + // Output: + // OnesCount(1110) = 3 +} + +func ExampleOnesCount8() { + fmt.Printf("OnesCount8(%08b) = %d\n", 14, bits.OnesCount8(14)) + // Output: + // OnesCount8(00001110) = 3 +} + +func ExampleOnesCount16() { + fmt.Printf("OnesCount16(%016b) = %d\n", 14, bits.OnesCount16(14)) + // Output: + // OnesCount16(0000000000001110) = 3 +} + +func ExampleOnesCount32() { + fmt.Printf("OnesCount32(%032b) = %d\n", 14, bits.OnesCount32(14)) + // Output: + // OnesCount32(00000000000000000000000000001110) = 3 +} + +func ExampleOnesCount64() { + fmt.Printf("OnesCount64(%064b) = %d\n", 14, bits.OnesCount64(14)) + // Output: + // OnesCount64(0000000000000000000000000000000000000000000000000000000000001110) = 3 +} + +func ExampleRotateLeft8() { + fmt.Printf("%08b\n", 15) + fmt.Printf("%08b\n", bits.RotateLeft8(15, 2)) + fmt.Printf("%08b\n", bits.RotateLeft8(15, -2)) + // Output: + // 00001111 + // 00111100 + // 11000011 +} + +func ExampleRotateLeft16() { + fmt.Printf("%016b\n", 15) + fmt.Printf("%016b\n", bits.RotateLeft16(15, 2)) + fmt.Printf("%016b\n", bits.RotateLeft16(15, -2)) + // Output: + // 0000000000001111 + // 0000000000111100 + // 1100000000000011 +} + +func ExampleRotateLeft32() { + fmt.Printf("%032b\n", 15) + fmt.Printf("%032b\n", bits.RotateLeft32(15, 2)) + fmt.Printf("%032b\n", bits.RotateLeft32(15, -2)) + // Output: + // 00000000000000000000000000001111 + // 00000000000000000000000000111100 + // 
11000000000000000000000000000011 +} + +func ExampleRotateLeft64() { + fmt.Printf("%064b\n", 15) + fmt.Printf("%064b\n", bits.RotateLeft64(15, 2)) + fmt.Printf("%064b\n", bits.RotateLeft64(15, -2)) + // Output: + // 0000000000000000000000000000000000000000000000000000000000001111 + // 0000000000000000000000000000000000000000000000000000000000111100 + // 1100000000000000000000000000000000000000000000000000000000000011 +} + +func ExampleReverse8() { + fmt.Printf("%08b\n", 19) + fmt.Printf("%08b\n", bits.Reverse8(19)) + // Output: + // 00010011 + // 11001000 +} + +func ExampleReverse16() { + fmt.Printf("%016b\n", 19) + fmt.Printf("%016b\n", bits.Reverse16(19)) + // Output: + // 0000000000010011 + // 1100100000000000 +} + +func ExampleReverse32() { + fmt.Printf("%032b\n", 19) + fmt.Printf("%032b\n", bits.Reverse32(19)) + // Output: + // 00000000000000000000000000010011 + // 11001000000000000000000000000000 +} + +func ExampleReverse64() { + fmt.Printf("%064b\n", 19) + fmt.Printf("%064b\n", bits.Reverse64(19)) + // Output: + // 0000000000000000000000000000000000000000000000000000000000010011 + // 1100100000000000000000000000000000000000000000000000000000000000 +} + +func ExampleReverseBytes16() { + fmt.Printf("%016b\n", 15) + fmt.Printf("%016b\n", bits.ReverseBytes16(15)) + // Output: + // 0000000000001111 + // 0000111100000000 +} + +func ExampleReverseBytes32() { + fmt.Printf("%032b\n", 15) + fmt.Printf("%032b\n", bits.ReverseBytes32(15)) + // Output: + // 00000000000000000000000000001111 + // 00001111000000000000000000000000 +} + +func ExampleReverseBytes64() { + fmt.Printf("%064b\n", 15) + fmt.Printf("%064b\n", bits.ReverseBytes64(15)) + // Output: + // 0000000000000000000000000000000000000000000000000000000000001111 + // 0000111100000000000000000000000000000000000000000000000000000000 +} + +func ExampleLen8() { + fmt.Printf("Len8(%08b) = %d\n", 8, bits.Len8(8)) + // Output: + // Len8(00001000) = 4 +} + +func ExampleLen16() { + fmt.Printf("Len16(%016b) = %d\n", 8, 
bits.Len16(8)) + // Output: + // Len16(0000000000001000) = 4 +} + +func ExampleLen32() { + fmt.Printf("Len32(%032b) = %d\n", 8, bits.Len32(8)) + // Output: + // Len32(00000000000000000000000000001000) = 4 +} + +func ExampleLen64() { + fmt.Printf("Len64(%064b) = %d\n", 8, bits.Len64(8)) + // Output: + // Len64(0000000000000000000000000000000000000000000000000000000000001000) = 4 +} diff --git a/src/math/bits/export_test.go b/src/math/bits/export_test.go new file mode 100644 index 0000000..8c6f933 --- /dev/null +++ b/src/math/bits/export_test.go @@ -0,0 +1,7 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bits + +const DeBruijn64 = deBruijn64 diff --git a/src/math/bits/make_examples.go b/src/math/bits/make_examples.go new file mode 100644 index 0000000..92e9aab --- /dev/null +++ b/src/math/bits/make_examples.go @@ -0,0 +1,113 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore +// +build ignore + +// This program generates example_test.go. + +package main + +import ( + "bytes" + "fmt" + "log" + "math/bits" + "os" +) + +const header = `// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by go run make_examples.go. DO NOT EDIT. 
+ +package bits_test + +import ( + "fmt" + "math/bits" +) +` + +func main() { + w := bytes.NewBuffer([]byte(header)) + + for _, e := range []struct { + name string + in int + out [4]any + out2 [4]any + }{ + { + name: "LeadingZeros", + in: 1, + out: [4]any{bits.LeadingZeros8(1), bits.LeadingZeros16(1), bits.LeadingZeros32(1), bits.LeadingZeros64(1)}, + }, + { + name: "TrailingZeros", + in: 14, + out: [4]any{bits.TrailingZeros8(14), bits.TrailingZeros16(14), bits.TrailingZeros32(14), bits.TrailingZeros64(14)}, + }, + { + name: "OnesCount", + in: 14, + out: [4]any{bits.OnesCount8(14), bits.OnesCount16(14), bits.OnesCount32(14), bits.OnesCount64(14)}, + }, + { + name: "RotateLeft", + in: 15, + out: [4]any{bits.RotateLeft8(15, 2), bits.RotateLeft16(15, 2), bits.RotateLeft32(15, 2), bits.RotateLeft64(15, 2)}, + out2: [4]any{bits.RotateLeft8(15, -2), bits.RotateLeft16(15, -2), bits.RotateLeft32(15, -2), bits.RotateLeft64(15, -2)}, + }, + { + name: "Reverse", + in: 19, + out: [4]any{bits.Reverse8(19), bits.Reverse16(19), bits.Reverse32(19), bits.Reverse64(19)}, + }, + { + name: "ReverseBytes", + in: 15, + out: [4]any{nil, bits.ReverseBytes16(15), bits.ReverseBytes32(15), bits.ReverseBytes64(15)}, + }, + { + name: "Len", + in: 8, + out: [4]any{bits.Len8(8), bits.Len16(8), bits.Len32(8), bits.Len64(8)}, + }, + } { + for i, size := range []int{8, 16, 32, 64} { + if e.out[i] == nil { + continue // function doesn't exist + } + f := fmt.Sprintf("%s%d", e.name, size) + fmt.Fprintf(w, "\nfunc Example%s() {\n", f) + switch e.name { + case "RotateLeft", "Reverse", "ReverseBytes": + fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", %d)\n", size, e.in) + if e.name == "RotateLeft" { + fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", bits.%s(%d, 2))\n", size, f, e.in) + fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", bits.%s(%d, -2))\n", size, f, e.in) + } else { + fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", bits.%s(%d))\n", size, f, e.in) + } + fmt.Fprintf(w, "\t// Output:\n") + fmt.Fprintf(w, "\t// 
%0*b\n", size, e.in) + fmt.Fprintf(w, "\t// %0*b\n", size, e.out[i]) + if e.name == "RotateLeft" && e.out2[i] != nil { + fmt.Fprintf(w, "\t// %0*b\n", size, e.out2[i]) + } + default: + fmt.Fprintf(w, "\tfmt.Printf(\"%s(%%0%db) = %%d\\n\", %d, bits.%s(%d))\n", f, size, e.in, f, e.in) + fmt.Fprintf(w, "\t// Output:\n") + fmt.Fprintf(w, "\t// %s(%0*b) = %d\n", f, size, e.in, e.out[i]) + } + fmt.Fprintf(w, "}\n") + } + } + + if err := os.WriteFile("example_test.go", w.Bytes(), 0666); err != nil { + log.Fatal(err) + } +} diff --git a/src/math/bits/make_tables.go b/src/math/bits/make_tables.go new file mode 100644 index 0000000..867025e --- /dev/null +++ b/src/math/bits/make_tables.go @@ -0,0 +1,92 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore +// +build ignore + +// This program generates bits_tables.go. + +package main + +import ( + "bytes" + "fmt" + "go/format" + "io" + "log" + "os" +) + +var header = []byte(`// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by go run make_tables.go. DO NOT EDIT. + +package bits + +`) + +func main() { + buf := bytes.NewBuffer(header) + + gen(buf, "ntz8tab", ntz8) + gen(buf, "pop8tab", pop8) + gen(buf, "rev8tab", rev8) + gen(buf, "len8tab", len8) + + out, err := format.Source(buf.Bytes()) + if err != nil { + log.Fatal(err) + } + + err = os.WriteFile("bits_tables.go", out, 0666) + if err != nil { + log.Fatal(err) + } +} + +func gen(w io.Writer, name string, f func(uint8) uint8) { + // Use a const string to allow the compiler to constant-evaluate lookups at constant index. 
+ fmt.Fprintf(w, "const %s = \"\"+\n\"", name) + for i := 0; i < 256; i++ { + fmt.Fprintf(w, "\\x%02x", f(uint8(i))) + if i%16 == 15 && i != 255 { + fmt.Fprint(w, "\"+\n\"") + } + } + fmt.Fprint(w, "\"\n\n") +} + +func ntz8(x uint8) (n uint8) { + for x&1 == 0 && n < 8 { + x >>= 1 + n++ + } + return +} + +func pop8(x uint8) (n uint8) { + for x != 0 { + x &= x - 1 + n++ + } + return +} + +func rev8(x uint8) (r uint8) { + for i := 8; i > 0; i-- { + r = r<<1 | x&1 + x >>= 1 + } + return +} + +func len8(x uint8) (n uint8) { + for x != 0 { + x >>= 1 + n++ + } + return +} diff --git a/src/math/cbrt.go b/src/math/cbrt.go new file mode 100644 index 0000000..45c8ecb --- /dev/null +++ b/src/math/cbrt.go @@ -0,0 +1,84 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The go code is a modified version of the original C code from +// http://www.netlib.org/fdlibm/s_cbrt.c and came with this notice. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +// Cbrt returns the cube root of x. 
//
// Special cases are:
//
//	Cbrt(±0) = ±0
//	Cbrt(±Inf) = ±Inf
//	Cbrt(NaN) = NaN
func Cbrt(x float64) float64 {
	// Prefer the architecture-specific routine when the build provides one;
	// otherwise use the portable implementation below.
	if haveArchCbrt {
		return archCbrt(x)
	}
	return cbrt(x)
}

// cbrt is the portable cube-root implementation that Cbrt falls back to when
// haveArchCbrt is false. It builds a coarse estimate by integer arithmetic on
// the IEEE-754 bit pattern, refines it with a rational correction, and
// finishes with a single Newton step.
func cbrt(x float64) float64 {
	const (
		// B1 and B2 are added to the upper 32 bits of the bit pattern after
		// it has been divided by 3. B2 is used for subnormal input that has
		// been pre-scaled by 2**54; its leading term is 18 (= 54/3) smaller
		// than B1's.
		B1 = 715094163 // (682-0.03306235651)*2**20
		B2 = 696219795 // (664-0.03306235651)*2**20
		// C..G are the coefficients of the rational refinement step.
		C              = 5.42857142857142815906e-01  // 19/35     = 0x3FE15F15F15F15F1
		D              = -7.05306122448979611050e-01 // -864/1225 = 0xBFE691DE2532C834
		E              = 1.41428571428571436819e+00  // 99/70     = 0x3FF6A0EA0EA0EA0F
		F              = 1.60714285714285720630e+00  // 45/28     = 0x3FF9B6DB6DB6DB6E
		G              = 3.57142857142857150787e-01  // 5/14      = 0x3FD6DB6DB6DB6DB7
		SmallestNormal = 2.22507385850720138309e-308 // 2**-1022  = 0x0010000000000000
	)
	// special cases: ±0, NaN and ±Inf are returned unchanged, which also
	// preserves the sign of zero and infinity.
	switch {
	case x == 0 || IsNaN(x) || IsInf(x, 0):
		return x
	}

	// Work on |x| and remember the sign; it is re-applied at the end.
	sign := false
	if x < 0 {
		x = -x
		sign = true
	}

	// rough cbrt to 5 bits: divide the raw bit pattern by 3 and add B1 to
	// the high word.
	t := Float64frombits(Float64bits(x)/3 + B1<<32)
	if x < SmallestNormal {
		// subnormal number: scale x up by 2**54 first, then redo the
		// estimate with B2, the constant matching the scaled input.
		t = float64(1 << 54) // set t= 2**54
		t *= x
		t = Float64frombits(Float64bits(t)/3 + B2<<32)
	}

	// new cbrt to 23 bits
	r := t * t / x
	s := C + r*t
	t *= G + F/(s+E+D/s)

	// chop to 22 bits, make larger than cbrt(x): the mask clears the low
	// 30 bits of the bit pattern and 1<<30 bumps the lowest retained bit.
	t = Float64frombits(Float64bits(t)&(0xFFFFFFFFC<<28) + 1<<30)

	// one step newton iteration to 53 bits with error less than 0.667ulps
	s = t * t // t*t is exact
	r = x / s
	w := t + t
	r = (r - t) / (w + r) // r-t is exact (the inherited comment said "r-s", but t is the operand subtracted)
	t = t + t*r

	// restore the sign bit
	if sign {
		t = -t
	}
	return t
}
diff --git a/src/math/cbrt_s390x.s b/src/math/cbrt_s390x.s new file mode 100644 index 0000000..87bba53 --- /dev/null +++ b/src/math/cbrt_s390x.s @@ -0,0 +1,156 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00 +DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00 +DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00 +DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00 +DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00 +DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00 +DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625 +DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00 +DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336. +GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72 + +// Index tables +DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202 +DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000 +DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605 +DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303 +DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a +DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808 +DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f +DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d +DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312 +DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010 +GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80 + +DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141 +DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130 +DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112 +DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101 +DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0 +DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2 +DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1 +DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0 +DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092 +DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081 +DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070 +DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052 +DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041 +DATA ·cbrttab22068<> + 104(SB)/8, 
$0x40003200310030 +DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012 +DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001 +GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128 + +DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1 +DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90 +DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532 +DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1 +DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90 +DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532 +DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1 +DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90 +DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1 +DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90 +DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532 +DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1 +DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90 +DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532 +DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1 +DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90 +GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128 + +// Cbrt returns the cube root of the argument. +// +// Special cases are: +// Cbrt(±0) = ±0 +// Cbrt(±Inf) = ±Inf +// Cbrt(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·cbrtAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·cbrtrodataL9<>+0(SB), R9 + LGDR F0, R2 + WORD $0xC039000F //iilf %r3,1048575 + BYTE $0xFF + BYTE $0xFF + SRAD $32, R2 + WORD $0xB9170012 //llgtr %r1,%r2 + MOVW R1, R6 + MOVW R3, R7 + CMPBLE R6, R7, L2 + WORD $0xC0397FEF //iilf %r3,2146435071 + BYTE $0xFF + BYTE $0xFF + MOVW R3, R7 + CMPBLE R6, R7, L8 +L1: + FMOVD F0, ret+8(FP) + RET +L3: +L2: + LTDBR F0, F0 + BEQ L1 + FMOVD F0, F2 + WORD $0xED209040 //mdb %f2,.L10-.L9(%r9) + BYTE $0x00 + BYTE $0x1C + MOVH $0x200, R4 + LGDR F2, R2 + SRAD $32, R2 +L4: + RISBGZ $57, $62, $39, R2, R3 + MOVD $·cbrttab12067<>+0(SB), R1 + WORD $0x48131000 //lh %r1,0(%r3,%r1) + RISBGZ $57, $62, $45, R2, R3 + MOVD $·cbrttab22068<>+0(SB), R5 + RISBGNZ $60, $63, $48, R2, R2 + WORD $0x4A135000 //ah %r1,0(%r3,%r5) + BYTE $0x18 //lr %r3,%r1 + BYTE $0x31 + MOVD $·cbrttab32069<>+0(SB), R1 + FMOVD 56(R9), F1 + FMOVD 48(R9), F5 + WORD $0xEC23393B //rosbg %r2,%r3,57,59,4 + BYTE $0x04 + BYTE $0x56 + WORD $0xE3121000 //llc %r1,0(%r2,%r1) + BYTE $0x00 + BYTE $0x94 + ADDW R3, R1 + ADDW R4, R1 + SLW $16, R1, R1 + SLD $32, R1, R1 + LDGR R1, F2 + WFMDB V2, V2, V4 + WFMDB V4, V0, V6 + WFMSDB V4, V6, V2, V4 + FMOVD 40(R9), F6 + FMSUB F1, F4, F2 + FMOVD 32(R9), F4 + WFMDB V2, V2, V3 + FMOVD 24(R9), F1 + FMUL F3, F0 + FMOVD 16(R9), F3 + WFMADB V2, V0, V5, V2 + FMOVD 8(R9), F5 + FMADD F6, F2, F4 + WFMADB V2, V1, V3, V1 + WFMDB V2, V2, V6 + FMOVD 0(R9), F3 + WFMADB V4, V6, V1, V4 + WFMADB V2, V5, V3, V2 + FMADD F4, F6, F2 + FMADD F2, F0, F0 + FMOVD F0, ret+8(FP) + RET +L8: + MOVH $0x0, R4 + BR L4 diff --git a/src/math/cmplx/abs.go b/src/math/cmplx/abs.go new file mode 100644 index 0000000..2f89d1b --- /dev/null +++ b/src/math/cmplx/abs.go @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Package cmplx provides basic constants and mathematical functions for +// complex numbers. Special case handling conforms to the C99 standard +// Annex G IEC 60559-compatible complex arithmetic. +package cmplx + +import "math" + +// Abs returns the absolute value (also called the modulus) of x. +func Abs(x complex128) float64 { return math.Hypot(real(x), imag(x)) } diff --git a/src/math/cmplx/asin.go b/src/math/cmplx/asin.go new file mode 100644 index 0000000..30d019e --- /dev/null +++ b/src/math/cmplx/asin.go @@ -0,0 +1,221 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex circular arc sine +// +// DESCRIPTION: +// +// Inverse complex sine: +// 2 +// w = -i clog( iz + csqrt( 1 - z ) ). 
+// +// casin(z) = -i casinh(iz) +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 10100 2.1e-15 3.4e-16 +// IEEE -10,+10 30000 2.2e-14 2.7e-15 +// Larger relative error can be observed for z near zero. +// Also tested by csin(casin(z)) = z. + +// Asin returns the inverse sine of x. +func Asin(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case im == 0 && math.Abs(re) <= 1: + return complex(math.Asin(re), im) + case re == 0 && math.Abs(im) <= 1: + return complex(re, math.Asinh(im)) + case math.IsNaN(im): + switch { + case re == 0: + return complex(re, math.NaN()) + case math.IsInf(re, 0): + return complex(math.NaN(), re) + default: + return NaN() + } + case math.IsInf(im, 0): + switch { + case math.IsNaN(re): + return x + case math.IsInf(re, 0): + return complex(math.Copysign(math.Pi/4, re), im) + default: + return complex(math.Copysign(0, re), im) + } + case math.IsInf(re, 0): + return complex(math.Copysign(math.Pi/2, re), math.Copysign(re, im)) + } + ct := complex(-imag(x), real(x)) // i * x + xx := x * x + x1 := complex(1-real(xx), -imag(xx)) // 1 - x*x + x2 := Sqrt(x1) // x2 = sqrt(1 - x*x) + w := Log(ct + x2) + return complex(imag(w), -real(w)) // -i * w +} + +// Asinh returns the inverse hyperbolic sine of x. 
+func Asinh(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case im == 0 && math.Abs(re) <= 1: + return complex(math.Asinh(re), im) + case re == 0 && math.Abs(im) <= 1: + return complex(re, math.Asin(im)) + case math.IsInf(re, 0): + switch { + case math.IsInf(im, 0): + return complex(re, math.Copysign(math.Pi/4, im)) + case math.IsNaN(im): + return x + default: + return complex(re, math.Copysign(0.0, im)) + } + case math.IsNaN(re): + switch { + case im == 0: + return x + case math.IsInf(im, 0): + return complex(im, re) + default: + return NaN() + } + case math.IsInf(im, 0): + return complex(math.Copysign(im, re), math.Copysign(math.Pi/2, im)) + } + xx := x * x + x1 := complex(1+real(xx), imag(xx)) // 1 + x*x + return Log(x + Sqrt(x1)) // log(x + sqrt(1 + x*x)) +} + +// Complex circular arc cosine +// +// DESCRIPTION: +// +// w = arccos z = PI/2 - arcsin z. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 5200 1.6e-15 2.8e-16 +// IEEE -10,+10 30000 1.8e-14 2.2e-15 + +// Acos returns the inverse cosine of x. +func Acos(x complex128) complex128 { + w := Asin(x) + return complex(math.Pi/2-real(w), -imag(w)) +} + +// Acosh returns the inverse hyperbolic cosine of x. +func Acosh(x complex128) complex128 { + if x == 0 { + return complex(0, math.Copysign(math.Pi/2, imag(x))) + } + w := Acos(x) + if imag(w) <= 0 { + return complex(-imag(w), real(w)) // i * w + } + return complex(imag(w), -real(w)) // -i * w +} + +// Complex circular arc tangent +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// 1 ( 2x ) +// Re w = - arctan(-----------) + k PI +// 2 ( 2 2) +// (1 - x - y ) +// +// ( 2 2) +// 1 (x + (y+1) ) +// Im w = - log(------------) +// 4 ( 2 2) +// (x + (y-1) ) +// +// Where k is an arbitrary integer. +// +// catan(z) = -i catanh(iz). 
+// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 5900 1.3e-16 7.8e-18 +// IEEE -10,+10 30000 2.3e-15 8.5e-17 +// The check catan( ctan(z) ) = z, with |x| and |y| < PI/2, +// had peak relative error 1.5e-16, rms relative error +// 2.9e-17. See also clog(). + +// Atan returns the inverse tangent of x. +func Atan(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case im == 0: + return complex(math.Atan(re), im) + case re == 0 && math.Abs(im) <= 1: + return complex(re, math.Atanh(im)) + case math.IsInf(im, 0) || math.IsInf(re, 0): + if math.IsNaN(re) { + return complex(math.NaN(), math.Copysign(0, im)) + } + return complex(math.Copysign(math.Pi/2, re), math.Copysign(0, im)) + case math.IsNaN(re) || math.IsNaN(im): + return NaN() + } + x2 := real(x) * real(x) + a := 1 - x2 - imag(x)*imag(x) + if a == 0 { + return NaN() + } + t := 0.5 * math.Atan2(2*real(x), a) + w := reducePi(t) + + t = imag(x) - 1 + b := x2 + t*t + if b == 0 { + return NaN() + } + t = imag(x) + 1 + c := (x2 + t*t) / b + return complex(w, 0.25*math.Log(c)) +} + +// Atanh returns the inverse hyperbolic tangent of x. +func Atanh(x complex128) complex128 { + z := complex(-imag(x), real(x)) // z = i * x + z = Atan(z) + return complex(imag(z), -real(z)) // z = -i * z +} diff --git a/src/math/cmplx/cmath_test.go b/src/math/cmplx/cmath_test.go new file mode 100644 index 0000000..3011e83 --- /dev/null +++ b/src/math/cmplx/cmath_test.go @@ -0,0 +1,1589 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import ( + "math" + "testing" +) + +// The higher-precision values in vc26 were used to derive the +// input arguments vc (see also comment below). For reference +// only (do not delete). 
+var vc26 = []complex128{ + (4.97901192488367350108546816 + 7.73887247457810456552351752i), + (7.73887247457810456552351752 - 0.27688005719200159404635997i), + (-0.27688005719200159404635997 - 5.01060361827107492160848778i), + (-5.01060361827107492160848778 + 9.63629370719841737980004837i), + (9.63629370719841737980004837 + 2.92637723924396464525443662i), + (2.92637723924396464525443662 + 5.22908343145930665230025625i), + (5.22908343145930665230025625 + 2.72793991043601025126008608i), + (2.72793991043601025126008608 + 1.82530809168085506044576505i), + (1.82530809168085506044576505 - 8.68592476857560136238589621i), + (-8.68592476857560136238589621 + 4.97901192488367350108546816i), +} + +var vc = []complex128{ + (4.9790119248836735e+00 + 7.7388724745781045e+00i), + (7.7388724745781045e+00 - 2.7688005719200159e-01i), + (-2.7688005719200159e-01 - 5.0106036182710749e+00i), + (-5.0106036182710749e+00 + 9.6362937071984173e+00i), + (9.6362937071984173e+00 + 2.9263772392439646e+00i), + (2.9263772392439646e+00 + 5.2290834314593066e+00i), + (5.2290834314593066e+00 + 2.7279399104360102e+00i), + (2.7279399104360102e+00 + 1.8253080916808550e+00i), + (1.8253080916808550e+00 - 8.6859247685756013e+00i), + (-8.6859247685756013e+00 + 4.9790119248836735e+00i), +} + +// The expected results below were computed by the high precision calculators +// at https://keisan.casio.com/. More exact input values (array vc[], above) +// were obtained by printing them with "%.26f". The answers were calculated +// to 26 digits (by using the "Digit number" drop-down control of each +// calculator). 
+ +var abs = []float64{ + 9.2022120669932650313380972e+00, + 7.7438239742296106616261394e+00, + 5.0182478202557746902556648e+00, + 1.0861137372799545160704002e+01, + 1.0070841084922199607011905e+01, + 5.9922447613166942183705192e+00, + 5.8978784056736762299945176e+00, + 3.2822866700678709020367184e+00, + 8.8756430028990417290744307e+00, + 1.0011785496777731986390856e+01, +} + +var acos = []complex128{ + (1.0017679804707456328694569 - 2.9138232718554953784519807i), + (0.03606427612041407369636057 + 2.7358584434576260925091256i), + (1.6249365462333796703711823 + 2.3159537454335901187730929i), + (2.0485650849650740120660391 - 3.0795576791204117911123886i), + (0.29621132089073067282488147 - 3.0007392508200622519398814i), + (1.0664555914934156601503632 - 2.4872865024796011364747111i), + (0.48681307452231387690013905 - 2.463655912283054555225301i), + (0.6116977071277574248407752 - 1.8734458851737055262693056i), + (1.3649311280370181331184214 + 2.8793528632328795424123832i), + (2.6189310485682988308904501 - 2.9956543302898767795858704i), +} +var acosh = []complex128{ + (2.9138232718554953784519807 + 1.0017679804707456328694569i), + (2.7358584434576260925091256 - 0.03606427612041407369636057i), + (2.3159537454335901187730929 - 1.6249365462333796703711823i), + (3.0795576791204117911123886 + 2.0485650849650740120660391i), + (3.0007392508200622519398814 + 0.29621132089073067282488147i), + (2.4872865024796011364747111 + 1.0664555914934156601503632i), + (2.463655912283054555225301 + 0.48681307452231387690013905i), + (1.8734458851737055262693056 + 0.6116977071277574248407752i), + (2.8793528632328795424123832 - 1.3649311280370181331184214i), + (2.9956543302898767795858704 + 2.6189310485682988308904501i), +} +var asin = []complex128{ + (0.56902834632415098636186476 + 2.9138232718554953784519807i), + (1.5347320506744825455349611 - 2.7358584434576260925091256i), + (-0.054140219438483051139860579 - 2.3159537454335901187730929i), + (-0.47776875817017739283471738 + 
3.0795576791204117911123886i), + (1.2745850059041659464064402 + 3.0007392508200622519398814i), + (0.50434073530148095908095852 + 2.4872865024796011364747111i), + (1.0839832522725827423311826 + 2.463655912283054555225301i), + (0.9590986196671391943905465 + 1.8734458851737055262693056i), + (0.20586519875787848611290031 - 2.8793528632328795424123832i), + (-1.0481347217734022116591284 + 2.9956543302898767795858704i), +} +var asinh = []complex128{ + (2.9113760469415295679342185 + 0.99639459545704326759805893i), + (2.7441755423994259061579029 - 0.035468308789000500601119392i), + (-2.2962136462520690506126678 - 1.5144663565690151885726707i), + (-3.0771233459295725965402455 + 1.0895577967194013849422294i), + (3.0048366100923647417557027 + 0.29346979169819220036454168i), + (2.4800059370795363157364643 + 1.0545868606049165710424232i), + (2.4718773838309585611141821 + 0.47502344364250803363708842i), + (1.8910743588080159144378396 + 0.56882925572563602341139174i), + (2.8735426423367341878069406 - 1.362376149648891420997548i), + (-2.9981750586172477217567878 + 0.5183571985225367505624207i), +} +var atan = []complex128{ + (1.5115747079332741358607654 + 0.091324403603954494382276776i), + (1.4424504323482602560806727 - 0.0045416132642803911503770933i), + (-1.5593488703630532674484026 - 0.20163295409248362456446431i), + (-1.5280619472445889867794105 + 0.081721556230672003746956324i), + (1.4759909163240799678221039 + 0.028602969320691644358773586i), + (1.4877353772046548932715555 + 0.14566877153207281663773599i), + (1.4206983927779191889826 + 0.076830486127880702249439993i), + (1.3162236060498933364869556 + 0.16031313000467530644933363i), + (1.5473450684303703578810093 - 0.11064907507939082484935782i), + (-1.4841462340185253987375812 + 0.049341850305024399493142411i), +} +var atanh = []complex128{ + (0.058375027938968509064640438 + 1.4793488495105334458167782i), + (0.12977343497790381229915667 - 1.5661009410463561327262499i), + (-0.010576456067347252072200088 - 
1.3743698658402284549750563i), + (-0.042218595678688358882784918 + 1.4891433968166405606692604i), + (0.095218997991316722061828397 + 1.5416884098777110330499698i), + (0.079965459366890323857556487 + 1.4252510353873192700350435i), + (0.15051245471980726221708301 + 1.4907432533016303804884461i), + (0.25082072933993987714470373 + 1.392057665392187516442986i), + (0.022896108815797135846276662 - 1.4609224989282864208963021i), + (-0.08665624101841876130537396 + 1.5207902036935093480142159i), +} +var conj = []complex128{ + (4.9790119248836735e+00 - 7.7388724745781045e+00i), + (7.7388724745781045e+00 + 2.7688005719200159e-01i), + (-2.7688005719200159e-01 + 5.0106036182710749e+00i), + (-5.0106036182710749e+00 - 9.6362937071984173e+00i), + (9.6362937071984173e+00 - 2.9263772392439646e+00i), + (2.9263772392439646e+00 - 5.2290834314593066e+00i), + (5.2290834314593066e+00 - 2.7279399104360102e+00i), + (2.7279399104360102e+00 - 1.8253080916808550e+00i), + (1.8253080916808550e+00 + 8.6859247685756013e+00i), + (-8.6859247685756013e+00 - 4.9790119248836735e+00i), +} +var cos = []complex128{ + (3.024540920601483938336569e+02 + 1.1073797572517071650045357e+03i), + (1.192858682649064973252758e-01 + 2.7857554122333065540970207e-01i), + (7.2144394304528306603857962e+01 - 2.0500129667076044169954205e+01i), + (2.24921952538403984190541e+03 - 7.317363745602773587049329e+03i), + (-9.148222970032421760015498e+00 + 1.953124661113563541862227e+00i), + (-9.116081175857732248227078e+01 - 1.992669213569952232487371e+01i), + (3.795639179042704640002918e+00 + 6.623513350981458399309662e+00i), + (-2.9144840732498869560679084e+00 - 1.214620271628002917638748e+00i), + (-7.45123482501299743872481e+02 + 2.8641692314488080814066734e+03i), + (-5.371977967039319076416747e+01 + 4.893348341339375830564624e+01i), +} +var cosh = []complex128{ + (8.34638383523018249366948e+00 + 7.2181057886425846415112064e+01i), + (1.10421967379919366952251e+03 - 3.1379638689277575379469861e+02i), + 
(3.051485206773701584738512e-01 - 2.6805384730105297848044485e-01i), + (-7.33294728684187933370938e+01 + 1.574445942284918251038144e+01i), + (-7.478643293945957535757355e+03 + 1.6348382209913353929473321e+03i), + (4.622316522966235701630926e+00 - 8.088695185566375256093098e+00i), + (-8.544333183278877406197712e+01 + 3.7505836120128166455231717e+01i), + (-1.934457815021493925115198e+00 + 7.3725859611767228178358673e+00i), + (-2.352958770061749348353548e+00 - 2.034982010440878358915409e+00i), + (7.79756457532134748165069e+02 + 2.8549350716819176560377717e+03i), +} +var exp = []complex128{ + (1.669197736864670815125146e+01 + 1.4436895109507663689174096e+02i), + (2.2084389286252583447276212e+03 - 6.2759289284909211238261917e+02i), + (2.227538273122775173434327e-01 + 7.2468284028334191250470034e-01i), + (-6.5182985958153548997881627e-03 - 1.39965837915193860879044e-03i), + (-1.4957286524084015746110777e+04 + 3.269676455931135688988042e+03i), + (9.218158701983105935659273e+00 - 1.6223985291084956009304582e+01i), + (-1.7088175716853040841444505e+02 + 7.501382609870410713795546e+01i), + (-3.852461315830959613132505e+00 + 1.4808420423156073221970892e+01i), + (-4.586775503301407379786695e+00 - 4.178501081246873415144744e+00i), + (4.451337963005453491095747e-05 - 1.62977574205442915935263e-04i), +} +var log = []complex128{ + (2.2194438972179194425697051e+00 + 9.9909115046919291062461269e-01i), + (2.0468956191154167256337289e+00 - 3.5762575021856971295156489e-02i), + (1.6130808329853860438751244e+00 - 1.6259990074019058442232221e+00i), + (2.3851910394823008710032651e+00 + 2.0502936359659111755031062e+00i), + (2.3096442270679923004800651e+00 + 2.9483213155446756211881774e-01i), + (1.7904660933974656106951860e+00 + 1.0605860367252556281902109e+00i), + (1.7745926939841751666177512e+00 + 4.8084556083358307819310911e-01i), + (1.1885403350045342425648780e+00 + 5.8969634164776659423195222e-01i), + (2.1833107837679082586772505e+00 - 1.3636647724582455028314573e+00i), + 
(2.3037629487273259170991671e+00 + 2.6210913895386013290915234e+00i), +} +var log10 = []complex128{ + (9.6389223745559042474184943e-01 + 4.338997735671419492599631e-01i), + (8.8895547241376579493490892e-01 - 1.5531488990643548254864806e-02i), + (7.0055210462945412305244578e-01 - 7.0616239649481243222248404e-01i), + (1.0358753067322445311676952e+00 + 8.9043121238134980156490909e-01i), + (1.003065742975330237172029e+00 + 1.2804396782187887479857811e-01i), + (7.7758954439739162532085157e-01 + 4.6060666333341810869055108e-01i), + (7.7069581462315327037689152e-01 + 2.0882857371769952195512475e-01i), + (5.1617650901191156135137239e-01 + 2.5610186717615977620363299e-01i), + (9.4819982567026639742663212e-01 - 5.9223208584446952284914289e-01i), + (1.0005115362454417135973429e+00 + 1.1383255270407412817250921e+00i), +} + +type ff struct { + r, theta float64 +} + +var polar = []ff{ + {9.2022120669932650313380972e+00, 9.9909115046919291062461269e-01}, + {7.7438239742296106616261394e+00, -3.5762575021856971295156489e-02}, + {5.0182478202557746902556648e+00, -1.6259990074019058442232221e+00}, + {1.0861137372799545160704002e+01, 2.0502936359659111755031062e+00}, + {1.0070841084922199607011905e+01, 2.9483213155446756211881774e-01}, + {5.9922447613166942183705192e+00, 1.0605860367252556281902109e+00}, + {5.8978784056736762299945176e+00, 4.8084556083358307819310911e-01}, + {3.2822866700678709020367184e+00, 5.8969634164776659423195222e-01}, + {8.8756430028990417290744307e+00, -1.3636647724582455028314573e+00}, + {1.0011785496777731986390856e+01, 2.6210913895386013290915234e+00}, +} +var pow = []complex128{ + (-2.499956739197529585028819e+00 + 1.759751724335650228957144e+00i), + (7.357094338218116311191939e+04 - 5.089973412479151648145882e+04i), + (1.320777296067768517259592e+01 - 3.165621914333901498921986e+01i), + (-3.123287828297300934072149e-07 - 1.9849567521490553032502223e-7i), + (8.0622651468477229614813e+04 - 7.80028727944573092944363e+04i), + (-1.0268824572103165858577141e+00 
- 4.716844738244989776610672e-01i), + (-4.35953819012244175753187e+01 + 2.2036445974645306917648585e+02i), + (8.3556092283250594950239e-01 - 1.2261571947167240272593282e+01i), + (1.582292972120769306069625e+03 + 1.273564263524278244782512e+04i), + (6.592208301642122149025369e-08 + 2.584887236651661903526389e-08i), +} +var sin = []complex128{ + (-1.1073801774240233539648544e+03 + 3.024539773002502192425231e+02i), + (1.0317037521400759359744682e+00 - 3.2208979799929570242818e-02i), + (-2.0501952097271429804261058e+01 - 7.2137981348240798841800967e+01i), + (7.3173638080346338642193078e+03 + 2.249219506193664342566248e+03i), + (-1.964375633631808177565226e+00 - 9.0958264713870404464159683e+00i), + (1.992783647158514838337674e+01 - 9.11555769410191350416942e+01i), + (-6.680335650741921444300349e+00 + 3.763353833142432513086117e+00i), + (1.2794028166657459148245993e+00 - 2.7669092099795781155109602e+00i), + (2.8641693949535259594188879e+03 + 7.451234399649871202841615e+02i), + (-4.893811726244659135553033e+01 - 5.371469305562194635957655e+01i), +} +var sinh = []complex128{ + (8.34559353341652565758198e+00 + 7.2187893208650790476628899e+01i), + (1.1042192548260646752051112e+03 - 3.1379650595631635858792056e+02i), + (-8.239469336509264113041849e-02 + 9.9273668758439489098514519e-01i), + (7.332295456982297798219401e+01 - 1.574585908122833444899023e+01i), + (-7.4786432301380582103534216e+03 + 1.63483823493980029604071e+03i), + (4.595842179016870234028347e+00 - 8.135290105518580753211484e+00i), + (-8.543842533574163435246793e+01 + 3.750798997857594068272375e+01i), + (-1.918003500809465688017307e+00 + 7.4358344619793504041350251e+00i), + (-2.233816733239658031433147e+00 - 2.143519070805995056229335e+00i), + (-7.797564130187551181105341e+02 - 2.8549352346594918614806877e+03i), +} +var sqrt = []complex128{ + (2.6628203086086130543813948e+00 + 1.4531345674282185229796902e+00i), + (2.7823278427251986247149295e+00 - 4.9756907317005224529115567e-02i), + 
(1.5397025302089642757361015e+00 - 1.6271336573016637535695727e+00i), + (1.7103411581506875260277898e+00 + 2.8170677122737589676157029e+00i), + (3.1390392472953103383607947e+00 + 4.6612625849858653248980849e-01i), + (2.1117080764822417640789287e+00 + 1.2381170223514273234967850e+00i), + (2.3587032281672256703926939e+00 + 5.7827111903257349935720172e-01i), + (1.7335262588873410476661577e+00 + 5.2647258220721269141550382e-01i), + (2.3131094974708716531499282e+00 - 1.8775429304303785570775490e+00i), + (8.1420535745048086240947359e-01 + 3.0575897587277248522656113e+00i), +} +var tan = []complex128{ + (-1.928757919086441129134525e-07 + 1.0000003267499169073251826e+00i), + (1.242412685364183792138948e+00 - 3.17149693883133370106696e+00i), + (-4.6745126251587795225571826e-05 - 9.9992439225263959286114298e-01i), + (4.792363401193648192887116e-09 + 1.0000000070589333451557723e+00i), + (2.345740824080089140287315e-03 + 9.947733046570988661022763e-01i), + (-2.396030789494815566088809e-05 + 9.9994781345418591429826779e-01i), + (-7.370204836644931340905303e-03 + 1.0043553413417138987717748e+00i), + (-3.691803847992048527007457e-02 + 9.6475071993469548066328894e-01i), + (-2.781955256713729368401878e-08 - 1.000000049848910609006646e+00i), + (9.4281590064030478879791249e-05 + 9.9999119340863718183758545e-01i), +} +var tanh = []complex128{ + (1.0000921981225144748819918e+00 + 2.160986245871518020231507e-05i), + (9.9999967727531993209562591e-01 - 1.9953763222959658873657676e-07i), + (-1.765485739548037260789686e+00 + 1.7024216325552852445168471e+00i), + (-9.999189442732736452807108e-01 + 3.64906070494473701938098e-05i), + (9.9999999224622333738729767e-01 - 3.560088949517914774813046e-09i), + (1.0029324933367326862499343e+00 - 4.948790309797102353137528e-03i), + (9.9996113064788012488693567e-01 - 4.226995742097032481451259e-05i), + (1.0074784189316340029873945e+00 - 4.194050814891697808029407e-03i), + (9.9385534229718327109131502e-01 + 5.144217985914355502713437e-02i), + 
(-1.0000000491604982429364892e+00 - 2.901873195374433112227349e-08i), +} + +// huge values along the real axis for testing reducePi in Tan +var hugeIn = []complex128{ + 1 << 28, + 1 << 29, + 1 << 30, + 1 << 35, + -1 << 120, + 1 << 240, + 1 << 300, + -1 << 480, + 1234567891234567 << 180, + -1234567891234567 << 300, +} + +// Results for tanHuge[i] calculated with https://github.com/robpike/ivy +// using 4096 bits of working precision. +var tanHuge = []complex128{ + 5.95641897939639421, + -0.34551069233430392, + -0.78469661331920043, + 0.84276385870875983, + 0.40806638884180424, + -0.37603456702698076, + 4.60901287677810962, + 3.39135965054779932, + -6.76813854009065030, + -0.76417695016604922, +} + +// special cases conform to C99 standard appendix G.6 Complex arithmetic +var inf, nan = math.Inf(1), math.NaN() + +var vcAbsSC = []complex128{ + NaN(), +} +var absSC = []float64{ + math.NaN(), +} +var acosSC = []struct { + in, + want complex128 +}{ + // G.6.1.1 + {complex(zero, zero), + complex(math.Pi/2, -zero)}, + {complex(-zero, zero), + complex(math.Pi/2, -zero)}, + {complex(zero, nan), + complex(math.Pi/2, nan)}, + {complex(-zero, nan), + complex(math.Pi/2, nan)}, + {complex(1.0, inf), + complex(math.Pi/2, -inf)}, + {complex(1.0, nan), + NaN()}, + {complex(-inf, 1.0), + complex(math.Pi, -inf)}, + {complex(inf, 1.0), + complex(0.0, -inf)}, + {complex(-inf, inf), + complex(3*math.Pi/4, -inf)}, + {complex(inf, inf), + complex(math.Pi/4, -inf)}, + {complex(inf, nan), + complex(nan, -inf)}, // imaginary sign unspecified + {complex(-inf, nan), + complex(nan, inf)}, // imaginary sign unspecified + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(nan, -inf)}, + {NaN(), + NaN()}, +} +var acoshSC = []struct { + in, + want complex128 +}{ + // G.6.2.1 + {complex(zero, zero), + complex(zero, math.Pi/2)}, + {complex(-zero, zero), + complex(zero, math.Pi/2)}, + {complex(1.0, inf), + complex(inf, math.Pi/2)}, + {complex(1.0, nan), + NaN()}, + {complex(-inf, 1.0), + 
complex(inf, math.Pi)}, + {complex(inf, 1.0), + complex(inf, zero)}, + {complex(-inf, inf), + complex(inf, 3*math.Pi/4)}, + {complex(inf, inf), + complex(inf, math.Pi/4)}, + {complex(inf, nan), + complex(inf, nan)}, + {complex(-inf, nan), + complex(inf, nan)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(inf, nan)}, + {NaN(), + NaN()}, +} +var asinSC = []struct { + in, + want complex128 +}{ + // Derived from Asin(z) = -i * Asinh(i * z), G.6 #7 + {complex(zero, zero), + complex(zero, zero)}, + {complex(1.0, inf), + complex(0, inf)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, 1), + complex(math.Pi/2, inf)}, + {complex(inf, inf), + complex(math.Pi/4, inf)}, + {complex(inf, nan), + complex(nan, inf)}, // imaginary sign unspecified + {complex(nan, zero), + NaN()}, + {complex(nan, 1), + NaN()}, + {complex(nan, inf), + complex(nan, inf)}, + {NaN(), + NaN()}, +} +var asinhSC = []struct { + in, + want complex128 +}{ + // G.6.2.2 + {complex(zero, zero), + complex(zero, zero)}, + {complex(1.0, inf), + complex(inf, math.Pi/2)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, 1.0), + complex(inf, zero)}, + {complex(inf, inf), + complex(inf, math.Pi/4)}, + {complex(inf, nan), + complex(inf, nan)}, + {complex(nan, zero), + complex(nan, zero)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(inf, nan)}, // sign of real part unspecified + {NaN(), + NaN()}, +} +var atanSC = []struct { + in, + want complex128 +}{ + // Derived from Atan(z) = -i * Atanh(i * z), G.6 #7 + {complex(0, zero), + complex(0, zero)}, + {complex(0, nan), + NaN()}, + {complex(1.0, zero), + complex(math.Pi/4, zero)}, + {complex(1.0, inf), + complex(math.Pi/2, zero)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, 1), + complex(math.Pi/2, zero)}, + {complex(inf, inf), + complex(math.Pi/2, zero)}, + {complex(inf, nan), + complex(math.Pi/2, zero)}, + {complex(nan, 1), + NaN()}, + {complex(nan, inf), + complex(nan, zero)}, + {NaN(), + NaN()}, +} +var atanhSC = []struct { + in, 
+ want complex128 +}{ + // G.6.2.3 + {complex(zero, zero), + complex(zero, zero)}, + {complex(zero, nan), + complex(zero, nan)}, + {complex(1.0, zero), + complex(inf, zero)}, + {complex(1.0, inf), + complex(0, math.Pi/2)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, 1.0), + complex(zero, math.Pi/2)}, + {complex(inf, inf), + complex(zero, math.Pi/2)}, + {complex(inf, nan), + complex(0, nan)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(zero, math.Pi/2)}, // sign of real part not specified. + {NaN(), + NaN()}, +} +var vcConjSC = []complex128{ + NaN(), +} +var conjSC = []complex128{ + NaN(), +} +var cosSC = []struct { + in, + want complex128 +}{ + // Derived from Cos(z) = Cosh(i * z), G.6 #7 + {complex(zero, zero), + complex(1.0, -zero)}, + {complex(zero, inf), + complex(inf, -zero)}, + {complex(zero, nan), + complex(nan, zero)}, // imaginary sign unspecified + {complex(1.0, inf), + complex(inf, -inf)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, zero), + complex(nan, -zero)}, + {complex(inf, 1.0), + NaN()}, + {complex(inf, inf), + complex(inf, nan)}, // real sign unspecified + {complex(inf, nan), + NaN()}, + {complex(nan, zero), + complex(nan, -zero)}, // imaginary sign unspecified + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(inf, nan)}, + {NaN(), + NaN()}, +} +var coshSC = []struct { + in, + want complex128 +}{ + // G.6.2.4 + {complex(zero, zero), + complex(1.0, zero)}, + {complex(zero, inf), + complex(nan, zero)}, // imaginary sign unspecified + {complex(zero, nan), + complex(nan, zero)}, // imaginary sign unspecified + {complex(1.0, inf), + NaN()}, + {complex(1.0, nan), + NaN()}, + {complex(inf, zero), + complex(inf, zero)}, + {complex(inf, 1.0), + complex(inf*math.Cos(1.0), inf*math.Sin(1.0))}, // +inf cis(y) + {complex(inf, inf), + complex(inf, nan)}, // real sign unspecified + {complex(inf, nan), + complex(inf, nan)}, + {complex(nan, zero), + complex(nan, zero)}, // imaginary sign unspecified + {complex(nan, 1.0), 
+ NaN()}, + {complex(nan, inf), + NaN()}, + {NaN(), + NaN()}, +} +var expSC = []struct { + in, + want complex128 +}{ + // G.6.3.1 + {complex(zero, zero), + complex(1.0, zero)}, + {complex(-zero, zero), + complex(1.0, zero)}, + {complex(1.0, inf), + NaN()}, + {complex(1.0, nan), + NaN()}, + {complex(inf, zero), + complex(inf, zero)}, + {complex(-inf, 1.0), + complex(math.Copysign(0.0, math.Cos(1.0)), math.Copysign(0.0, math.Sin(1.0)))}, // +0 cis(y) + {complex(inf, 1.0), + complex(inf*math.Cos(1.0), inf*math.Sin(1.0))}, // +inf cis(y) + {complex(-inf, inf), + complex(zero, zero)}, // real and imaginary sign unspecified + {complex(inf, inf), + complex(inf, nan)}, // real sign unspecified + {complex(-inf, nan), + complex(zero, zero)}, // real and imaginary sign unspecified + {complex(inf, nan), + complex(inf, nan)}, // real sign unspecified + {complex(nan, zero), + complex(nan, zero)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + NaN()}, + {NaN(), + NaN()}, +} +var vcIsNaNSC = []complex128{ + complex(math.Inf(-1), math.Inf(-1)), + complex(math.Inf(-1), math.NaN()), + complex(math.NaN(), math.Inf(-1)), + complex(0, math.NaN()), + complex(math.NaN(), 0), + complex(math.Inf(1), math.Inf(1)), + complex(math.Inf(1), math.NaN()), + complex(math.NaN(), math.Inf(1)), + complex(math.NaN(), math.NaN()), +} +var isNaNSC = []bool{ + false, + false, + false, + true, + true, + false, + false, + false, + true, +} + +var logSC = []struct { + in, + want complex128 +}{ + // G.6.3.2 + {complex(zero, zero), + complex(-inf, zero)}, + {complex(-zero, zero), + complex(-inf, math.Pi)}, + {complex(1.0, inf), + complex(inf, math.Pi/2)}, + {complex(1.0, nan), + NaN()}, + {complex(-inf, 1.0), + complex(inf, math.Pi)}, + {complex(inf, 1.0), + complex(inf, 0.0)}, + {complex(-inf, inf), + complex(inf, 3*math.Pi/4)}, + {complex(inf, inf), + complex(inf, math.Pi/4)}, + {complex(-inf, nan), + complex(inf, nan)}, + {complex(inf, nan), + complex(inf, nan)}, + {complex(nan, 1.0), + NaN()}, + 
{complex(nan, inf), + complex(inf, nan)}, + {NaN(), + NaN()}, +} +var log10SC = []struct { + in, + want complex128 +}{ + // derived from Log special cases via Log10(x) = math.Log10E*Log(x) + {complex(zero, zero), + complex(-inf, zero)}, + {complex(-zero, zero), + complex(-inf, float64(math.Log10E)*float64(math.Pi))}, + {complex(1.0, inf), + complex(inf, float64(math.Log10E)*float64(math.Pi/2))}, + {complex(1.0, nan), + NaN()}, + {complex(-inf, 1.0), + complex(inf, float64(math.Log10E)*float64(math.Pi))}, + {complex(inf, 1.0), + complex(inf, 0.0)}, + {complex(-inf, inf), + complex(inf, float64(math.Log10E)*float64(3*math.Pi/4))}, + {complex(inf, inf), + complex(inf, float64(math.Log10E)*float64(math.Pi/4))}, + {complex(-inf, nan), + complex(inf, nan)}, + {complex(inf, nan), + complex(inf, nan)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(inf, nan)}, + {NaN(), + NaN()}, +} +var vcPolarSC = []complex128{ + NaN(), +} +var polarSC = []ff{ + {math.NaN(), math.NaN()}, +} +var vcPowSC = [][2]complex128{ + {NaN(), NaN()}, + {0, NaN()}, +} +var powSC = []complex128{ + NaN(), + NaN(), +} +var sinSC = []struct { + in, + want complex128 +}{ + // Derived from Sin(z) = -i * Sinh(i * z), G.6 #7 + {complex(zero, zero), + complex(zero, zero)}, + {complex(zero, inf), + complex(zero, inf)}, + {complex(zero, nan), + complex(zero, nan)}, + {complex(1.0, inf), + complex(inf, inf)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, zero), + complex(nan, zero)}, + {complex(inf, 1.0), + NaN()}, + {complex(inf, inf), + complex(nan, inf)}, + {complex(inf, nan), + NaN()}, + {complex(nan, zero), + complex(nan, zero)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(nan, inf)}, + {NaN(), + NaN()}, +} + +var sinhSC = []struct { + in, + want complex128 +}{ + // G.6.2.5 + {complex(zero, zero), + complex(zero, zero)}, + {complex(zero, inf), + complex(zero, nan)}, // real sign unspecified + {complex(zero, nan), + complex(zero, nan)}, // real sign unspecified + 
{complex(1.0, inf), + NaN()}, + {complex(1.0, nan), + NaN()}, + {complex(inf, zero), + complex(inf, zero)}, + {complex(inf, 1.0), + complex(inf*math.Cos(1.0), inf*math.Sin(1.0))}, // +inf cis(y) + {complex(inf, inf), + complex(inf, nan)}, // real sign unspecified + {complex(inf, nan), + complex(inf, nan)}, // real sign unspecified + {complex(nan, zero), + complex(nan, zero)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + NaN()}, + {NaN(), + NaN()}, +} + +var sqrtSC = []struct { + in, + want complex128 +}{ + // G.6.4.2 + {complex(zero, zero), + complex(zero, zero)}, + {complex(-zero, zero), + complex(zero, zero)}, + {complex(1.0, inf), + complex(inf, inf)}, + {complex(nan, inf), + complex(inf, inf)}, + {complex(1.0, nan), + NaN()}, + {complex(-inf, 1.0), + complex(zero, inf)}, + {complex(inf, 1.0), + complex(inf, zero)}, + {complex(-inf, nan), + complex(nan, inf)}, // imaginary sign unspecified + {complex(inf, nan), + complex(inf, nan)}, + {complex(nan, 1.0), + NaN()}, + {NaN(), + NaN()}, +} +var tanSC = []struct { + in, + want complex128 +}{ + // Derived from Tan(z) = -i * Tanh(i * z), G.6 #7 + {complex(zero, zero), + complex(zero, zero)}, + {complex(zero, nan), + complex(zero, nan)}, + {complex(1.0, inf), + complex(zero, 1.0)}, + {complex(1.0, nan), + NaN()}, + {complex(inf, 1.0), + NaN()}, + {complex(inf, inf), + complex(zero, 1.0)}, + {complex(inf, nan), + NaN()}, + {complex(nan, zero), + NaN()}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + complex(zero, 1.0)}, + {NaN(), + NaN()}, +} +var tanhSC = []struct { + in, + want complex128 +}{ + // G.6.2.6 + {complex(zero, zero), + complex(zero, zero)}, + {complex(1.0, inf), + NaN()}, + {complex(1.0, nan), + NaN()}, + {complex(inf, 1.0), + complex(1.0, math.Copysign(0.0, math.Sin(2*1.0)))}, // 1 + i 0 sin(2y) + {complex(inf, inf), + complex(1.0, zero)}, // imaginary sign unspecified + {complex(inf, nan), + complex(1.0, zero)}, // imaginary sign unspecified + {complex(nan, zero), + complex(nan, 
zero)}, + {complex(nan, 1.0), + NaN()}, + {complex(nan, inf), + NaN()}, + {NaN(), + NaN()}, +} + +// branch cut continuity checks +// points on each axis at |z| > 1 are checked for one-sided continuity from both the positive and negative side +// all possible branch cuts for the elementary functions are at one of these points + +var zero = 0.0 +var eps = 1.0 / (1 << 53) + +var branchPoints = [][2]complex128{ + {complex(2.0, zero), complex(2.0, eps)}, + {complex(2.0, -zero), complex(2.0, -eps)}, + {complex(-2.0, zero), complex(-2.0, eps)}, + {complex(-2.0, -zero), complex(-2.0, -eps)}, + {complex(zero, 2.0), complex(eps, 2.0)}, + {complex(-zero, 2.0), complex(-eps, 2.0)}, + {complex(zero, -2.0), complex(eps, -2.0)}, + {complex(-zero, -2.0), complex(-eps, -2.0)}, +} + +// functions borrowed from pkg/math/all_test.go +func tolerance(a, b, e float64) bool { + d := a - b + if d < 0 { + d = -d + } + + // note: b is correct (expected) value, a is actual value. + // make error tolerance a fraction of b, not a. + if b != 0 { + e = e * b + if e < 0 { + e = -e + } + } + return d < e +} +func veryclose(a, b float64) bool { return tolerance(a, b, 4e-16) } +func alike(a, b float64) bool { + switch { + case a != a && b != b: // math.IsNaN(a) && math.IsNaN(b): + return true + case a == b: + return math.Signbit(a) == math.Signbit(b) + } + return false +} + +func cTolerance(a, b complex128, e float64) bool { + d := Abs(a - b) + if b != 0 { + e = e * Abs(b) + if e < 0 { + e = -e + } + } + return d < e +} +func cSoclose(a, b complex128, e float64) bool { return cTolerance(a, b, e) } +func cVeryclose(a, b complex128) bool { return cTolerance(a, b, 4e-16) } +func cAlike(a, b complex128) bool { + var realAlike, imagAlike bool + if isExact(real(b)) { + realAlike = alike(real(a), real(b)) + } else { + // Allow non-exact special cases to have errors in ULP. 
+ realAlike = veryclose(real(a), real(b)) + } + if isExact(imag(b)) { + imagAlike = alike(imag(a), imag(b)) + } else { + // Allow non-exact special cases to have errors in ULP. + imagAlike = veryclose(imag(a), imag(b)) + } + return realAlike && imagAlike +} +func isExact(x float64) bool { + // Special cases that should match exactly. Other cases are multiples + // of Pi that may not be last bit identical on all platforms. + return math.IsNaN(x) || math.IsInf(x, 0) || x == 0 || x == 1 || x == -1 +} + +func TestAbs(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Abs(vc[i]); !veryclose(abs[i], f) { + t.Errorf("Abs(%g) = %g, want %g", vc[i], f, abs[i]) + } + } + for i := 0; i < len(vcAbsSC); i++ { + if f := Abs(vcAbsSC[i]); !alike(absSC[i], f) { + t.Errorf("Abs(%g) = %g, want %g", vcAbsSC[i], f, absSC[i]) + } + } +} +func TestAcos(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Acos(vc[i]); !cSoclose(acos[i], f, 1e-14) { + t.Errorf("Acos(%g) = %g, want %g", vc[i], f, acos[i]) + } + } + for _, v := range acosSC { + if f := Acos(v.in); !cAlike(v.want, f) { + t.Errorf("Acos(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. 
+ continue + } + // Acos(Conj(z)) == Conj(Acos(z)) + if f := Acos(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Acos(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + } + for _, pt := range branchPoints { + if f0, f1 := Acos(pt[0]), Acos(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Acos(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestAcosh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Acosh(vc[i]); !cSoclose(acosh[i], f, 1e-14) { + t.Errorf("Acosh(%g) = %g, want %g", vc[i], f, acosh[i]) + } + } + for _, v := range acoshSC { + if f := Acosh(v.in); !cAlike(v.want, f) { + t.Errorf("Acosh(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Acosh(Conj(z)) == Conj(Acosh(z)) + if f := Acosh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Acosh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + + } + for _, pt := range branchPoints { + if f0, f1 := Acosh(pt[0]), Acosh(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Acosh(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestAsin(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Asin(vc[i]); !cSoclose(asin[i], f, 1e-14) { + t.Errorf("Asin(%g) = %g, want %g", vc[i], f, asin[i]) + } + } + for _, v := range asinSC { + if f := Asin(v.in); !cAlike(v.want, f) { + t.Errorf("Asin(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. 
+ continue + } + // Asin(Conj(z)) == Conj(Asin(z)) + if f := Asin(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Asin(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Asin(-z) == -Asin(z) + if f := Asin(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Asin(%g) = %g, want %g", -v.in, f, -v.want) + } + } + for _, pt := range branchPoints { + if f0, f1 := Asin(pt[0]), Asin(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Asin(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestAsinh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Asinh(vc[i]); !cSoclose(asinh[i], f, 4e-15) { + t.Errorf("Asinh(%g) = %g, want %g", vc[i], f, asinh[i]) + } + } + for _, v := range asinhSC { + if f := Asinh(v.in); !cAlike(v.want, f) { + t.Errorf("Asinh(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Asinh(Conj(z)) == Conj(Asinh(z)) + if f := Asinh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Asinh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced.
+ continue + } + // Asinh(-z) == -Asinh(z) + if f := Asinh(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Asinh(%g) = %g, want %g", -v.in, f, -v.want) + } + } + for _, pt := range branchPoints { + if f0, f1 := Asinh(pt[0]), Asinh(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Asinh(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestAtan(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Atan(vc[i]); !cVeryclose(atan[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vc[i], f, atan[i]) + } + } + for _, v := range atanSC { + if f := Atan(v.in); !cAlike(v.want, f) { + t.Errorf("Atan(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Atan(Conj(z)) == Conj(Atan(z)) + if f := Atan(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Atan(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Atan(-z) == -Atan(z) + if f := Atan(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Atan(%g) = %g, want %g", -v.in, f, -v.want) + } + } + for _, pt := range branchPoints { + if f0, f1 := Atan(pt[0]), Atan(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Atan(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestAtanh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Atanh(vc[i]); !cVeryclose(atanh[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", vc[i], f, atanh[i]) + } + } + for _, v := range atanhSC { + if f := Atanh(v.in); !cAlike(v.want, f) { + t.Errorf("Atanh(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. 
+ continue + } + // Atanh(Conj(z)) == Conj(Atanh(z)) + if f := Atanh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Atanh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Atanh(-z) == -Atanh(z) + if f := Atanh(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Atanh(%g) = %g, want %g", -v.in, f, -v.want) + } + } + for _, pt := range branchPoints { + if f0, f1 := Atanh(pt[0]), Atanh(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Atanh(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestConj(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Conj(vc[i]); !cVeryclose(conj[i], f) { + t.Errorf("Conj(%g) = %g, want %g", vc[i], f, conj[i]) + } + } + for i := 0; i < len(vcConjSC); i++ { + if f := Conj(vcConjSC[i]); !cAlike(conjSC[i], f) { + t.Errorf("Conj(%g) = %g, want %g", vcConjSC[i], f, conjSC[i]) + } + } +} +func TestCos(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Cos(vc[i]); !cSoclose(cos[i], f, 3e-15) { + t.Errorf("Cos(%g) = %g, want %g", vc[i], f, cos[i]) + } + } + for _, v := range cosSC { + if f := Cos(v.in); !cAlike(v.want, f) { + t.Errorf("Cos(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Cos(Conj(z)) == Conj(Cos(z)) + if f := Cos(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Cos(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced.
+ continue + } + // Cos(-z) == Cos(z) + if f := Cos(-v.in); !cAlike(v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Cos(%g) = %g, want %g", -v.in, f, v.want) + } + } +} +func TestCosh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Cosh(vc[i]); !cSoclose(cosh[i], f, 2e-15) { + t.Errorf("Cosh(%g) = %g, want %g", vc[i], f, cosh[i]) + } + } + for _, v := range coshSC { + if f := Cosh(v.in); !cAlike(v.want, f) { + t.Errorf("Cosh(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Cosh(Conj(z)) == Conj(Cosh(z)) + if f := Cosh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Cosh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Cosh(-z) == Cosh(z) + if f := Cosh(-v.in); !cAlike(v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Cosh(%g) = %g, want %g", -v.in, f, v.want) + } + } +} +func TestExp(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Exp(vc[i]); !cSoclose(exp[i], f, 1e-15) { + t.Errorf("Exp(%g) = %g, want %g", vc[i], f, exp[i]) + } + } + for _, v := range expSC { + if f := Exp(v.in); !cAlike(v.want, f) { + t.Errorf("Exp(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. 
+ continue + } + // Exp(Conj(z)) == Conj(Exp(z)) + if f := Exp(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Exp(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + } +} +func TestIsNaN(t *testing.T) { + for i := 0; i < len(vcIsNaNSC); i++ { + if f := IsNaN(vcIsNaNSC[i]); isNaNSC[i] != f { + t.Errorf("IsNaN(%v) = %v, want %v", vcIsNaNSC[i], f, isNaNSC[i]) + } + } +} +func TestLog(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Log(vc[i]); !cVeryclose(log[i], f) { + t.Errorf("Log(%g) = %g, want %g", vc[i], f, log[i]) + } + } + for _, v := range logSC { + if f := Log(v.in); !cAlike(v.want, f) { + t.Errorf("Log(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Log(Conj(z)) == Conj(Log(z)) + if f := Log(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Log(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + } + for _, pt := range branchPoints { + if f0, f1 := Log(pt[0]), Log(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Log(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestLog10(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Log10(vc[i]); !cVeryclose(log10[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vc[i], f, log10[i]) + } + } + for _, v := range log10SC { + if f := Log10(v.in); !cAlike(v.want, f) { + t.Errorf("Log10(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced.
+ continue + } + // Log10(Conj(z)) == Conj(Log10(z)) + if f := Log10(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Log10(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + } +} +func TestPolar(t *testing.T) { + for i := 0; i < len(vc); i++ { + if r, theta := Polar(vc[i]); !veryclose(polar[i].r, r) || !veryclose(polar[i].theta, theta) { + t.Errorf("Polar(%g) = %g, %g want %g, %g", vc[i], r, theta, polar[i].r, polar[i].theta) + } + } + for i := 0; i < len(vcPolarSC); i++ { + if r, theta := Polar(vcPolarSC[i]); !alike(polarSC[i].r, r) || !alike(polarSC[i].theta, theta) { + t.Errorf("Polar(%g) = %g, %g, want %g, %g", vcPolarSC[i], r, theta, polarSC[i].r, polarSC[i].theta) + } + } +} +func TestPow(t *testing.T) { + // Special cases for Pow(0, c). + var zero = complex(0, 0) + zeroPowers := [][2]complex128{ + {0, 1 + 0i}, + {1.5, 0 + 0i}, + {-1.5, complex(math.Inf(0), 0)}, + {-1.5 + 1.5i, Inf()}, + } + for _, zp := range zeroPowers { + if f := Pow(zero, zp[0]); f != zp[1] { + t.Errorf("Pow(%g, %g) = %g, want %g", zero, zp[0], f, zp[1]) + } + } + var a = complex(3.0, 3.0) + for i := 0; i < len(vc); i++ { + if f := Pow(a, vc[i]); !cSoclose(pow[i], f, 4e-15) { + t.Errorf("Pow(%g, %g) = %g, want %g", a, vc[i], f, pow[i]) + } + } + for i := 0; i < len(vcPowSC); i++ { + if f := Pow(vcPowSC[i][0], vcPowSC[i][1]); !cAlike(powSC[i], f) { + t.Errorf("Pow(%g, %g) = %g, want %g", vcPowSC[i][0], vcPowSC[i][1], f, powSC[i]) + } + } + for _, pt := range branchPoints { + if f0, f1 := Pow(pt[0], 0.1), Pow(pt[1], 0.1); !cVeryclose(f0, f1) { + t.Errorf("Pow(%g, 0.1) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestRect(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Rect(polar[i].r, polar[i].theta); !cVeryclose(vc[i], f) { + t.Errorf("Rect(%g, %g) = %g want %g", polar[i].r, polar[i].theta, f, vc[i]) + } + } + for i := 0; i < len(vcPolarSC); i++ { + if f := Rect(polarSC[i].r, polarSC[i].theta); !cAlike(vcPolarSC[i], f) { +
t.Errorf("Rect(%g, %g) = %g, want %g", polarSC[i].r, polarSC[i].theta, f, vcPolarSC[i]) + } + } +} +func TestSin(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Sin(vc[i]); !cSoclose(sin[i], f, 2e-15) { + t.Errorf("Sin(%g) = %g, want %g", vc[i], f, sin[i]) + } + } + for _, v := range sinSC { + if f := Sin(v.in); !cAlike(v.want, f) { + t.Errorf("Sin(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Sin(Conj(z)) == Conj(Sin(z)) + if f := Sin(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Sin(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Sin(-z) == -Sin(z) + if f := Sin(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Sin(%g) = %g, want %g", -v.in, f, -v.want) + } + } +} +func TestSinh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Sinh(vc[i]); !cSoclose(sinh[i], f, 2e-15) { + t.Errorf("Sinh(%g) = %g, want %g", vc[i], f, sinh[i]) + } + } + for _, v := range sinhSC { + if f := Sinh(v.in); !cAlike(v.want, f) { + t.Errorf("Sinh(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Sinh(Conj(z)) == Conj(Sinh(z)) + if f := Sinh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Sinh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced.
+ continue + } + // Sinh(-z) == -Sinh(z) + if f := Sinh(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Sinh(%g) = %g, want %g", -v.in, f, -v.want) + } + } +} +func TestSqrt(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Sqrt(vc[i]); !cVeryclose(sqrt[i], f) { + t.Errorf("Sqrt(%g) = %g, want %g", vc[i], f, sqrt[i]) + } + } + for _, v := range sqrtSC { + if f := Sqrt(v.in); !cAlike(v.want, f) { + t.Errorf("Sqrt(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Sqrt(Conj(z)) == Conj(Sqrt(z)) + if f := Sqrt(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Sqrt(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + } + for _, pt := range branchPoints { + if f0, f1 := Sqrt(pt[0]), Sqrt(pt[1]); !cVeryclose(f0, f1) { + t.Errorf("Sqrt(%g) not continuous, got %g want %g", pt[0], f0, f1) + } + } +} +func TestTan(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Tan(vc[i]); !cSoclose(tan[i], f, 3e-15) { + t.Errorf("Tan(%g) = %g, want %g", vc[i], f, tan[i]) + } + } + for _, v := range tanSC { + if f := Tan(v.in); !cAlike(v.want, f) { + t.Errorf("Tan(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Tan(Conj(z)) == Conj(Tan(z)) + if f := Tan(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Tan(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. 
+ continue + } + // Tan(-z) == -Tan(z) + if f := Tan(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Tan(%g) = %g, want %g", -v.in, f, -v.want) + } + } +} +func TestTanh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Tanh(vc[i]); !cSoclose(tanh[i], f, 2e-15) { + t.Errorf("Tanh(%g) = %g, want %g", vc[i], f, tanh[i]) + } + } + for _, v := range tanhSC { + if f := Tanh(v.in); !cAlike(v.want, f) { + t.Errorf("Tanh(%g) = %g, want %g", v.in, f, v.want) + } + if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Tanh(Conj(z)) == Conj(Tanh(z)) + if f := Tanh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) { + t.Errorf("Tanh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want)) + } + if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) { + // Negating NaN is undefined with regard to the sign bit produced. + continue + } + // Tanh(-z) == -Tanh(z) + if f := Tanh(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) { + t.Errorf("Tanh(%g) = %g, want %g", -v.in, f, -v.want) + } + } +} + +// See issue 17577 +func TestInfiniteLoopIntanSeries(t *testing.T) { + want := Inf() + if got := Cot(0); got != want { + t.Errorf("Cot(0): got %g, want %g", got, want) + } +} + +func BenchmarkAbs(b *testing.B) { + for i := 0; i < b.N; i++ { + Abs(complex(2.5, 3.5)) + } +} +func BenchmarkAcos(b *testing.B) { + for i := 0; i < b.N; i++ { + Acos(complex(2.5, 3.5)) + } +} +func BenchmarkAcosh(b *testing.B) { + for i := 0; i < b.N; i++ { + Acosh(complex(2.5, 3.5)) + } +} +func BenchmarkAsin(b *testing.B) { + for i := 0; i < b.N; i++ { + Asin(complex(2.5, 3.5)) + } +} +func BenchmarkAsinh(b *testing.B) { + for i := 0; i < b.N; i++ { + Asinh(complex(2.5, 3.5)) + } +} +func BenchmarkAtan(b *testing.B) { + for i := 0; i < b.N; i++ { + Atan(complex(2.5, 3.5)) + } +} +func BenchmarkAtanh(b *testing.B) { + for i := 0; i < b.N; i++ { + Atanh(complex(2.5, 3.5)) + } +} +func 
BenchmarkConj(b *testing.B) { + for i := 0; i < b.N; i++ { + Conj(complex(2.5, 3.5)) + } +} +func BenchmarkCos(b *testing.B) { + for i := 0; i < b.N; i++ { + Cos(complex(2.5, 3.5)) + } +} +func BenchmarkCosh(b *testing.B) { + for i := 0; i < b.N; i++ { + Cosh(complex(2.5, 3.5)) + } +} +func BenchmarkExp(b *testing.B) { + for i := 0; i < b.N; i++ { + Exp(complex(2.5, 3.5)) + } +} +func BenchmarkLog(b *testing.B) { + for i := 0; i < b.N; i++ { + Log(complex(2.5, 3.5)) + } +} +func BenchmarkLog10(b *testing.B) { + for i := 0; i < b.N; i++ { + Log10(complex(2.5, 3.5)) + } +} +func BenchmarkPhase(b *testing.B) { + for i := 0; i < b.N; i++ { + Phase(complex(2.5, 3.5)) + } +} +func BenchmarkPolar(b *testing.B) { + for i := 0; i < b.N; i++ { + Polar(complex(2.5, 3.5)) + } +} +func BenchmarkPow(b *testing.B) { + for i := 0; i < b.N; i++ { + Pow(complex(2.5, 3.5), complex(2.5, 3.5)) + } +} +func BenchmarkRect(b *testing.B) { + for i := 0; i < b.N; i++ { + Rect(2.5, 1.5) + } +} +func BenchmarkSin(b *testing.B) { + for i := 0; i < b.N; i++ { + Sin(complex(2.5, 3.5)) + } +} +func BenchmarkSinh(b *testing.B) { + for i := 0; i < b.N; i++ { + Sinh(complex(2.5, 3.5)) + } +} +func BenchmarkSqrt(b *testing.B) { + for i := 0; i < b.N; i++ { + Sqrt(complex(2.5, 3.5)) + } +} +func BenchmarkTan(b *testing.B) { + for i := 0; i < b.N; i++ { + Tan(complex(2.5, 3.5)) + } +} +func BenchmarkTanh(b *testing.B) { + for i := 0; i < b.N; i++ { + Tanh(complex(2.5, 3.5)) + } +} diff --git a/src/math/cmplx/conj.go b/src/math/cmplx/conj.go new file mode 100644 index 0000000..34a4277 --- /dev/null +++ b/src/math/cmplx/conj.go @@ -0,0 +1,8 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +// Conj returns the complex conjugate of x. 
+func Conj(x complex128) complex128 { return complex(real(x), -imag(x)) } diff --git a/src/math/cmplx/example_test.go b/src/math/cmplx/example_test.go new file mode 100644 index 0000000..f0ed963 --- /dev/null +++ b/src/math/cmplx/example_test.go @@ -0,0 +1,28 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx_test + +import ( + "fmt" + "math" + "math/cmplx" +) + +func ExampleAbs() { + fmt.Printf("%.1f", cmplx.Abs(3+4i)) + // Output: 5.0 +} + +// ExampleExp computes Euler's identity. +func ExampleExp() { + fmt.Printf("%.1f", cmplx.Exp(1i*math.Pi)+1) + // Output: (0.0+0.0i) +} + +func ExamplePolar() { + r, theta := cmplx.Polar(2i) + fmt.Printf("r: %.1f, θ: %.1f*π", r, theta/math.Pi) + // Output: r: 2.0, θ: 0.5*π +} diff --git a/src/math/cmplx/exp.go b/src/math/cmplx/exp.go new file mode 100644 index 0000000..d5d0a5d --- /dev/null +++ b/src/math/cmplx/exp.go @@ -0,0 +1,72 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. 
+// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex exponential function +// +// DESCRIPTION: +// +// Returns the complex exponential of the complex argument z. +// +// If +// z = x + iy, +// r = exp(x), +// then +// w = r cos y + i r sin y. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 8700 3.7e-17 1.1e-17 +// IEEE -10,+10 30000 3.0e-16 8.7e-17 + +// Exp returns e**x, the base-e exponential of x. +func Exp(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case math.IsInf(re, 0): + switch { + case re > 0 && im == 0: + return x + case math.IsInf(im, 0) || math.IsNaN(im): + if re < 0 { + return complex(0, math.Copysign(0, im)) + } else { + return complex(math.Inf(1.0), math.NaN()) + } + } + case math.IsNaN(re): + if im == 0 { + return complex(math.NaN(), im) + } + } + r := math.Exp(real(x)) + s, c := math.Sincos(imag(x)) + return complex(r*c, r*s) +} diff --git a/src/math/cmplx/huge_test.go b/src/math/cmplx/huge_test.go new file mode 100644 index 0000000..e794cf2 --- /dev/null +++ b/src/math/cmplx/huge_test.go @@ -0,0 +1,22 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Disabled for s390x because it uses assembly routines that are not +// accurate for huge arguments. + +//go:build !s390x + +package cmplx + +import ( + "testing" +) + +func TestTanHuge(t *testing.T) { + for i, x := range hugeIn { + if f := Tan(x); !cSoclose(tanHuge[i], f, 3e-15) { + t.Errorf("Tan(%g) = %g, want %g", x, f, tanHuge[i]) + } + } +} diff --git a/src/math/cmplx/isinf.go b/src/math/cmplx/isinf.go new file mode 100644 index 0000000..6273cd3 --- /dev/null +++ b/src/math/cmplx/isinf.go @@ -0,0 +1,21 @@ +// Copyright 2010 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// IsInf reports whether either real(x) or imag(x) is an infinity. +func IsInf(x complex128) bool { + if math.IsInf(real(x), 0) || math.IsInf(imag(x), 0) { + return true + } + return false +} + +// Inf returns a complex infinity, complex(+Inf, +Inf). +func Inf() complex128 { + inf := math.Inf(1) + return complex(inf, inf) +} diff --git a/src/math/cmplx/isnan.go b/src/math/cmplx/isnan.go new file mode 100644 index 0000000..d3382c0 --- /dev/null +++ b/src/math/cmplx/isnan.go @@ -0,0 +1,25 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// IsNaN reports whether either real(x) or imag(x) is NaN +// and neither is an infinity. +func IsNaN(x complex128) bool { + switch { + case math.IsInf(real(x), 0) || math.IsInf(imag(x), 0): + return false + case math.IsNaN(real(x)) || math.IsNaN(imag(x)): + return true + } + return false +} + +// NaN returns a complex ``not-a-number'' value. +func NaN() complex128 { + nan := math.NaN() + return complex(nan, nan) +} diff --git a/src/math/cmplx/log.go b/src/math/cmplx/log.go new file mode 100644 index 0000000..fd39c76 --- /dev/null +++ b/src/math/cmplx/log.go @@ -0,0 +1,65 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. 
Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex natural logarithm +// +// DESCRIPTION: +// +// Returns complex logarithm to the base e (2.718...) of +// the complex argument z. +// +// If +// z = x + iy, r = sqrt( x**2 + y**2 ), +// then +// w = log(r) + i arctan(y/x). +// +// The arctangent ranges from -PI to +PI. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 7000 8.5e-17 1.9e-17 +// IEEE -10,+10 30000 5.0e-15 1.1e-16 +// +// Larger relative error can be observed for z near 1 +i0. +// In IEEE arithmetic the peak absolute error is 5.2e-16, rms +// absolute error 1.0e-16. + +// Log returns the natural logarithm of x. +func Log(x complex128) complex128 { + return complex(math.Log(Abs(x)), Phase(x)) +} + +// Log10 returns the decimal logarithm of x. +func Log10(x complex128) complex128 { + z := Log(x) + return complex(math.Log10E*real(z), math.Log10E*imag(z)) +} diff --git a/src/math/cmplx/phase.go b/src/math/cmplx/phase.go new file mode 100644 index 0000000..03cece8 --- /dev/null +++ b/src/math/cmplx/phase.go @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// Phase returns the phase (also called the argument) of x. 
+// The returned value is in the range [-Pi, Pi]. +func Phase(x complex128) float64 { return math.Atan2(imag(x), real(x)) } diff --git a/src/math/cmplx/polar.go b/src/math/cmplx/polar.go new file mode 100644 index 0000000..9b192bc --- /dev/null +++ b/src/math/cmplx/polar.go @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +// Polar returns the absolute value r and phase θ of x, +// such that x = r * e**θi. +// The phase is in the range [-Pi, Pi]. +func Polar(x complex128) (r, θ float64) { + return Abs(x), Phase(x) +} diff --git a/src/math/cmplx/pow.go b/src/math/cmplx/pow.go new file mode 100644 index 0000000..5a405f8 --- /dev/null +++ b/src/math/cmplx/pow.go @@ -0,0 +1,81 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. 
Moshier +// moshier@na-net.ornl.gov + +// Complex power function +// +// DESCRIPTION: +// +// Raises complex A to the complex Zth power. +// Definition is per AMS55 # 4.2.8, +// analytically equivalent to cpow(a,z) = cexp(z clog(a)). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -10,+10 30000 9.4e-15 1.5e-15 + +// Pow returns x**y, the base-x exponential of y. +// For generalized compatibility with math.Pow: +// Pow(0, ±0) returns 1+0i +// Pow(0, c) for real(c)<0 returns Inf+0i if imag(c) is zero, otherwise Inf+Inf i. +func Pow(x, y complex128) complex128 { + if x == 0 { // Guaranteed also true for x == -0. + if IsNaN(y) { + return NaN() + } + r, i := real(y), imag(y) + switch { + case r == 0: + return 1 + case r < 0: + if i == 0 { + return complex(math.Inf(1), 0) + } + return Inf() + case r > 0: + return 0 + } + panic("not reached") + } + modulus := Abs(x) + if modulus == 0 { + return complex(0, 0) + } + r := math.Pow(modulus, real(y)) + arg := Phase(x) + theta := real(y) * arg + if imag(y) != 0 { + r *= math.Exp(-imag(y) * arg) + theta += imag(y) * math.Log(modulus) + } + s, c := math.Sincos(theta) + return complex(r*c, r*s) +} diff --git a/src/math/cmplx/rect.go b/src/math/cmplx/rect.go new file mode 100644 index 0000000..bf94d78 --- /dev/null +++ b/src/math/cmplx/rect.go @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// Rect returns the complex number x with polar coordinates r, θ. +func Rect(r, θ float64) complex128 { + s, c := math.Sincos(θ) + return complex(r*c, r*s) +} diff --git a/src/math/cmplx/sin.go b/src/math/cmplx/sin.go new file mode 100644 index 0000000..febac0e --- /dev/null +++ b/src/math/cmplx/sin.go @@ -0,0 +1,184 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex circular sine +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// w = sin x cosh y + i cos x sinh y. +// +// csin(z) = -i csinh(iz). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 8400 5.3e-17 1.3e-17 +// IEEE -10,+10 30000 3.8e-16 1.0e-16 +// Also tested by csin(casin(z)) = z. + +// Sin returns the sine of x. 
+func Sin(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case im == 0 && (math.IsInf(re, 0) || math.IsNaN(re)): + return complex(math.NaN(), im) + case math.IsInf(im, 0): + switch { + case re == 0: + return x + case math.IsInf(re, 0) || math.IsNaN(re): + return complex(math.NaN(), im) + } + case re == 0 && math.IsNaN(im): + return x + } + s, c := math.Sincos(real(x)) + sh, ch := sinhcosh(imag(x)) + return complex(s*ch, c*sh) +} + +// Complex hyperbolic sine +// +// DESCRIPTION: +// +// csinh z = (cexp(z) - cexp(-z))/2 +// = sinh x * cos y + i cosh x * sin y . +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -10,+10 30000 3.1e-16 8.2e-17 + +// Sinh returns the hyperbolic sine of x. +func Sinh(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case re == 0 && (math.IsInf(im, 0) || math.IsNaN(im)): + return complex(re, math.NaN()) + case math.IsInf(re, 0): + switch { + case im == 0: + return complex(re, im) + case math.IsInf(im, 0) || math.IsNaN(im): + return complex(re, math.NaN()) + } + case im == 0 && math.IsNaN(re): + return complex(math.NaN(), im) + } + s, c := math.Sincos(imag(x)) + sh, ch := sinhcosh(real(x)) + return complex(c*sh, s*ch) +} + +// Complex circular cosine +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// w = cos x cosh y - i sin x sinh y. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 8400 4.5e-17 1.3e-17 +// IEEE -10,+10 30000 3.8e-16 1.0e-16 + +// Cos returns the cosine of x. 
+func Cos(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case im == 0 && (math.IsInf(re, 0) || math.IsNaN(re)): + return complex(math.NaN(), -im*math.Copysign(0, re)) + case math.IsInf(im, 0): + switch { + case re == 0: + return complex(math.Inf(1), -re*math.Copysign(0, im)) + case math.IsInf(re, 0) || math.IsNaN(re): + return complex(math.Inf(1), math.NaN()) + } + case re == 0 && math.IsNaN(im): + return complex(math.NaN(), 0) + } + s, c := math.Sincos(real(x)) + sh, ch := sinhcosh(imag(x)) + return complex(c*ch, -s*sh) +} + +// Complex hyperbolic cosine +// +// DESCRIPTION: +// +// ccosh(z) = cosh x cos y + i sinh x sin y . +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -10,+10 30000 2.9e-16 8.1e-17 + +// Cosh returns the hyperbolic cosine of x. +func Cosh(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case re == 0 && (math.IsInf(im, 0) || math.IsNaN(im)): + return complex(math.NaN(), re*math.Copysign(0, im)) + case math.IsInf(re, 0): + switch { + case im == 0: + return complex(math.Inf(1), im*math.Copysign(0, re)) + case math.IsInf(im, 0) || math.IsNaN(im): + return complex(math.Inf(1), math.NaN()) + } + case im == 0 && math.IsNaN(re): + return complex(math.NaN(), im) + } + s, c := math.Sincos(imag(x)) + sh, ch := sinhcosh(real(x)) + return complex(c*ch, s*sh) +} + +// calculate sinh and cosh +func sinhcosh(x float64) (sh, ch float64) { + if math.Abs(x) <= 0.5 { + return math.Sinh(x), math.Cosh(x) + } + e := math.Exp(x) + ei := 0.5 / e + e *= 0.5 + return e - ei, e + ei +} diff --git a/src/math/cmplx/sqrt.go b/src/math/cmplx/sqrt.go new file mode 100644 index 0000000..eddce2f --- /dev/null +++ b/src/math/cmplx/sqrt.go @@ -0,0 +1,107 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex square root +// +// DESCRIPTION: +// +// If z = x + iy, r = |z|, then +// +// 1/2 +// Re w = [ (r + x)/2 ] , +// +// 1/2 +// Im w = [ (r - x)/2 ] . +// +// Cancellation error in r-x or r+x is avoided by using the +// identity 2 Re w Im w = y. +// +// Note that -w is also a square root of z. The root chosen +// is always in the right half plane and Im w has the same sign as y. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 25000 3.2e-17 9.6e-18 +// IEEE -10,+10 1,000,000 2.9e-16 6.1e-17 + +// Sqrt returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). +func Sqrt(x complex128) complex128 { + if imag(x) == 0 { + // Ensure that imag(r) has the same sign as imag(x) for imag(x) == signed zero. 
+ if real(x) == 0 { + return complex(0, imag(x)) + } + if real(x) < 0 { + return complex(0, math.Copysign(math.Sqrt(-real(x)), imag(x))) + } + return complex(math.Sqrt(real(x)), imag(x)) + } else if math.IsInf(imag(x), 0) { + return complex(math.Inf(1.0), imag(x)) + } + if real(x) == 0 { + if imag(x) < 0 { + r := math.Sqrt(-0.5 * imag(x)) + return complex(r, -r) + } + r := math.Sqrt(0.5 * imag(x)) + return complex(r, r) + } + a := real(x) + b := imag(x) + var scale float64 + // Rescale to avoid internal overflow or underflow. + if math.Abs(a) > 4 || math.Abs(b) > 4 { + a *= 0.25 + b *= 0.25 + scale = 2 + } else { + a *= 1.8014398509481984e16 // 2**54 + b *= 1.8014398509481984e16 + scale = 7.450580596923828125e-9 // 2**-27 + } + r := math.Hypot(a, b) + var t float64 + if a > 0 { + t = math.Sqrt(0.5*r + 0.5*a) + r = scale * math.Abs((0.5*b)/t) + t *= scale + } else { + r = math.Sqrt(0.5*r - 0.5*a) + t = scale * math.Abs((0.5*b)/r) + r *= scale + } + if b < 0 { + return complex(t, -r) + } + return complex(t, r) +} diff --git a/src/math/cmplx/tan.go b/src/math/cmplx/tan.go new file mode 100644 index 0000000..67a1133 --- /dev/null +++ b/src/math/cmplx/tan.go @@ -0,0 +1,297 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import ( + "math" + "math/bits" +) + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. 
Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex circular tangent +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// sin 2x + i sinh 2y +// w = --------------------. +// cos 2x + cosh 2y +// +// On the real axis the denominator is zero at odd multiples +// of PI/2. The denominator is evaluated by its Taylor +// series near these points. +// +// ctan(z) = -i ctanh(iz). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 5200 7.1e-17 1.6e-17 +// IEEE -10,+10 30000 7.2e-16 1.2e-16 +// Also tested by ctan * ccot = 1 and catan(ctan(z)) = z. + +// Tan returns the tangent of x. +func Tan(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case math.IsInf(im, 0): + switch { + case math.IsInf(re, 0) || math.IsNaN(re): + return complex(math.Copysign(0, re), math.Copysign(1, im)) + } + return complex(math.Copysign(0, math.Sin(2*re)), math.Copysign(1, im)) + case re == 0 && math.IsNaN(im): + return x + } + d := math.Cos(2*real(x)) + math.Cosh(2*imag(x)) + if math.Abs(d) < 0.25 { + d = tanSeries(x) + } + if d == 0 { + return Inf() + } + return complex(math.Sin(2*real(x))/d, math.Sinh(2*imag(x))/d) +} + +// Complex hyperbolic tangent +// +// DESCRIPTION: +// +// tanh z = (sinh 2x + i sin 2y) / (cosh 2x + cos 2y) . 
+// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -10,+10 30000 1.7e-14 2.4e-16 + +// Tanh returns the hyperbolic tangent of x. +func Tanh(x complex128) complex128 { + switch re, im := real(x), imag(x); { + case math.IsInf(re, 0): + switch { + case math.IsInf(im, 0) || math.IsNaN(im): + return complex(math.Copysign(1, re), math.Copysign(0, im)) + } + return complex(math.Copysign(1, re), math.Copysign(0, math.Sin(2*im))) + case im == 0 && math.IsNaN(re): + return x + } + d := math.Cosh(2*real(x)) + math.Cos(2*imag(x)) + if d == 0 { + return Inf() + } + return complex(math.Sinh(2*real(x))/d, math.Sin(2*imag(x))/d) +} + +// reducePi reduces the input argument x to the range (-Pi/2, Pi/2]. +// x must be greater than or equal to 0. For small arguments it +// uses Cody-Waite reduction in 3 float64 parts based on: +// "Elementary Function Evaluation: Algorithms and Implementation" +// Jean-Michel Muller, 1997. +// For very large arguments it uses Payne-Hanek range reduction based on: +// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" +// K. C. Ng et al, March 24, 1992. +func reducePi(x float64) float64 { + // reduceThreshold is the maximum value of x where the reduction using + // Cody-Waite reduction still gives accurate results. This threshold + // is set by t*PIn being representable as a float64 without error + // where t is given by t = floor(x * (1 / Pi)) and PIn are the leading partial + // terms of Pi. Since the leading terms, PI1 and PI2 below, have 30 and 32 + // trailing zero bits respectively, t should have less than 30 significant bits. + // t < 1<<30 -> floor(x*(1/Pi)+0.5) < 1<<30 -> x < (1<<30-1) * Pi - 0.5 + // So, conservatively we can take x < 1<<30. + const reduceThreshold float64 = 1 << 30 + if math.Abs(x) < reduceThreshold { + // Use Cody-Waite reduction in three parts. + const ( + // PI1, PI2 and PI3 comprise an extended precision value of PI + // such that PI ~= PI1 + PI2 + PI3. 
The parts are chosen so + // that PI1 and PI2 have an approximately equal number of trailing + // zero bits. This ensures that t*PI1 and t*PI2 are exact for + // large integer values of t. The full precision PI3 ensures the + // approximation of PI is accurate to 102 bits to handle cancellation + // during subtraction. + PI1 = 3.141592502593994 // 0x400921fb40000000 + PI2 = 1.5099578831723193e-07 // 0x3e84442d00000000 + PI3 = 1.0780605716316238e-14 // 0x3d08469898cc5170 + ) + t := x / math.Pi + t += 0.5 + t = float64(int64(t)) // int64(t) = the multiple + return ((x - t*PI1) - t*PI2) - t*PI3 + } + // Must apply Payne-Hanek range reduction + const ( + mask = 0x7FF + shift = 64 - 11 - 1 + bias = 1023 + fracMask = 1<<shift - 1 + ) + // Extract out the integer and exponent such that, + // x = ix * 2 ** exp. + ix := math.Float64bits(x) + exp := int(ix>>shift&mask) - bias - shift + ix &= fracMask + ix |= 1 << shift + + // mPi is the binary digits of 1/Pi as a uint64 array, + // that is, 1/Pi = Sum mPi[i]*2^(-64*i). + // 19 64-bit digits give 1216 bits of precision + // to handle the largest possible float64 exponent. + var mPi = [...]uint64{ + 0x0000000000000000, + 0x517cc1b727220a94, + 0xfe13abe8fa9a6ee0, + 0x6db14acc9e21c820, + 0xff28b1d5ef5de2b0, + 0xdb92371d2126e970, + 0x0324977504e8c90e, + 0x7f0ef58e5894d39f, + 0x74411afa975da242, + 0x74ce38135a2fbf20, + 0x9cc8eb1cc1a99cfa, + 0x4e422fc5defc941d, + 0x8ffc4bffef02cc07, + 0xf79788c5ad05368f, + 0xb69b3f6793e584db, + 0xa7a31fb34f2ff516, + 0xba93dd63f5f2f8bd, + 0x9e839cfbc5294975, + 0x35fdafd88fc6ae84, + 0x2b0198237e3db5d5, + } + // Use the exponent to extract the 3 appropriate uint64 digits from mPi, + // B ~ (z0, z1, z2), such that the product leading digit has the exponent -64. + // Note, exp >= 50 since x >= reduceThreshold and exp < 971 for maximum float64. 
+ digit, bitshift := uint(exp+64)/64, uint(exp+64)%64 + z0 := (mPi[digit] << bitshift) | (mPi[digit+1] >> (64 - bitshift)) + z1 := (mPi[digit+1] << bitshift) | (mPi[digit+2] >> (64 - bitshift)) + z2 := (mPi[digit+2] << bitshift) | (mPi[digit+3] >> (64 - bitshift)) + // Multiply mantissa by the digits and extract the upper two digits (hi, lo). + z2hi, _ := bits.Mul64(z2, ix) + z1hi, z1lo := bits.Mul64(z1, ix) + z0lo := z0 * ix + lo, c := bits.Add64(z1lo, z2hi, 0) + hi, _ := bits.Add64(z0lo, z1hi, c) + // Find the magnitude of the fraction. + lz := uint(bits.LeadingZeros64(hi)) + e := uint64(bias - (lz + 1)) + // Clear implicit mantissa bit and shift into place. + hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1))) + hi >>= 64 - shift + // Include the exponent and convert to a float. + hi |= e << shift + x = math.Float64frombits(hi) + // map to (-Pi/2, Pi/2] + if x > 0.5 { + x-- + } + return math.Pi * x +} + +// Taylor series expansion for cosh(2y) - cos(2x) +func tanSeries(z complex128) float64 { + const MACHEP = 1.0 / (1 << 53) + x := math.Abs(2 * real(z)) + y := math.Abs(2 * imag(z)) + x = reducePi(x) + x = x * x + y = y * y + x2 := 1.0 + y2 := 1.0 + f := 1.0 + rn := 0.0 + d := 0.0 + for { + rn++ + f *= rn + rn++ + f *= rn + x2 *= x + y2 *= y + t := y2 + x2 + t /= f + d += t + + rn++ + f *= rn + rn++ + f *= rn + x2 *= x + y2 *= y + t = y2 - x2 + t /= f + d += t + if !(math.Abs(t/d) > MACHEP) { + // Caution: Use ! and > instead of <= for correct behavior if t/d is NaN. + // See issue 17577. + break + } + } + return d +} + +// Complex circular cotangent +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// sin 2x - i sinh 2y +// w = --------------------. +// cosh 2y - cos 2x +// +// On the real axis, the denominator has zeros at even +// multiples of PI/2. Near these points it is evaluated +// by a Taylor series. 
+// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 3000 6.5e-17 1.6e-17 +// IEEE -10,+10 30000 9.2e-16 1.2e-16 +// Also tested by ctan * ccot = 1 + i0. + +// Cot returns the cotangent of x. +func Cot(x complex128) complex128 { + d := math.Cosh(2*imag(x)) - math.Cos(2*real(x)) + if math.Abs(d) < 0.25 { + d = tanSeries(x) + } + if d == 0 { + return Inf() + } + return complex(math.Sin(2*real(x))/d, -math.Sinh(2*imag(x))/d) +} diff --git a/src/math/const.go b/src/math/const.go new file mode 100644 index 0000000..5ea935f --- /dev/null +++ b/src/math/const.go @@ -0,0 +1,57 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package math provides basic constants and mathematical functions. +// +// This package does not guarantee bit-identical results across architectures. +package math + +// Mathematical constants. +const ( + E = 2.71828182845904523536028747135266249775724709369995957496696763 // https://oeis.org/A001113 + Pi = 3.14159265358979323846264338327950288419716939937510582097494459 // https://oeis.org/A000796 + Phi = 1.61803398874989484820458683436563811772030917980576286213544862 // https://oeis.org/A001622 + + Sqrt2 = 1.41421356237309504880168872420969807856967187537694807317667974 // https://oeis.org/A002193 + SqrtE = 1.64872127070012814684865078781416357165377610071014801157507931 // https://oeis.org/A019774 + SqrtPi = 1.77245385090551602729816748334114518279754945612238712821380779 // https://oeis.org/A002161 + SqrtPhi = 1.27201964951406896425242246173749149171560804184009624861664038 // https://oeis.org/A139339 + + Ln2 = 0.693147180559945309417232121458176568075500134360255254120680009 // https://oeis.org/A002162 + Log2E = 1 / Ln2 + Ln10 = 2.30258509299404568401799145468436420760110148862877297603332790 // https://oeis.org/A002392 + Log10E = 1 / Ln10 +) + +// Floating-point limit values. 
+// Max is the largest finite value representable by the type. +// SmallestNonzero is the smallest positive, non-zero value representable by the type. +const ( + MaxFloat32 = 0x1p127 * (1 + (1 - 0x1p-23)) // 3.40282346638528859811704183484516925440e+38 + SmallestNonzeroFloat32 = 0x1p-126 * 0x1p-23 // 1.401298464324817070923729583289916131280e-45 + + MaxFloat64 = 0x1p1023 * (1 + (1 - 0x1p-52)) // 1.79769313486231570814527423731704356798070e+308 + SmallestNonzeroFloat64 = 0x1p-1022 * 0x1p-52 // 4.9406564584124654417656879286822137236505980e-324 +) + +// Integer limit values. +const ( + intSize = 32 << (^uint(0) >> 63) // 32 or 64 + + MaxInt = 1<<(intSize-1) - 1 + MinInt = -1 << (intSize - 1) + MaxInt8 = 1<<7 - 1 + MinInt8 = -1 << 7 + MaxInt16 = 1<<15 - 1 + MinInt16 = -1 << 15 + MaxInt32 = 1<<31 - 1 + MinInt32 = -1 << 31 + MaxInt64 = 1<<63 - 1 + MinInt64 = -1 << 63 + MaxUint = 1<<intSize - 1 + MaxUint8 = 1<<8 - 1 + MaxUint16 = 1<<16 - 1 + MaxUint32 = 1<<32 - 1 + MaxUint64 = 1<<64 - 1 +) diff --git a/src/math/const_test.go b/src/math/const_test.go new file mode 100644 index 0000000..170ba6a --- /dev/null +++ b/src/math/const_test.go @@ -0,0 +1,47 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math_test + +import ( + "testing" + + . 
"math" +) + +func TestMaxUint(t *testing.T) { + if v := uint(MaxUint); v+1 != 0 { + t.Errorf("MaxUint should wrap around to zero: %d", v+1) + } + if v := uint8(MaxUint8); v+1 != 0 { + t.Errorf("MaxUint8 should wrap around to zero: %d", v+1) + } + if v := uint16(MaxUint16); v+1 != 0 { + t.Errorf("MaxUint16 should wrap around to zero: %d", v+1) + } + if v := uint32(MaxUint32); v+1 != 0 { + t.Errorf("MaxUint32 should wrap around to zero: %d", v+1) + } + if v := uint64(MaxUint64); v+1 != 0 { + t.Errorf("MaxUint64 should wrap around to zero: %d", v+1) + } +} + +func TestMaxInt(t *testing.T) { + if v := int(MaxInt); v+1 != MinInt { + t.Errorf("MaxInt should wrap around to MinInt: %d", v+1) + } + if v := int8(MaxInt8); v+1 != MinInt8 { + t.Errorf("MaxInt8 should wrap around to MinInt8: %d", v+1) + } + if v := int16(MaxInt16); v+1 != MinInt16 { + t.Errorf("MaxInt16 should wrap around to MinInt16: %d", v+1) + } + if v := int32(MaxInt32); v+1 != MinInt32 { + t.Errorf("MaxInt32 should wrap around to MinInt32: %d", v+1) + } + if v := int64(MaxInt64); v+1 != MinInt64 { + t.Errorf("MaxInt64 should wrap around to MinInt64: %d", v+1) + } +} diff --git a/src/math/copysign.go b/src/math/copysign.go new file mode 100644 index 0000000..719c64b --- /dev/null +++ b/src/math/copysign.go @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Copysign returns a value with the magnitude +// of x and the sign of y. +func Copysign(x, y float64) float64 { + const sign = 1 << 63 + return Float64frombits(Float64bits(x)&^sign | Float64bits(y)&sign) +} diff --git a/src/math/cosh_s390x.s b/src/math/cosh_s390x.s new file mode 100644 index 0000000..ca1d86e --- /dev/null +++ b/src/math/cosh_s390x.s @@ -0,0 +1,211 @@ +// Copyright 2016 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Constants +DATA coshrodataL23<>+0(SB)/8, $0.231904681384629956E-16 +DATA coshrodataL23<>+8(SB)/8, $0.693147180559945286E+00 +DATA coshrodataL23<>+16(SB)/8, $0.144269504088896339E+01 +DATA coshrodataL23<>+24(SB)/8, $704.E0 +GLOBL coshrodataL23<>+0(SB), RODATA, $32 +DATA coshxinf<>+0(SB)/8, $0x7FF0000000000000 +GLOBL coshxinf<>+0(SB), RODATA, $8 +DATA coshxlim1<>+0(SB)/8, $800.E0 +GLOBL coshxlim1<>+0(SB), RODATA, $8 +DATA coshxaddhy<>+0(SB)/8, $0xc2f0000100003fdf +GLOBL coshxaddhy<>+0(SB), RODATA, $8 +DATA coshx4ff<>+0(SB)/8, $0x4ff0000000000000 +GLOBL coshx4ff<>+0(SB), RODATA, $8 +DATA coshe1<>+0(SB)/8, $0x3ff000000000000a +GLOBL coshe1<>+0(SB), RODATA, $8 + +// Log multiplier table +DATA coshtab<>+0(SB)/8, $0.442737824274138381E-01 +DATA coshtab<>+8(SB)/8, $0.263602189790660309E-01 +DATA coshtab<>+16(SB)/8, $0.122565642281703586E-01 +DATA coshtab<>+24(SB)/8, $0.143757052860721398E-02 +DATA coshtab<>+32(SB)/8, $-.651375034121276075E-02 +DATA coshtab<>+40(SB)/8, $-.119317678849450159E-01 +DATA coshtab<>+48(SB)/8, $-.150868749549871069E-01 +DATA coshtab<>+56(SB)/8, $-.161992609578469234E-01 +DATA coshtab<>+64(SB)/8, $-.154492360403337917E-01 +DATA coshtab<>+72(SB)/8, $-.129850717389178721E-01 +DATA coshtab<>+80(SB)/8, $-.892902649276657891E-02 +DATA coshtab<>+88(SB)/8, $-.338202636596794887E-02 +DATA coshtab<>+96(SB)/8, $0.357266307045684762E-02 +DATA coshtab<>+104(SB)/8, $0.118665304327406698E-01 +DATA coshtab<>+112(SB)/8, $0.214434994118118914E-01 +DATA coshtab<>+120(SB)/8, $0.322580645161290314E-01 +GLOBL coshtab<>+0(SB), RODATA, $128 + +// Minimax polynomial approximations +DATA coshe2<>+0(SB)/8, $0.500000000000004237e+00 +GLOBL coshe2<>+0(SB), RODATA, $8 +DATA coshe3<>+0(SB)/8, $0.166666666630345592e+00 +GLOBL coshe3<>+0(SB), RODATA, $8 +DATA coshe4<>+0(SB)/8, $0.416666664838056960e-01 +GLOBL coshe4<>+0(SB), RODATA, $8 +DATA 
coshe5<>+0(SB)/8, $0.833349307718286047e-02 +GLOBL coshe5<>+0(SB), RODATA, $8 +DATA coshe6<>+0(SB)/8, $0.138926439368309441e-02 +GLOBL coshe6<>+0(SB), RODATA, $8 + +// Cosh returns the hyperbolic cosine of x. +// +// Special cases are: +// Cosh(±0) = 1 +// Cosh(±Inf) = +Inf +// Cosh(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·coshAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + MOVD $coshrodataL23<>+0(SB), R9 + LTDBR F0, F0 + MOVD $0x4086000000000000, R2 + MOVD $0x4086000000000000, R3 + BLTU L19 + FMOVD F0, F4 +L2: + WORD $0xED409018 //cdb %f4,.L24-.L23(%r9) + BYTE $0x00 + BYTE $0x19 + BGE L14 //jnl .L14 + BVS L14 + WFCEDBS V4, V4, V2 + BEQ L20 +L1: + FMOVD F0, ret+8(FP) + RET + +L14: + WFCEDBS V4, V4, V2 + BVS L1 + MOVD $coshxlim1<>+0(SB), R1 + FMOVD 0(R1), F2 + WFCHEDBS V4, V2, V2 + BEQ L21 + MOVD $coshxaddhy<>+0(SB), R1 + FMOVD coshrodataL23<>+16(SB), F5 + FMOVD 0(R1), F2 + WFMSDB V0, V5, V2, V5 + FMOVD coshrodataL23<>+8(SB), F3 + FADD F5, F2 + MOVD $coshe6<>+0(SB), R1 + WFMSDB V2, V3, V0, V3 + FMOVD 0(R1), F6 + WFMDB V3, V3, V1 + MOVD $coshe4<>+0(SB), R1 + FMOVD coshrodataL23<>+0(SB), F7 + WFMADB V2, V7, V3, V2 + FMOVD 0(R1), F3 + MOVD $coshe5<>+0(SB), R1 + WFMADB V1, V6, V3, V6 + FMOVD 0(R1), F7 + MOVD $coshe3<>+0(SB), R1 + FMOVD 0(R1), F3 + WFMADB V1, V7, V3, V7 + FNEG F2, F3 + LGDR F5, R1 + MOVD $coshe2<>+0(SB), R3 + WFCEDBS V4, V0, V0 + FMOVD 0(R3), F5 + MOVD $coshe1<>+0(SB), R3 + WFMADB V1, V6, V5, V6 + FMOVD 0(R3), F5 + RISBGN $0, $15, $48, R1, R2 + WFMADB V1, V7, V5, V1 + BVS L22 + RISBGZ $57, $60, $3, R1, R4 + MOVD $coshtab<>+0(SB), R3 + WFMADB V3, V6, V1, V6 + WORD $0x68043000 //ld %f0,0(%r4,%r3) + FMSUB F0, F3, F2 + WORD $0xA71AF000 //ahi %r1,-4096 + WFMADB V2, V6, V0, V6 +L17: + RISBGN $0, $15, $48, R1, R2 + LDGR R2, F2 + FMADD F2, F6, F2 + MOVD $coshx4ff<>+0(SB), R1 + FMOVD 0(R1), F0 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET + +L19: + FNEG F0, F4 + BR L2 
+L20: + MOVD $coshxaddhy<>+0(SB), R1 + FMOVD coshrodataL23<>+16(SB), F3 + FMOVD 0(R1), F2 + WFMSDB V0, V3, V2, V3 + FMOVD coshrodataL23<>+8(SB), F4 + FADD F3, F2 + MOVD $coshe6<>+0(SB), R1 + FMSUB F4, F2, F0 + FMOVD 0(R1), F6 + WFMDB V0, V0, V1 + MOVD $coshe4<>+0(SB), R1 + FMOVD 0(R1), F4 + MOVD $coshe5<>+0(SB), R1 + FMOVD coshrodataL23<>+0(SB), F5 + WFMADB V1, V6, V4, V6 + FMADD F5, F2, F0 + FMOVD 0(R1), F2 + MOVD $coshe3<>+0(SB), R1 + FMOVD 0(R1), F4 + WFMADB V1, V2, V4, V2 + MOVD $coshe2<>+0(SB), R1 + FMOVD 0(R1), F5 + FNEG F0, F4 + WFMADB V1, V6, V5, V6 + MOVD $coshe1<>+0(SB), R1 + FMOVD 0(R1), F5 + WFMADB V1, V2, V5, V1 + LGDR F3, R1 + MOVD $coshtab<>+0(SB), R5 + WFMADB V4, V6, V1, V3 + RISBGZ $57, $60, $3, R1, R4 + WFMSDB V4, V6, V1, V6 + WORD $0x68145000 //ld %f1,0(%r4,%r5) + WFMSDB V4, V1, V0, V2 + WORD $0xA7487FBE //lhi %r4,32702 + FMADD F3, F2, F1 + SUBW R1, R4 + RISBGZ $57, $60, $3, R4, R12 + WORD $0x682C5000 //ld %f2,0(%r12,%r5) + FMSUB F2, F4, F0 + RISBGN $0, $15, $48, R1, R2 + WFMADB V0, V6, V2, V6 + RISBGN $0, $15, $48, R4, R3 + LDGR R2, F2 + LDGR R3, F0 + FMADD F2, F1, F2 + FMADD F0, F6, F0 + FADD F2, F0 + FMOVD F0, ret+8(FP) + RET + +L22: + WORD $0xA7387FBE //lhi %r3,32702 + MOVD $coshtab<>+0(SB), R4 + SUBW R1, R3 + WFMSDB V3, V6, V1, V6 + RISBGZ $57, $60, $3, R3, R3 + WORD $0x68034000 //ld %f0,0(%r3,%r4) + FMSUB F0, F3, F2 + WORD $0xA7386FBE //lhi %r3,28606 + WFMADB V2, V6, V0, V6 + SUBW R1, R3, R1 + BR L17 +L21: + MOVD $coshxinf<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET + diff --git a/src/math/dim.go b/src/math/dim.go new file mode 100644 index 0000000..6a857bb --- /dev/null +++ b/src/math/dim.go @@ -0,0 +1,91 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Dim returns the maximum of x-y or 0. 
+// +// Special cases are: +// Dim(+Inf, +Inf) = NaN +// Dim(-Inf, -Inf) = NaN +// Dim(x, NaN) = Dim(NaN, x) = NaN +func Dim(x, y float64) float64 { + // The special cases result in NaN after the subtraction: + // +Inf - +Inf = NaN + // -Inf - -Inf = NaN + // NaN - y = NaN + // x - NaN = NaN + v := x - y + if v <= 0 { + // v is negative or 0 + return 0 + } + // v is positive or NaN + return v +} + +// Max returns the larger of x or y. +// +// Special cases are: +// Max(x, +Inf) = Max(+Inf, x) = +Inf +// Max(x, NaN) = Max(NaN, x) = NaN +// Max(+0, ±0) = Max(±0, +0) = +0 +// Max(-0, -0) = -0 +func Max(x, y float64) float64 { + // Use the architecture-specific implementation when one is available. + if haveArchMax { + return archMax(x, y) + } + return max(x, y) +} + +// max is the pure Go implementation of Max. +func max(x, y float64) float64 { + // special cases + switch { + case IsInf(x, 1) || IsInf(y, 1): // checked first, so +Inf wins even if the other argument is NaN + return Inf(1) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0 && x == y: // both are zeros, possibly of different sign + if Signbit(x) { + return y // x is -0, so y (either zero) is the result + } + return x // x is +0 + } + if x > y { + return x + } + return y +} + +// Min returns the smaller of x or y. +// +// Special cases are: +// Min(x, -Inf) = Min(-Inf, x) = -Inf +// Min(x, NaN) = Min(NaN, x) = NaN +// Min(-0, ±0) = Min(±0, -0) = -0 +func Min(x, y float64) float64 { + // Use the architecture-specific implementation when one is available. + if haveArchMin { + return archMin(x, y) + } + return min(x, y) +} + +// min is the pure Go implementation of Min. +func min(x, y float64) float64 { + // special cases + switch { + case IsInf(x, -1) || IsInf(y, -1): // checked first, so -Inf wins even if the other argument is NaN + return Inf(-1) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0 && x == y: // both are zeros, possibly of different sign + if Signbit(x) { + return x // x is -0 + } + return y // x is +0, so y (either zero) is the result + } + if x < y { + return x + } + return y +} diff --git a/src/math/dim_amd64.s b/src/math/dim_amd64.s new file mode 100644 index 0000000..253f03b --- /dev/null +++ b/src/math/dim_amd64.s @@ -0,0 +1,98 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 + +// func ·archMax(x, y float64) float64 +TEXT ·archMax(SB),NOSPLIT,$0 + // +Inf special cases + MOVQ $PosInf, AX + MOVQ x+0(FP), R8 + CMPQ AX, R8 + JEQ isPosInf + MOVQ y+8(FP), R9 + CMPQ AX, R9 + JEQ isPosInf + // NaN special cases + MOVQ $~(1<<63), DX // bit mask + MOVQ $PosInf, AX + MOVQ R8, BX + ANDQ DX, BX // x = |x| + CMPQ AX, BX + JLT isMaxNaN // PosInf < |x|: x is NaN + MOVQ R9, CX + ANDQ DX, CX // y = |y| + CMPQ AX, CX + JLT isMaxNaN // PosInf < |y|: y is NaN + // ±0 special cases + ORQ CX, BX // zero only if both |x| and |y| are zero + JEQ isMaxZero + + MOVQ R8, X0 + MOVQ R9, X1 + MAXSD X1, X0 + MOVSD X0, ret+16(FP) + RET +isMaxNaN: // return NaN + MOVQ $NaN, AX // falls through to the store below +isPosInf: // return +Inf (or the NaN loaded just above) + MOVQ AX, ret+16(FP) + RET +isMaxZero: + MOVQ $(1<<63), AX // -0.0 + CMPQ AX, R8 + JEQ +3(PC) // x is -0: skip the next two instructions and return y + MOVQ R8, ret+16(FP) // return 0 + RET + MOVQ R9, ret+16(FP) // return other 0 + RET + +// func archMin(x, y float64) float64 +TEXT ·archMin(SB),NOSPLIT,$0 + // -Inf special cases + MOVQ $NegInf, AX + MOVQ x+0(FP), R8 + CMPQ AX, R8 + JEQ isNegInf + MOVQ y+8(FP), R9 + CMPQ AX, R9 + JEQ isNegInf + // NaN special cases + MOVQ $~(1<<63), DX // bit mask + MOVQ $PosInf, AX + MOVQ R8, BX + ANDQ DX, BX // x = |x| + CMPQ AX, BX + JLT isMinNaN // PosInf < |x|: x is NaN + MOVQ R9, CX + ANDQ DX, CX // y = |y| + CMPQ AX, CX + JLT isMinNaN // PosInf < |y|: y is NaN + // ±0 special cases + ORQ CX, BX // zero only if both |x| and |y| are zero + JEQ isMinZero + + MOVQ R8, X0 + MOVQ R9, X1 + MINSD X1, X0 + MOVSD X0, ret+16(FP) + RET +isMinNaN: // return NaN + MOVQ $NaN, AX // falls through to the store below +isNegInf: // return -Inf (or the NaN loaded just above) + MOVQ AX, ret+16(FP) + RET +isMinZero: + MOVQ $(1<<63), AX // -0.0 + CMPQ AX, R8 + JEQ +3(PC) // x is -0: skip the next two instructions and return x + MOVQ R9, ret+16(FP) // return other 0 + RET + MOVQ R8, ret+16(FP) // return -0 + RET + diff --git a/src/math/dim_arm64.s b/src/math/dim_arm64.s new file mode 100644 index 0000000..f112003 --- /dev/null +++ b/src/math/dim_arm64.s @@ -0,0 +1,49 @@ +// Copyright 2016 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 + +// func ·archMax(x, y float64) float64 +TEXT ·archMax(SB),NOSPLIT,$0 + // +Inf special cases + MOVD $PosInf, R0 + MOVD x+0(FP), R1 + CMP R0, R1 + BEQ isPosInf + MOVD y+8(FP), R2 + CMP R0, R2 + BEQ isPosInf + // normal case + // NOTE(review): NaN and ±0 are not special-cased here as they are in + // dim_amd64.s; presumably FMAXD already gives the documented results - confirm. + FMOVD R1, F0 + FMOVD R2, F1 + FMAXD F0, F1, F0 + FMOVD F0, ret+16(FP) + RET +isPosInf: // return +Inf + MOVD R0, ret+16(FP) + RET + +// func archMin(x, y float64) float64 +TEXT ·archMin(SB),NOSPLIT,$0 + // -Inf special cases + MOVD $NegInf, R0 + MOVD x+0(FP), R1 + CMP R0, R1 + BEQ isNegInf + MOVD y+8(FP), R2 + CMP R0, R2 + BEQ isNegInf + // normal case + // NOTE(review): NaN and ±0 are not special-cased here as they are in + // dim_amd64.s; presumably FMIND already gives the documented results - confirm. + FMOVD R1, F0 + FMOVD R2, F1 + FMIND F0, F1, F0 + FMOVD F0, ret+16(FP) + RET +isNegInf: // return -Inf + MOVD R0, ret+16(FP) + RET diff --git a/src/math/dim_asm.go b/src/math/dim_asm.go new file mode 100644 index 0000000..f4adbd0 --- /dev/null +++ b/src/math/dim_asm.go @@ -0,0 +1,15 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 || arm64 || riscv64 || s390x + +package math + +// haveArchMax makes Max use archMax on the architectures listed in the build constraint. +const haveArchMax = true + +// archMax has no Go body; it is defined by the TEXT ·archMax symbol in the per-architecture dim_*.s files. +func archMax(x, y float64) float64 + +// haveArchMin makes Min use archMin on the architectures listed in the build constraint. +const haveArchMin = true + +// archMin has no Go body; it is defined by the TEXT ·archMin symbol in the per-architecture dim_*.s files. +func archMin(x, y float64) float64 diff --git a/src/math/dim_noasm.go b/src/math/dim_noasm.go new file mode 100644 index 0000000..5b9e06f --- /dev/null +++ b/src/math/dim_noasm.go @@ -0,0 +1,19 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !amd64 && !arm64 && !riscv64 && !s390x + +package math + +// haveArchMax is false here, so Max uses the pure Go max instead of archMax. +const haveArchMax = false + +// archMax is a placeholder for architectures without an assembly version; it always panics. +func archMax(x, y float64) float64 { + panic("not implemented") +} + +// haveArchMin is false here, so Min uses the pure Go min instead of archMin. +const haveArchMin = false + +// archMin is a placeholder for architectures without an assembly version; it always panics. +func archMin(x, y float64) float64 { + panic("not implemented") +} diff --git a/src/math/dim_riscv64.s b/src/math/dim_riscv64.s new file mode 100644 index 0000000..5b2fd3d --- /dev/null +++ b/src/math/dim_riscv64.s @@ -0,0 +1,70 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Values returned from an FCLASS instruction. +#define NegInf 0x001 +#define PosInf 0x080 +#define NaN 0x200 + +// func archMax(x, y float64) float64 +TEXT ·archMax(SB),NOSPLIT,$0 + MOVD x+0(FP), F0 + MOVD y+8(FP), F1 + FCLASSD F0, X5 // X5 = class of x + FCLASSD F1, X6 // X6 = class of y + + // +Inf special cases + MOV $PosInf, X7 + BEQ X7, X5, isMaxX + BEQ X7, X6, isMaxY + + // NaN special cases + MOV $NaN, X7 + BEQ X7, X5, isMaxX + BEQ X7, X6, isMaxY + + // normal case + FMAXD F0, F1, F0 + MOVD F0, ret+16(FP) + RET + +isMaxX: // return x (x is +Inf or NaN) + MOVD F0, ret+16(FP) + RET + +isMaxY: // return y (y is +Inf or NaN) + MOVD F1, ret+16(FP) + RET + +// func archMin(x, y float64) float64 +TEXT ·archMin(SB),NOSPLIT,$0 + MOVD x+0(FP), F0 + MOVD y+8(FP), F1 + FCLASSD F0, X5 // X5 = class of x + FCLASSD F1, X6 // X6 = class of y + + // -Inf special cases + MOV $NegInf, X7 + BEQ X7, X5, isMinX + BEQ X7, X6, isMinY + + // NaN special cases + MOV $NaN, X7 + BEQ X7, X5, isMinX + BEQ X7, X6, isMinY + + // normal case + FMIND F0, F1, F0 + MOVD F0, ret+16(FP) + RET + +isMinX: // return x (x is -Inf or NaN) + MOVD F0, ret+16(FP) + RET + +isMinY: // return y (y is -Inf or NaN) + MOVD F1, ret+16(FP) + RET diff --git a/src/math/dim_s390x.s b/src/math/dim_s390x.s new file mode 100644 index 0000000..1277026 --- /dev/null +++ b/src/math/dim_s390x.s @@ -0,0 +1,96 @@ +// Copyright 2016 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Based on dim_amd64.s + +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 + +// func archMax(x, y float64) float64 +TEXT ·archMax(SB),NOSPLIT,$0 + // +Inf special cases + MOVD $PosInf, R4 + MOVD x+0(FP), R8 + CMPUBEQ R4, R8, isPosInf + MOVD y+8(FP), R9 + CMPUBEQ R4, R9, isPosInf + // NaN special cases + MOVD $~(1<<63), R5 // bit mask + MOVD $PosInf, R4 + MOVD R8, R2 + AND R5, R2 // x = |x| + CMPUBLT R4, R2, isMaxNaN // PosInf < |x|: x is NaN + MOVD R9, R3 + AND R5, R3 // y = |y| + CMPUBLT R4, R3, isMaxNaN // PosInf < |y|: y is NaN + // ±0 special cases + OR R3, R2 // zero only if both |x| and |y| are zero + BEQ isMaxZero + + FMOVD x+0(FP), F1 + FMOVD y+8(FP), F2 + FCMPU F2, F1 + BGT +3(PC) + FMOVD F1, ret+16(FP) + RET + FMOVD F2, ret+16(FP) + RET +isMaxNaN: // return NaN + MOVD $NaN, R4 // falls through to the store below +isPosInf: // return +Inf (or the NaN loaded just above) + MOVD R4, ret+16(FP) + RET +isMaxZero: + MOVD $(1<<63), R4 // -0.0 + CMPUBEQ R4, R8, +3(PC) // x is -0: skip the next two instructions and return y + MOVD R8, ret+16(FP) // return 0 + RET + MOVD R9, ret+16(FP) // return other 0 + RET + +// func archMin(x, y float64) float64 +TEXT ·archMin(SB),NOSPLIT,$0 + // -Inf special cases + MOVD $NegInf, R4 + MOVD x+0(FP), R8 + CMPUBEQ R4, R8, isNegInf + MOVD y+8(FP), R9 + CMPUBEQ R4, R9, isNegInf + // NaN special cases + MOVD $~(1<<63), R5 // bit mask + MOVD $PosInf, R4 + MOVD R8, R2 + AND R5, R2 // x = |x| + CMPUBLT R4, R2, isMinNaN // PosInf < |x|: x is NaN + MOVD R9, R3 + AND R5, R3 // y = |y| + CMPUBLT R4, R3, isMinNaN // PosInf < |y|: y is NaN + // ±0 special cases + OR R3, R2 // zero only if both |x| and |y| are zero + BEQ isMinZero + + FMOVD x+0(FP), F1 + FMOVD y+8(FP), F2 + FCMPU F2, F1 + BLT +3(PC) + FMOVD F1, ret+16(FP) + RET + FMOVD F2, ret+16(FP) + RET +isMinNaN: // return NaN + MOVD $NaN, R4 // falls through to the store below +isNegInf: // return -Inf (or the NaN loaded just above) + MOVD R4, ret+16(FP) + RET +isMinZero: + MOVD $(1<<63), R4 // -0.0 + CMPUBEQ R4, R8, +3(PC) // x is -0: skip the next two instructions and return x + MOVD R9, ret+16(FP) // return other 0 + RET + MOVD R8, ret+16(FP) // return -0 + RET + diff --git a/src/math/erf.go b/src/math/erf.go new file mode 100644 index 
0000000..4d6fe47 --- /dev/null +++ b/src/math/erf.go @@ -0,0 +1,349 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point error function and complementary error function. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// double erf(double x) +// double erfc(double x) +// x +// 2 |\ +// erf(x) = --------- | exp(-t*t)dt +// sqrt(pi) \| +// 0 +// +// erfc(x) = 1-erf(x) +// Note that +// erf(-x) = -erf(x) +// erfc(-x) = 2 - erfc(x) +// +// Method: +// 1. For |x| in [0, 0.84375] +// erf(x) = x + x*R(x**2) +// erfc(x) = 1 - erf(x) if x in [-.84375,0.25] +// = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] +// where R = P/Q where P is an odd poly of degree 8 and +// Q is an odd poly of degree 10. +// -57.90 +// | R - (erf(x)-x)/x | <= 2 +// +// +// Remark. The formula is derived by noting +// erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....) +// and that +// 2/sqrt(pi) = 1.128379167095512573896158903121545171688 +// is close to one. The interval is chosen because the fix +// point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is +// near 0.6174), and by some experiment, 0.84375 is chosen to +// guarantee the error is less than one ulp for erf. +// +// 2. 
For |x| in [0.84375,1.25], let s = |x| - 1, and +// c = 0.84506291151 rounded to single (24 bits) +// erf(x) = sign(x) * (c + P1(s)/Q1(s)) +// erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 +// 1+(c+P1(s)/Q1(s)) if x < 0 +// |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 +// Remark: here we use the Taylor series expansion at x=1. +// erf(1+s) = erf(1) + s*Poly(s) +// = 0.845.. + P1(s)/Q1(s) +// That is, we use rational approximation to approximate +// erf(1+s) - (c = (single)0.84506291151) +// Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] +// where +// P1(s) = degree 6 poly in s +// Q1(s) = degree 6 poly in s +// +// 3. For x in [1.25,1/0.35(~2.857143)], +// erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) +// erf(x) = 1 - erfc(x) +// where +// R1(z) = degree 7 poly in z, (z=1/x**2) +// S1(z) = degree 8 poly in z +// +// 4. For x in [1/0.35,28] +// erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 +// = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0 +// = 2.0 - tiny (if x <= -6) +// erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else +// erf(x) = sign(x)*(1.0 - tiny) +// where +// R2(z) = degree 6 poly in z, (z=1/x**2) +// S2(z) = degree 7 poly in z +// +// Note1: +// To compute exp(-x*x-0.5625+R/S), let s be a single +// precision number and s := x; then +// -x*x = -s*s + (s-x)*(s+x) +// exp(-x*x-0.5625+R/S) = +// exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); +// Note2: +// Here 3 and 4 make use of the asymptotic series +// exp(-x*x) +// erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) ) +// x*sqrt(pi) +// We use rational approximation to approximate +// g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625 +// Here is the error bound for R1/S1 and R2/S2 +// |R1/S1 - f(x)| < 2**(-62.57) +// |R2/S2 - f(x)| < 2**(-61.52) +// +// 5. For inf > x >= 28 +// erf(x) = sign(x) *(1 - tiny) (raise inexact) +// erfc(x) = tiny*tiny (raise underflow) if x > 0 +// = 2 - tiny if x<0 +// +// 6. 
Special case: +// erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, +// erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, +// erfc/erf(NaN) is NaN + +const ( + erx = 8.45062911510467529297e-01 // 0x3FEB0AC160000000 + // Coefficients for approximation to erf in [0, 0.84375] + efx = 1.28379167095512586316e-01 // 0x3FC06EBA8214DB69 + efx8 = 1.02703333676410069053e+00 // 0x3FF06EBA8214DB69 + pp0 = 1.28379167095512558561e-01 // 0x3FC06EBA8214DB68 + pp1 = -3.25042107247001499370e-01 // 0xBFD4CD7D691CB913 + pp2 = -2.84817495755985104766e-02 // 0xBF9D2A51DBD7194F + pp3 = -5.77027029648944159157e-03 // 0xBF77A291236668E4 + pp4 = -2.37630166566501626084e-05 // 0xBEF8EAD6120016AC + qq1 = 3.97917223959155352819e-01 // 0x3FD97779CDDADC09 + qq2 = 6.50222499887672944485e-02 // 0x3FB0A54C5536CEBA + qq3 = 5.08130628187576562776e-03 // 0x3F74D022C4D36B0F + qq4 = 1.32494738004321644526e-04 // 0x3F215DC9221C1A10 + qq5 = -3.96022827877536812320e-06 // 0xBED09C4342A26120 + // Coefficients for approximation to erf in [0.84375, 1.25] + pa0 = -2.36211856075265944077e-03 // 0xBF6359B8BEF77538 + pa1 = 4.14856118683748331666e-01 // 0x3FDA8D00AD92B34D + pa2 = -3.72207876035701323847e-01 // 0xBFD7D240FBB8C3F1 + pa3 = 3.18346619901161753674e-01 // 0x3FD45FCA805120E4 + pa4 = -1.10894694282396677476e-01 // 0xBFBC63983D3E28EC + pa5 = 3.54783043256182359371e-02 // 0x3FA22A36599795EB + pa6 = -2.16637559486879084300e-03 // 0xBF61BF380A96073F + qa1 = 1.06420880400844228286e-01 // 0x3FBB3E6618EEE323 + qa2 = 5.40397917702171048937e-01 // 0x3FE14AF092EB6F33 + qa3 = 7.18286544141962662868e-02 // 0x3FB2635CD99FE9A7 + qa4 = 1.26171219808761642112e-01 // 0x3FC02660E763351F + qa5 = 1.36370839120290507362e-02 // 0x3F8BEDC26B51DD1C + qa6 = 1.19844998467991074170e-02 // 0x3F888B545735151D + // Coefficients for approximation to erfc in [1.25, 1/0.35] + ra0 = -9.86494403484714822705e-03 // 0xBF843412600D6435 + ra1 = -6.93858572707181764372e-01 // 0xBFE63416E4BA7360 + ra2 = -1.05586262253232909814e+01 // 0xC0251E0441B0E726 + ra3 
= -6.23753324503260060396e+01 // 0xC04F300AE4CBA38D + ra4 = -1.62396669462573470355e+02 // 0xC0644CB184282266 + ra5 = -1.84605092906711035994e+02 // 0xC067135CEBCCABB2 + ra6 = -8.12874355063065934246e+01 // 0xC054526557E4D2F2 + ra7 = -9.81432934416914548592e+00 // 0xC023A0EFC69AC25C + sa1 = 1.96512716674392571292e+01 // 0x4033A6B9BD707687 + sa2 = 1.37657754143519042600e+02 // 0x4061350C526AE721 + sa3 = 4.34565877475229228821e+02 // 0x407B290DD58A1A71 + sa4 = 6.45387271733267880336e+02 // 0x40842B1921EC2868 + sa5 = 4.29008140027567833386e+02 // 0x407AD02157700314 + sa6 = 1.08635005541779435134e+02 // 0x405B28A3EE48AE2C + sa7 = 6.57024977031928170135e+00 // 0x401A47EF8E484A93 + sa8 = -6.04244152148580987438e-02 // 0xBFAEEFF2EE749A62 + // Coefficients for approximation to erfc in [1/.35, 28] + rb0 = -9.86494292470009928597e-03 // 0xBF84341239E86F4A + rb1 = -7.99283237680523006574e-01 // 0xBFE993BA70C285DE + rb2 = -1.77579549177547519889e+01 // 0xC031C209555F995A + rb3 = -1.60636384855821916062e+02 // 0xC064145D43C5ED98 + rb4 = -6.37566443368389627722e+02 // 0xC083EC881375F228 + rb5 = -1.02509513161107724954e+03 // 0xC09004616A2E5992 + rb6 = -4.83519191608651397019e+02 // 0xC07E384E9BDC383F + sb1 = 3.03380607434824582924e+01 // 0x403E568B261D5190 + sb2 = 3.25792512996573918826e+02 // 0x40745CAE221B9F0A + sb3 = 1.53672958608443695994e+03 // 0x409802EB189D5118 + sb4 = 3.19985821950859553908e+03 // 0x40A8FFB7688C246A + sb5 = 2.55305040643316442583e+03 // 0x40A3F219CEDF3BE6 + sb6 = 4.74528541206955367215e+02 // 0x407DA874E79FE763 + sb7 = -2.24409524465858183362e+01 // 0xC03670E242712D62 +) + +// Erf returns the error function of x. 
+// +// Special cases are: +// Erf(+Inf) = 1 +// Erf(-Inf) = -1 +// Erf(NaN) = NaN +func Erf(x float64) float64 { + // Use the architecture-specific implementation when one is available. + if haveArchErf { + return archErf(x) + } + return erf(x) +} + +// erf is the pure Go implementation of Erf. It follows the method +// described in the comment at the top of this file. +func erf(x float64) float64 { + const ( + VeryTiny = 2.848094538889218e-306 // 0x0080000000000000 + Small = 1.0 / (1 << 28) // 2**-28 + ) + // special cases + switch { + case IsNaN(x): + return NaN() + case IsInf(x, 1): + return 1 + case IsInf(x, -1): + return -1 + } + // From here on x holds |x|; sign records that the argument was negative. + sign := false + if x < 0 { + x = -x + sign = true + } + if x < 0.84375 { // |x| < 0.84375 + var temp float64 + if x < Small { // |x| < 2**-28 + if x < VeryTiny { + temp = 0.125 * (8.0*x + efx8*x) // avoid underflow + } else { + temp = x + efx*x + } + } else { + z := x * x + r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4))) + s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + y := r / s + temp = x + x*y + } + if sign { + return -temp + } + return temp + } + if x < 1.25 { // 0.84375 <= |x| < 1.25 + s := x - 1 + P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))) + Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))) + if sign { + return -erx - P/Q + } + return erx + P/Q + } + if x >= 6 { // inf > |x| >= 6 + if sign { + return -1 + } + return 1 + } + s := 1 / (x * x) + var R, S float64 + if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143 + R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7)))))) + S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8))))))) + } else { // |x| >= 1 / 0.35 ~ 2.857143 + R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6))))) + S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7)))))) + } + z := Float64frombits(Float64bits(x) & 0xffffffff00000000) // pseudo-single (20-bit) precision x + // exp(-x*x-0.5625+R/S), split into two factors as described in Note1 of the file comment. + r := Exp(-z*z-0.5625) * Exp((z-x)*(z+x)+R/S) + if sign { + return r/x - 1 + } + return 1 - r/x +} + +// Erfc returns the complementary error function of x. 
+// +// Special cases are: +// Erfc(+Inf) = 0 +// Erfc(-Inf) = 2 +// Erfc(NaN) = NaN +func Erfc(x float64) float64 { + // Use the architecture-specific implementation when one is available. + if haveArchErfc { + return archErfc(x) + } + return erfc(x) +} + +// erfc is the pure Go implementation of Erfc. It follows the method +// described in the comment at the top of this file. +func erfc(x float64) float64 { + const Tiny = 1.0 / (1 << 56) // 2**-56 + // special cases + switch { + case IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + case IsInf(x, -1): + return 2 + } + // From here on x holds |x|; sign records that the argument was negative. + sign := false + if x < 0 { + x = -x + sign = true + } + if x < 0.84375 { // |x| < 0.84375 + var temp float64 + if x < Tiny { // |x| < 2**-56 + temp = x + } else { + z := x * x + r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4))) + s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + y := r / s + if x < 0.25 { // |x| < 1/4 + temp = x + x*y + } else { + temp = 0.5 + (x*y + (x - 0.5)) + } + } + if sign { + return 1 + temp + } + return 1 - temp + } + if x < 1.25 { // 0.84375 <= |x| < 1.25 + s := x - 1 + P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))) + Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))) + if sign { + return 1 + erx + P/Q + } + return 1 - erx - P/Q + + } + if x < 28 { // |x| < 28 + s := 1 / (x * x) + var R, S float64 + if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143 + R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7)))))) + S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8))))))) + } else { // |x| >= 1 / 0.35 ~ 2.857143 + if sign && x > 6 { + return 2 // x < -6 + } + R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6))))) + S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7)))))) + } + z := Float64frombits(Float64bits(x) & 0xffffffff00000000) // pseudo-single (20-bit) precision x + // exp(-x*x-0.5625+R/S), split into two factors as described in Note1 of the file comment. + r := Exp(-z*z-0.5625) * Exp((z-x)*(z+x)+R/S) + if sign { + return 2 - r/x + } + return r / x + } + // |x| >= 28 + if sign { + return 2 + } + return 0 +} diff --git a/src/math/erf_s390x.s b/src/math/erf_s390x.s new file mode 100644 index 0000000..99ab436 --- /dev/null +++ b/src/math/erf_s390x.s @@ -0,0 +1,293 @@ +// Copyright 2017 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01 +DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00 +DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01 +DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00 +DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01 +DATA ·erfrodataL13<> + 40(SB)/8, $0.5 +DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00 +DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00 +DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01 +DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02 +DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03 +DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03 +DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04 +DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05 +DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06 +DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07 +DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08 +DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09 +DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10 +DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13 +DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14 +DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12 +DATA ·erfrodataL13<> + 176(SB)/8, $-2.25 +DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632 +DATA ·erfrodataL13<> + 192(SB)/8, $1.0 +DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00 +DATA ·erfrodataL13<> + 208(SB)/8, $1.0 +DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01 +DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00 +DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02 +DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02 +DATA 
·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00 +DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01 +DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375 +DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01 +DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00 +DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02 +DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01 +DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02 +DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02 +DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01 +DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00 +DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01 +DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223 +DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01 +DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00 +DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01 +DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01 +DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02 +DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03 +DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04 +DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04 +DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05 +DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07 +DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09 +DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06 +DATA ·erfrodataL13<> + 448(SB)/8, $-0.25 +GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456 + +// Table of log correction terms +DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01 +DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01 +DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01 +DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02 +DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02 +DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01 +DATA ·erftab2066<> + 
48(SB)/8, $-.150868749549871069e-01 +DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01 +DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01 +DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01 +DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02 +DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02 +DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02 +DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01 +DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01 +DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01 +GLOBL ·erftab2066<> + 0(SB), RODATA, $128 + +// Table of +/- 1.0 +DATA ·erftab12067<> + 0(SB)/8, $1.0 +DATA ·erftab12067<> + 8(SB)/8, $-1.0 +GLOBL ·erftab12067<> + 0(SB), RODATA, $16 + +// Erf returns the error function of the argument. +// +// Special cases are: +// Erf(+Inf) = 1 +// Erf(-Inf) = -1 +// Erf(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 

// erfAsm: s390x implementation of the error function for one float64.
//
// Per the TEXT directive the routine has no local frame and 16 bytes of
// arguments/results: x is read from x+0(FP), the result is stored to
// ret+8(FP).
//
// Register roles that are visible in the code below:
//	R5  base address of the ·erfrodataL13 constant table
//	F6  copy of the original argument x
//	R1  high 16 bits of x (sign, exponent, top 4 mantissa bits)
//	R2  R1 with the sign bit masked off, i.e. the high halfword of |x|
//
// Dispatch on R2 (decimal immediates shown in hex):
//	R2 <= 0x2FFF  (|x| < 2**-255)       -> L12
//	R2 <= 0x3FEF  (|x| < 1)             -> straight-line code after the compares
//	R2 <= 0x3FFF  (1 <= |x| < 2)        -> L5
//	R2 <= 0x400F  (2 <= |x| < 4)        -> L6, falling through into L9
//	R2 <= 0x4017  (4 <= |x| < 6)        -> L6, L8, then L9
//	otherwise     (|x| >= 6, Inf, NaN)  -> remainder of L2 and L7
//
// ·erfrodataL13, ·erftab12067 and ·erftab2066 are file-local (<>) symbols;
// their definitions are not part of this excerpt.
TEXT ·erfAsm(SB), NOSPLIT, $0-16
	FMOVD x+0(FP), F0
	MOVD $·erfrodataL13<>+0(SB), R5
	LGDR F0, R1	// raw bits of x
	FMOVD F0, F6
	SRAD $48, R1	// keep the top halfword, sign-extended
	MOVH $16383, R3	// 0x3FFF
	RISBGZ $49, $63, $0, R1, R2	// R2 = low 15 bits of R1 (sign removed)
	MOVW R2, R6
	MOVW R3, R7
	CMPBGT R6, R7, L2
	MOVH $12287, R1	// 0x2FFF
	MOVW R1, R7
	CMPBLE R6, R7 ,L12
	MOVH $16367, R1	// 0x3FEF
	MOVW R1, R7
	CMPBGT R6, R7, L5
	// 2**-255 <= |x| < 1: two interleaved polynomial chains using the
	// constants at 352(R5)..448(R5).
	FMOVD 448(R5), F4
	FMADD F0, F0, F4
	FMOVD 440(R5), F3
	WFMDB V4, V4, V2
	FMOVD 432(R5), F0
	FMOVD 424(R5), F1
	WFMADB V2, V0, V3, V0
	FMOVD 416(R5), F3
	WFMADB V2, V1, V3, V1
	FMOVD 408(R5), F5
	FMOVD 400(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V1, V3, V1
	FMOVD 392(R5), F5
	FMOVD 384(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V1, V3, V1
	FMOVD 376(R5), F5
	FMOVD 368(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V1, V3, V1
	FMOVD 360(R5), F5
	FMOVD 352(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V1, V3, V2
	WFMADB V4, V0, V2, V0
	WFMADB V6, V0, V6, V0
L1:
	// Common exit: the result is in F0.
	FMOVD F0, ret+8(FP)
	RET
L2:
	// |x| >= 2 (or Inf/NaN).
	MOVH R1, R1
	MOVH $16407, R3	// 0x4017
	SRW $31, R1, R1	// R1 = sign bit of x (0 or 1)
	MOVW R2, R6
	MOVW R3, R7
	CMPBLE R6, R7, L6
	MOVW R1, R1
	SLD $3, R1, R1	// sign bit * 8 = byte offset of a float64 entry
	MOVD $·erftab12067<>+0(SB), R3
	WORD $0x68013000	//ld %f0,0(%r1,%r3)
	MOVH $32751, R1	// 0x7FEF: anything above has an all-ones exponent
	MOVW R1, R7
	CMPBGT R6, R7, L7
	FMOVD 344(R5), F2
	FMADD F2, F0, F0
L7:
	// x compared with itself: equal unless x is NaN. Non-NaN inputs return
	// the value already in F0; NaN is returned unchanged.
	WFCEDBS V6, V6, V2
	BEQ L1
	FMOVD F6, F0
	FMOVD F0, ret+8(FP)
	RET

L6:
	// 2 <= |x| < 6.
	MOVW R1, R1
	SLD $3, R1, R1	// sign bit * 8 = byte offset of a float64 entry
	MOVD $·erftab12067<>+0(SB), R4
	WFMDB V0, V0, V1
	MOVH $0x0, R3
	WORD $0x68014000	//ld %f0,0(%r1,%r4)
	MOVH $16399, R1	// 0x400F
	MOVW R2, R6
	MOVW R1, R7
	CMPBGT R6, R7, L8
	// 2 <= |x| < 4: coefficients at 272(R5)..336(R5).
	FMOVD 336(R5), F3
	FMOVD 328(R5), F2
	FMOVD F1, F4
	WFMADB V1, V2, V3, V2
	// The three words below encode adb with displacement 0x140, i.e. it
	// adds the constant at 320(R5) to F4.
	WORD $0xED405140	//adb %f4,.L30-.L13(%r5)
	BYTE $0x00
	BYTE $0x1A
	FMOVD 312(R5), F3
	WFMADB V1, V2, V3, V2
	FMOVD 304(R5), F3
	WFMADB V1, V4, V3, V4
	FMOVD 296(R5), F3
	WFMADB V1, V2, V3, V2
	FMOVD 288(R5), F3
	WFMADB V1, V4, V3, V4
	FMOVD 280(R5), F3
	WFMADB V1, V2, V3, V2
	FMOVD 272(R5), F3
	WFMADB V1, V4, V3, V4
L9:
	// Shared tail of the 2 <= |x| < 6 paths (entered by fall-through and
	// from L8).
	FMOVD 264(R5), F3
	FMUL F4, F6
	FMOVD 256(R5), F4
	WFMADB V1, V4, V3, V4
	FDIV F6, F2
	LGDR F4, R1
	FSUB F3, F4
	FMOVD 248(R5), F6
	WFMSDB V4, V6, V1, V4
	FMOVD 240(R5), F1
	FMOVD 232(R5), F6
	WFMADB V4, V6, V1, V6
	FMOVD 224(R5), F1
	FMOVD 216(R5), F3
	WFMADB V4, V3, V1, V3
	WFMDB V4, V4, V1
	FMOVD 208(R5), F5
	WFMADB V6, V1, V3, V6
	FMOVD 200(R5), F3
	MOVH R1,R1
	WFMADB V4, V3, V5, V3
	RISBGZ $57, $60, $3, R1, R2	// 4-bit field of R1, scaled by 8, as table offset
	WFMADB V1, V6, V3, V6
	RISBGN $0, $15, $48, R1, R3	// rotate R1 left 48 and insert into bits 0-15 of R3
	MOVD $·erftab2066<>+0(SB), R1
	FMOVD 192(R5), F1
	LDGR R3, F3
	// madb with an indexed operand: F2 = F2*erftab2066[R2] + F2.
	WORD $0xED221000	//madb %f2,%f2,0(%r2,%r1)
	BYTE $0x20
	BYTE $0x1E
	WFMADB V4, V6, V1, V4
	FMUL F3, F2
	FMADD F4, F2, F0
	FMOVD F0, ret+8(FP)
	RET
L12:
	// |x| < 2**-255: result = x*c + x with c = 184(R5).
	FMOVD 184(R5), F0
	WFMADB V6, V0, V6, V0
	FMOVD F0, ret+8(FP)
	RET
L5:
	// 1 <= |x| < 2: two interleaved polynomial chains using the constants
	// at 40(R5)..176(R5).
	FMOVD 176(R5), F1
	FMADD F0, F0, F1
	FMOVD 168(R5), F3
	WFMDB V1, V1, V2
	FMOVD 160(R5), F0
	FMOVD 152(R5), F4
	WFMADB V2, V0, V3, V0
	FMOVD 144(R5), F3
	WFMADB V2, V4, V3, V4
	FMOVD 136(R5), F5
	FMOVD 128(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V4, V3, V4
	FMOVD 120(R5), F5
	FMOVD 112(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V4, V3, V4
	FMOVD 104(R5), F5
	FMOVD 96(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V4, V3, V4
	FMOVD 88(R5), F5
	FMOVD 80(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V4, V3, V4
	FMOVD 72(R5), F5
	FMOVD 64(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V4, V3, V4
	FMOVD 56(R5), F5
	FMOVD 48(R5), F3
	WFMADB V2, V0, V5, V0
	WFMADB V2, V4, V3, V2
	FMOVD 40(R5), F4
	WFMADB V1, V0, V2, V0
	FMUL F6, F0
	FMADD F4, F6, F0
	FMOVD F0, ret+8(FP)
	RET
L8:
	// 4 <= |x| < 6: alternative leading coefficients, then the shared tail.
	FMOVD 32(R5), F3
	FMOVD 24(R5), F2
	FMOVD F1, F4
	WFMADB V1, V2, V3, V2
	// adb with displacement 0x010: adds the constant at 16(R5) to F4.
	WORD $0xED405010	//adb %f4,.L68-.L13(%r5)
	BYTE $0x00
	BYTE $0x1A
	FMOVD 8(R5), F3
	WFMADB V1, V2, V3, V2
	FMOVD ·erfrodataL13<>+0(SB), F3
	WFMADB V1, V4, V3, V4
	BR L9
diff --git a/src/math/erfc_s390x.s b/src/math/erfc_s390x.s new file mode 100644 index 0000000..7e9d469 --- /dev/null +++ b/src/math/erfc_s390x.s @@ -0,0 +1,527 @@
// 
Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Neg2p11 is the IEEE-754 bit pattern of -2.11; erfcAsm compares the raw
// argument bits against it (unsigned) to reject inputs below -2.11.
#define Neg2p11 0xC000E147AE147AE1
// Pos15 is the high 16 bits of the float64 15.0; erfcAsm compares the high
// halfword of |x| against it to reject inputs above its supported range.
#define Pos15 0x402E

// Minimax polynomial coefficients and other constants
// The table holds 100 float64 entries (offsets 0..792, 800 bytes); erfcAsm
// addresses them as fixed displacements from R9.
DATA ·erfcrodataL38<> + 0(SB)/8, $.234875460637085087E-01
DATA ·erfcrodataL38<> + 8(SB)/8, $.234469449299256284E-01
DATA ·erfcrodataL38<> + 16(SB)/8, $-.606918710392844955E-04
DATA ·erfcrodataL38<> + 24(SB)/8, $-.198827088077636213E-04
DATA ·erfcrodataL38<> + 32(SB)/8, $.257805645845475331E-06
DATA ·erfcrodataL38<> + 40(SB)/8, $-.184427218110620284E-09
DATA ·erfcrodataL38<> + 48(SB)/8, $.122408098288933181E-10
DATA ·erfcrodataL38<> + 56(SB)/8, $.484691106751495392E-07
DATA ·erfcrodataL38<> + 64(SB)/8, $-.150147637632890281E-08
DATA ·erfcrodataL38<> + 72(SB)/8, $23.999999999973521625
DATA ·erfcrodataL38<> + 80(SB)/8, $27.226017111108365754	// operand of the cdb in erfcAsm's L13
DATA ·erfcrodataL38<> + 88(SB)/8, $-2.0
DATA ·erfcrodataL38<> + 96(SB)/8, $0.100108802034478228E+00
DATA ·erfcrodataL38<> + 104(SB)/8, $0.244588413746558125E+00
DATA ·erfcrodataL38<> + 112(SB)/8, $-.669188879646637174E-01
DATA ·erfcrodataL38<> + 120(SB)/8, $0.151311447000953551E-01
DATA ·erfcrodataL38<> + 128(SB)/8, $-.284720833493302061E-02
DATA ·erfcrodataL38<> + 136(SB)/8, $0.455491239358743212E-03
DATA ·erfcrodataL38<> + 144(SB)/8, $-.631850539280720949E-04
DATA ·erfcrodataL38<> + 152(SB)/8, $0.772532660726086679E-05
DATA ·erfcrodataL38<> + 160(SB)/8, $-.843706007150936940E-06
DATA ·erfcrodataL38<> + 168(SB)/8, $-.735330214904227472E-08
DATA ·erfcrodataL38<> + 176(SB)/8, $0.753002008837084967E-09
DATA ·erfcrodataL38<> + 184(SB)/8, $0.832482036660624637E-07
DATA ·erfcrodataL38<> + 192(SB)/8, $-0.75
DATA ·erfcrodataL38<> + 200(SB)/8, $.927765678007128609E-01
DATA ·erfcrodataL38<> + 208(SB)/8, $.903621209344751506E-01
DATA ·erfcrodataL38<> + 216(SB)/8, $-.344203375025257265E-02
DATA ·erfcrodataL38<> + 224(SB)/8, $-.869243428221791329E-03
DATA ·erfcrodataL38<> + 232(SB)/8, $.174699813107105603E-03
DATA ·erfcrodataL38<> + 240(SB)/8, $.649481036316130000E-05
DATA ·erfcrodataL38<> + 248(SB)/8, $-.895265844897118382E-05
DATA ·erfcrodataL38<> + 256(SB)/8, $.135970046909529513E-05
DATA ·erfcrodataL38<> + 264(SB)/8, $.277617717014748015E-06
DATA ·erfcrodataL38<> + 272(SB)/8, $.810628018408232910E-08
DATA ·erfcrodataL38<> + 280(SB)/8, $.210430084693497985E-07
DATA ·erfcrodataL38<> + 288(SB)/8, $-.342138077525615091E-08
DATA ·erfcrodataL38<> + 296(SB)/8, $-.165467946798610800E-06
DATA ·erfcrodataL38<> + 304(SB)/8, $5.999999999988412824
DATA ·erfcrodataL38<> + 312(SB)/8, $.468542210149072159E-01
DATA ·erfcrodataL38<> + 320(SB)/8, $.465343528567604256E-01
DATA ·erfcrodataL38<> + 328(SB)/8, $-.473338083650201733E-03
DATA ·erfcrodataL38<> + 336(SB)/8, $-.147220659069079156E-03
DATA ·erfcrodataL38<> + 344(SB)/8, $.755284723554388339E-05
DATA ·erfcrodataL38<> + 352(SB)/8, $.116158570631428789E-05
DATA ·erfcrodataL38<> + 360(SB)/8, $-.155445501551602389E-06
DATA ·erfcrodataL38<> + 368(SB)/8, $-.616940119847805046E-10
DATA ·erfcrodataL38<> + 376(SB)/8, $-.728705590727563158E-10
DATA ·erfcrodataL38<> + 384(SB)/8, $-.983452460354586779E-08
DATA ·erfcrodataL38<> + 392(SB)/8, $.365156164194346316E-08
DATA ·erfcrodataL38<> + 400(SB)/8, $11.999999999996530775
DATA ·erfcrodataL38<> + 408(SB)/8, $0.467773498104726584E-02
DATA ·erfcrodataL38<> + 416(SB)/8, $0.206669853540920535E-01
DATA ·erfcrodataL38<> + 424(SB)/8, $0.413339707081841473E-01
DATA ·erfcrodataL38<> + 432(SB)/8, $0.482229658262131320E-01
DATA ·erfcrodataL38<> + 440(SB)/8, $0.344449755901841897E-01
DATA ·erfcrodataL38<> + 448(SB)/8, $0.130890907240765465E-01
DATA ·erfcrodataL38<> + 456(SB)/8, $-.459266344100642687E-03
DATA ·erfcrodataL38<> + 464(SB)/8, $-.337888800856913728E-02
DATA ·erfcrodataL38<> + 472(SB)/8, $-.159103061687062373E-02
DATA ·erfcrodataL38<> + 480(SB)/8, $-.501128905515922644E-04
DATA ·erfcrodataL38<> + 488(SB)/8, $0.262775855852903132E-03
DATA ·erfcrodataL38<> + 496(SB)/8, $0.103860982197462436E-03
DATA ·erfcrodataL38<> + 504(SB)/8, $-.548835785414200775E-05
DATA ·erfcrodataL38<> + 512(SB)/8, $-.157075054646618214E-04
DATA ·erfcrodataL38<> + 520(SB)/8, $-.480056366276045110E-05
DATA ·erfcrodataL38<> + 528(SB)/8, $0.198263013759701555E-05
DATA ·erfcrodataL38<> + 536(SB)/8, $-.224394262958888780E-06
DATA ·erfcrodataL38<> + 544(SB)/8, $-.321853693146683428E-06
DATA ·erfcrodataL38<> + 552(SB)/8, $0.445073894984683537E-07
DATA ·erfcrodataL38<> + 560(SB)/8, $0.660425940000555729E-06
DATA ·erfcrodataL38<> + 568(SB)/8, $2.0
DATA ·erfcrodataL38<> + 576(SB)/8, $8.63616855509444462538e-78	// numerically 2**-256
DATA ·erfcrodataL38<> + 584(SB)/8, $1.00000000000000222044
DATA ·erfcrodataL38<> + 592(SB)/8, $0.500000000000004237e+00
DATA ·erfcrodataL38<> + 600(SB)/8, $0.416666664838056960e-01
DATA ·erfcrodataL38<> + 608(SB)/8, $0.166666666630345592e+00
DATA ·erfcrodataL38<> + 616(SB)/8, $0.138926439368309441e-02
DATA ·erfcrodataL38<> + 624(SB)/8, $0.833349307718286047e-02
DATA ·erfcrodataL38<> + 632(SB)/8, $-.693147180558298714e+00	// close to -ln(2)
DATA ·erfcrodataL38<> + 640(SB)/8, $-.164659495826017651e-11	// presumably the low-order part paired with 632 -- TODO confirm
DATA ·erfcrodataL38<> + 648(SB)/8, $.179001151181866548E+00
DATA ·erfcrodataL38<> + 656(SB)/8, $-.144269504088896339e+01	// close to -1/ln(2)
DATA ·erfcrodataL38<> + 664(SB)/8, $+281475245147134.9375	// added in erfcAsm before L11 and subtracted again by the sdb there
DATA ·erfcrodataL38<> + 672(SB)/8, $.163116780021877404E+00
DATA ·erfcrodataL38<> + 680(SB)/8, $-.201574395828120710E-01
DATA ·erfcrodataL38<> + 688(SB)/8, $-.185726336009394125E-02
DATA ·erfcrodataL38<> + 696(SB)/8, $.199349204957273749E-02
DATA ·erfcrodataL38<> + 704(SB)/8, $-.554902415532606242E-03
DATA ·erfcrodataL38<> + 712(SB)/8, $-.638914789660242846E-05
DATA ·erfcrodataL38<> + 720(SB)/8, $-.424441522653742898E-04
DATA ·erfcrodataL38<> + 728(SB)/8, $.827967511921486190E-04
DATA ·erfcrodataL38<> + 736(SB)/8, $.913965446284062654E-05
DATA ·erfcrodataL38<> + 744(SB)/8, $.277344791076320853E-05
DATA ·erfcrodataL38<> + 752(SB)/8, $-.467239678927239526E-06
DATA ·erfcrodataL38<> + 760(SB)/8, $.344814065920419986E-07
DATA ·erfcrodataL38<> + 768(SB)/8, $-.366013491552527132E-05
DATA ·erfcrodataL38<> + 776(SB)/8, $.181242810023783439E-05
DATA ·erfcrodataL38<> + 784(SB)/8, $2.999999999991234567
DATA ·erfcrodataL38<> + 792(SB)/8, $1.0
GLOBL ·erfcrodataL38<> + 0(SB), RODATA, $800

// Table of log correction terms
// 16 float64 entries (128 bytes); erfcAsm indexes it with a 4-bit field
// scaled by 8 (RISBGZ $57, $60, $3).
DATA ·erfctab2069<> + 0(SB)/8, $0.442737824274138381e-01
DATA ·erfctab2069<> + 8(SB)/8, $0.263602189790660309e-01
DATA ·erfctab2069<> + 16(SB)/8, $0.122565642281703586e-01
DATA ·erfctab2069<> + 24(SB)/8, $0.143757052860721398e-02
DATA ·erfctab2069<> + 32(SB)/8, $-.651375034121276075e-02
DATA ·erfctab2069<> + 40(SB)/8, $-.119317678849450159e-01
DATA ·erfctab2069<> + 48(SB)/8, $-.150868749549871069e-01
DATA ·erfctab2069<> + 56(SB)/8, $-.161992609578469234e-01
DATA ·erfctab2069<> + 64(SB)/8, $-.154492360403337917e-01
DATA ·erfctab2069<> + 72(SB)/8, $-.129850717389178721e-01
DATA ·erfctab2069<> + 80(SB)/8, $-.892902649276657891e-02
DATA ·erfctab2069<> + 88(SB)/8, $-.338202636596794887e-02
DATA ·erfctab2069<> + 96(SB)/8, $0.357266307045684762e-02
DATA ·erfctab2069<> + 104(SB)/8, $0.118665304327406698e-01
DATA ·erfctab2069<> + 112(SB)/8, $0.214434994118118914e-01
DATA ·erfctab2069<> + 120(SB)/8, $0.322580645161290314e-01
GLOBL ·erfctab2069<> + 0(SB), RODATA, $128

// Erfc returns the complementary error function of the argument.
//
// Special cases are:
//	Erfc(+Inf) = 0
//	Erfc(-Inf) = 2
//	Erfc(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
// This assembly implementation handles inputs in the range [-2.11, +15].
// For all other inputs we call the generic Go implementation.

// erfcAsm: s390x implementation of the complementary error function.
//
// Per the TEXT directive the routine is frameless with 16 bytes of
// arguments/results: x is read from x+0(FP), the result is stored to
// ret+8(FP). Inputs outside the range handled here tail-branch to the
// generic ·erfc via the usego label.
//
// Register roles that are visible in the code below:
//	R9  base address of the ·erfcrodataL38 constant table
//	R1  first the raw bits of x, then the high halfword of |x| (ANDW $0x7FFF)
//	R2  high halfword of x, sign-extended; tested against 0 for the sign of x
//	F0  x on entry; F2 carries the result to L1
//
// Dispatch on the high halfword of |x| (R1):
//	R1 <= 0x2FFF  (|x| < 2**-255)  -> result is formed directly from 1.0 and x
//	R1 <= 0x3FEF  (|x| < 1)        -> L4
//	R1 <= 0x3FFF  (1 <= |x| < 2)   -> L3
//	R1 >  0x3FFF  (|x| >= 2)       -> L2 (negative x goes on to L3,
//	                                  non-negative x to L9/L10/L36)
TEXT ·erfcAsm(SB), NOSPLIT|NOFRAME, $0-16
	MOVD x+0(FP), R1
	MOVD $Neg2p11, R2
	// Unsigned compare of the raw bits: patterns above that of -2.11 are
	// negative values of larger magnitude (including -Inf).
	CMPUBGT R1, R2, usego

	FMOVD x+0(FP), F0
	MOVD $·erfcrodataL38<>+0(SB), R9
	FMOVD F0, F2
	SRAD $48, R1	// keep the top halfword of x
	MOVH R1, R2	// R2 = signed high halfword (sign of x)
	ANDW $0x7FFF, R1	// R1 = high halfword of |x|
	MOVH $Pos15, R3
	CMPW R1, R3
	BGT usego	// high halfword of |x| above that of 15.0
	MOVH $0x3FFF, R3
	MOVW R1, R6
	MOVW R3, R7
	CMPBGT R6, R7, L2
	MOVH $0x3FEF, R3
	MOVW R3, R7
	CMPBGT R6, R7, L3
	MOVH $0x2FFF, R2
	MOVW R2, R7
	CMPBGT R6, R7, L4
	// |x| < 2**-255: combine 1.0 (792(R9)) with x by a single subtraction
	// (presumably 1.0 - x; operand order per the Go s390x assembler -- TODO confirm).
	FMOVD 792(R9), F0
	WFSDB V2, V0, V2
	FMOVD F2, ret+8(FP)
	RET

L2:
	// |x| >= 2. LTDBR sets the condition code from the sign of x;
	// negative x continues at L3.
	LTDBR F0, F0
	MOVH $0x0, R4
	BLTU L3
	FMOVD F0, F1
L9:
	MOVH $0x400F, R3
	MOVW R1, R6
	MOVW R3, R7
	CMPBGT R6, R7, L10	// x >= 4
	// 2 <= x < 4: coefficients at 648(R9)..784(R9).
	FMOVD 784(R9), F3
	FSUB F1, F3
	VLEG $0, 776(R9), V20
	WFDDB V1, V3, V6
	VLEG $0, 768(R9), V18
	FMOVD 760(R9), F7
	FMOVD 752(R9), F5
	VLEG $0, 744(R9), V16
	FMOVD 736(R9), F3
	FMOVD 728(R9), F2
	FMOVD 720(R9), F4
	WFMDB V6, V6, V1
	FMUL F0, F0
	MOVH $0x0, R3
	WFMADB V1, V7, V20, V7
	WFMADB V1, V5, V18, V5
	WFMADB V1, V7, V16, V7
	WFMADB V1, V5, V3, V5
	WFMADB V1, V7, V4, V7
	WFMADB V1, V5, V2, V5
	FMOVD 712(R9), F2
	WFMADB V1, V7, V2, V7
	FMOVD 704(R9), F2
	WFMADB V1, V5, V2, V5
	FMOVD 696(R9), F2
	WFMADB V1, V7, V2, V7
	FMOVD 688(R9), F2
	MOVH $0x0, R1
	WFMADB V1, V5, V2, V5
	FMOVD 680(R9), F2
	WFMADB V1, V7, V2, V7
	FMOVD 672(R9), F2
	WFMADB V1, V5, V2, V1
	FMOVD 664(R9), F3
	WFMADB V6, V7, V1, V7
	FMOVD 656(R9), F5
	FMOVD 648(R9), F2
	WFMADB V0, V5, V3, V5
	WFMADB V6, V7, V2, V7
L11:
	// Shared tail of the L9, L10, L36 and L17 paths. On entry F7 holds the
	// path-specific polynomial value, F0 holds x*x, and R1/R3 hold the
	// per-path flags set with MOVH above.
	LGDR F5, R6
	WFSDB V0, V0, V2
	// sdb with displacement 0x298: subtracts the constant at 664(R9) from F5.
	WORD $0xED509298	//sdb %f5,.L55-.L38(%r9)
	BYTE $0x00
	BYTE $0x1B
	FMOVD 640(R9), F6
	FMOVD 632(R9), F4
	WFMSDB V5, V6, V2, V6
	WFMSDB V5, V4, V0, V4
	FMOVD 624(R9), F2
	FADD F6, F4
	FMOVD 616(R9), F0
	FMOVD 608(R9), F6
	WFMADB V4, V0, V2, V0
	FMOVD 600(R9), F3
	WFMDB V4, V4, V2
	MOVH R6,R6
	ADD R6, R3
	WFMADB V4, V3, V6, V3
	FMOVD 592(R9), F6
	WFMADB V0, V2, V3, V0
	FMOVD 584(R9), F3
	WFMADB V4, V6, V3, V6
	RISBGZ $57, $60, $3, R3, R12	// 4-bit field of R3, scaled by 8, as table offset
	WFMADB V2, V0, V6, V0
	MOVD $·erfctab2069<>+0(SB), R5
	WORD $0x682C5000	//ld %f2,0(%r12,%r5)
	FMADD F2, F4, F4
	RISBGN $0, $15, $48, R3, R4	// rotate R3 left 48 and insert into bits 0-15 of R4
	WFMADB V4, V0, V2, V4
	LDGR R4, F2
	FMADD F4, F2, F2
	MOVW R2, R6
	CMPBLE R6, $0, L20	// sign halfword of x is <= 0
	MOVW R1, R6
	CMPBEQ R6, $0, L21
	// mdb with displacement 0x240: multiplies F7 by the constant at 576(R9).
	WORD $0xED709240	//mdb %f7,.L66-.L38(%r9)
	BYTE $0x00
	BYTE $0x1C
L21:
	FMUL F7, F2
L1:
	// Common exit: the result is in F2.
	FMOVD F2, ret+8(FP)
	RET
L3:
	// 1 <= |x| < 2, and negative x with |x| >= 2 arriving from L2.
	LTDBR F0, F0
	BLTU L30
	FMOVD 568(R9), F2	// 2.0
	WFSDB V0, V2, V0
L8:
	// Two interleaved polynomial chains in F0*F0 using the constants at
	// 408(R9)..560(R9).
	WFMDB V0, V0, V4
	FMOVD 560(R9), F2
	FMOVD 552(R9), F6
	FMOVD 544(R9), F1
	WFMADB V4, V6, V2, V6
	FMOVD 536(R9), F2
	WFMADB V4, V1, V2, V1
	FMOVD 528(R9), F3
	FMOVD 520(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 512(R9), F3
	FMOVD 504(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 496(R9), F3
	FMOVD 488(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 480(R9), F3
	FMOVD 472(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 464(R9), F3
	FMOVD 456(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 448(R9), F3
	FMOVD 440(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 432(R9), F3
	FMOVD 424(R9), F2
	WFMADB V4, V6, V3, V6
	WFMADB V4, V1, V2, V1
	FMOVD 416(R9), F3
	FMOVD 408(R9), F2
	WFMADB V4, V6, V3, V6
	FMADD F1, F4, F2
	FMADD F6, F0, F2
	MOVW R2, R6
	CMPBGE R6, $0, L1
	// Negative x: one more subtraction involving 2.0 (568(R9)) and the
	// value computed above.
	FMOVD 568(R9), F0
	WFSDB V2, V0, V2
	BR L1
L10:
	// x >= 4.
	MOVH $0x401F, R3
	MOVW R1, R6
	MOVW R3, R7
	CMPBLE R6, R7, L36	// 4 <= x < 8
	MOVH $0x402F, R3
	MOVW R3, R7
	CMPBGT R6, R7, L13
	// 8 <= x: coefficients at 312(R9)..400(R9).
	FMOVD 400(R9), F3
	FSUB F1, F3
	VLEG $0, 392(R9), V20
	WFDDB V1, V3, V6
	VLEG $0, 384(R9), V18
	FMOVD 376(R9), F2
	FMOVD 368(R9), F4
	VLEG $0, 360(R9), V16
	FMOVD 352(R9), F7
	FMOVD 344(R9), F3
	FMUL F0, F0
	WFMDB V6, V6, V1
	FMOVD 656(R9), F5
	MOVH $0x0, R3
	WFMADB V1, V2, V20, V2
	WFMADB V1, V4, V18, V4
	WFMADB V1, V2, V16, V2
	WFMADB V1, V4, V7, V4
	WFMADB V1, V2, V3, V2
	FMOVD 336(R9), F3
	WFMADB V1, V4, V3, V4
	FMOVD 328(R9), F3
	WFMADB V1, V2, V3, V2
	FMOVD 320(R9), F3
	WFMADB V1, V4, V3, V1
	FMOVD 312(R9), F7
	WFMADB V6, V2, V1, V2
	MOVH $0x0, R1
	FMOVD 664(R9), F3
	FMADD F2, F6, F7
	WFMADB V0, V5, V3, V5
	BR L11
L35:
	// NOTE(review): no branch in this routine targets L35 and the preceding
	// instruction is an unconditional BR, so this looks unreachable.
	WORD $0xB3130010	//lcdbr %f1,%f0
	BR L9
L36:
	// 4 <= x < 8: coefficients at 200(R9)..304(R9).
	FMOVD 304(R9), F3
	FSUB F1, F3
	VLEG $0, 296(R9), V20
	WFDDB V1, V3, V6
	FMOVD 288(R9), F5
	FMOVD 280(R9), F1
	FMOVD 272(R9), F2
	VLEG $0, 264(R9), V18
	VLEG $0, 256(R9), V16
	FMOVD 248(R9), F3
	FMOVD 240(R9), F4
	WFMDB V6, V6, V7
	FMUL F0, F0
	MOVH $0x0, R3
	FMADD F5, F7, F1
	WFMADB V7, V2, V20, V2
	WFMADB V7, V1, V18, V1
	WFMADB V7, V2, V16, V2
	WFMADB V7, V1, V3, V1
	WFMADB V7, V2, V4, V2
	FMOVD 232(R9), F4
	WFMADB V7, V1, V4, V1
	FMOVD 224(R9), F4
	WFMADB V7, V2, V4, V2
	FMOVD 216(R9), F4
	WFMADB V7, V1, V4, V1
	FMOVD 208(R9), F4
	MOVH $0x0, R1
	WFMADB V7, V2, V4, V7
	FMOVD 656(R9), F5
	WFMADB V6, V1, V7, V1
	FMOVD 664(R9), F3
	FMOVD 200(R9), F7
	WFMADB V0, V5, V3, V5
	FMADD F1, F6, F7
	BR L11
L4:
	// 2**-255 <= |x| < 1: two interleaved polynomial chains using the
	// constants at 96(R9)..192(R9).
	FMOVD 192(R9), F1
	FMADD F0, F0, F1
	FMOVD 184(R9), F3
	WFMDB V1, V1, V0
	FMOVD 176(R9), F4
	FMOVD 168(R9), F6
	WFMADB V0, V4, V3, V4
	FMOVD 160(R9), F3
	WFMADB V0, V6, V3, V6
	FMOVD 152(R9), F5
	FMOVD 144(R9), F3
	WFMADB V0, V4, V5, V4
	WFMADB V0, V6, V3, V6
	FMOVD 136(R9), F5
	FMOVD 128(R9), F3
	WFMADB V0, V4, V5, V4
	WFMADB V0, V6, V3, V6
	FMOVD 120(R9), F5
	FMOVD 112(R9), F3
	WFMADB V0, V4, V5, V4
	WFMADB V0, V6, V3, V6
	FMOVD 104(R9), F5
	FMOVD 96(R9), F3
	WFMADB V0, V4, V5, V4
	WFMADB V0, V6, V3, V0
	FMOVD F2, F6
	FMADD F4, F1, F0
	// sdb with displacement 0x318: subtracts 1.0 (792(R9)) from F6.
	WORD $0xED609318	//sdb %f6,.L39-.L38(%r9)
	BYTE $0x00
	BYTE $0x1B
	WFMSDB V2, V0, V6, V2
	FMOVD F2, ret+8(FP)
	RET
L30:
	// Negative x on the L3 path: adb with displacement 0x238 adds 2.0
	// (568(R9)) to F0.
	WORD $0xED009238	//adb %f0,.L67-.L38(%r9)
	BYTE $0x00
	BYTE $0x1A
	BR L8
L20:
	// Reached from L11 when the sign halfword of x is <= 0; uses -2.0
	// (88(R9)) and negates the result.
	FMOVD 88(R9), F0
	WFMADB V7, V2, V0, V2
	WORD $0xB3130022	//lcdbr %f2,%f2
	FMOVD F2, ret+8(FP)
	RET
L13:
	// NOTE(review): reached only when R1 > 0x402F, but the entry code sends
	// R1 > Pos15 (0x402E) to usego, so this path appears unreachable -- confirm.
	MOVH $0x403A, R3
	MOVW R1, R6
	MOVW R3, R7
	CMPBLE R6, R7, L4
	// cdb with displacement 0x050: compares F1 with the constant at 80(R9).
	WORD $0xED109050	//cdb %f1,.L128-.L38(%r9)
	BYTE $0x00
	BYTE $0x19
	BGE L37
	BVS L37
	FMOVD 72(R9), F6
	FSUB F1, F6
	MOVH $0x1000, R3
	FDIV F1, F6
	MOVH $0x1000, R1
L17:
	// Polynomial using the constants at 0(R9)..64(R9), then the shared tail.
	WFMDB V6, V6, V1
	FMOVD 64(R9), F2
	FMOVD 56(R9), F4
	FMOVD 48(R9), F3
	WFMADB V1, V3, V2, V3
	FMOVD 40(R9), F2
	WFMADB V1, V2, V4, V2
	FMOVD 32(R9), F4
	WFMADB V1, V3, V4, V3
	FMOVD 24(R9), F4
	WFMADB V1, V2, V4, V2
	FMOVD 16(R9), F4
	WFMADB V1, V3, V4, V3
	FMOVD 8(R9), F4
	WFMADB V1, V2, V4, V1
	FMUL F0, F0
	WFMADB V3, V6, V1, V3
	FMOVD 656(R9), F5
	FMOVD 664(R9), F4
	FMOVD 0(R9), F7
	WFMADB V0, V5, V4, V5
	FMADD F6, F3, F7
	BR L11
L14:
	// NOTE(review): no branch in this routine targets L14 and the preceding
	// instruction is an unconditional BR, so this looks unreachable (and
	// with it L23, which is only reached from here).
	FMOVD 72(R9), F6
	FSUB F1, F6
	MOVH $0x403A, R3
	FDIV F1, F6
	MOVW R1, R6
	MOVW R3, R7
	CMPBEQ R6, R7, L23
	MOVH $0x0, R3
	MOVH $0x0, R1
	BR L17
L37:
	// x compared with itself; the unordered (NaN) case returns through L1
	// with whatever F2 already holds.
	WFCEDBS V0, V0, V0
	BVS L1
	MOVW R2, R6
	CMPBLE R6, $0, L18
	MOVH $0x7FEF, R2
	MOVW R1, R6
	MOVW R2, R7
	CMPBGT R6, R7, L24	// all-ones exponent

	// Build a float64 whose high halfword is 0x0010 and square it.
	WORD $0xA5400010	//iihh %r4,16
	LDGR R4, F2
	FMUL F2, F2
	BR L1
L23:
	MOVH $0x1000, R3
	MOVH $0x1000, R1
	BR L17
L24:
	FMOVD $0, F2
	BR L1
L18:
	MOVH $0x7FEF, R2
	MOVW R1, R6
	MOVW R2, R7
	CMPBGT R6, R7, L25	// all-ones exponent
	// Build a float64 whose high halfword is 0x8010 and fold it into 2.0.
	WORD $0xA5408010	//iihh %r4,32784
	FMOVD 568(R9), F2
	LDGR R4, F0
	FMADD F2, F0, F2
	BR L1
L25:
	FMOVD 568(R9), F2	// 2.0
	BR L1
usego:
	// Out-of-range input: tail-branch to the generic Go implementation.
	BR ·erfc(SB)
diff --git a/src/math/erfinv.go b/src/math/erfinv.go new file mode 100644 index 0000000..ee423d3 --- /dev/null +++ b/src/math/erfinv.go @@ -0,0 +1,127 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package math

/*
	Inverse of the floating-point error function.
*/

// This implementation is based on the rational approximation
// of percentage points of normal distribution available from
// https://www.jstor.org/stable/2347330.
+ +const ( + // Coefficients for approximation to erf in |x| <= 0.85 + a0 = 1.1975323115670912564578e0 + a1 = 4.7072688112383978012285e1 + a2 = 6.9706266534389598238465e2 + a3 = 4.8548868893843886794648e3 + a4 = 1.6235862515167575384252e4 + a5 = 2.3782041382114385731252e4 + a6 = 1.1819493347062294404278e4 + a7 = 8.8709406962545514830200e2 + b0 = 1.0000000000000000000e0 + b1 = 4.2313330701600911252e1 + b2 = 6.8718700749205790830e2 + b3 = 5.3941960214247511077e3 + b4 = 2.1213794301586595867e4 + b5 = 3.9307895800092710610e4 + b6 = 2.8729085735721942674e4 + b7 = 5.2264952788528545610e3 + // Coefficients for approximation to erf in 0.85 < |x| <= 1-2*exp(-25) + c0 = 1.42343711074968357734e0 + c1 = 4.63033784615654529590e0 + c2 = 5.76949722146069140550e0 + c3 = 3.64784832476320460504e0 + c4 = 1.27045825245236838258e0 + c5 = 2.41780725177450611770e-1 + c6 = 2.27238449892691845833e-2 + c7 = 7.74545014278341407640e-4 + d0 = 1.4142135623730950488016887e0 + d1 = 2.9036514445419946173133295e0 + d2 = 2.3707661626024532365971225e0 + d3 = 9.7547832001787427186894837e-1 + d4 = 2.0945065210512749128288442e-1 + d5 = 2.1494160384252876777097297e-2 + d6 = 7.7441459065157709165577218e-4 + d7 = 1.4859850019840355905497876e-9 + // Coefficients for approximation to erf in 1-2*exp(-25) < |x| < 1 + e0 = 6.65790464350110377720e0 + e1 = 5.46378491116411436990e0 + e2 = 1.78482653991729133580e0 + e3 = 2.96560571828504891230e-1 + e4 = 2.65321895265761230930e-2 + e5 = 1.24266094738807843860e-3 + e6 = 2.71155556874348757815e-5 + e7 = 2.01033439929228813265e-7 + f0 = 1.414213562373095048801689e0 + f1 = 8.482908416595164588112026e-1 + f2 = 1.936480946950659106176712e-1 + f3 = 2.103693768272068968719679e-2 + f4 = 1.112800997078859844711555e-3 + f5 = 2.611088405080593625138020e-5 + f6 = 2.010321207683943062279931e-7 + f7 = 2.891024605872965461538222e-15 +) + +// Erfinv returns the inverse error function of x. 
+// +// Special cases are: +// Erfinv(1) = +Inf +// Erfinv(-1) = -Inf +// Erfinv(x) = NaN if x < -1 or x > 1 +// Erfinv(NaN) = NaN +func Erfinv(x float64) float64 { + // special cases + if IsNaN(x) || x <= -1 || x >= 1 { + if x == -1 || x == 1 { + return Inf(int(x)) + } + return NaN() + } + + sign := false + if x < 0 { + x = -x + sign = true + } + + var ans float64 + if x <= 0.85 { // |x| <= 0.85 + r := 0.180625 - 0.25*x*x + z1 := ((((((a7*r+a6)*r+a5)*r+a4)*r+a3)*r+a2)*r+a1)*r + a0 + z2 := ((((((b7*r+b6)*r+b5)*r+b4)*r+b3)*r+b2)*r+b1)*r + b0 + ans = (x * z1) / z2 + } else { + var z1, z2 float64 + r := Sqrt(Ln2 - Log(1.0-x)) + if r <= 5.0 { + r -= 1.6 + z1 = ((((((c7*r+c6)*r+c5)*r+c4)*r+c3)*r+c2)*r+c1)*r + c0 + z2 = ((((((d7*r+d6)*r+d5)*r+d4)*r+d3)*r+d2)*r+d1)*r + d0 + } else { + r -= 5.0 + z1 = ((((((e7*r+e6)*r+e5)*r+e4)*r+e3)*r+e2)*r+e1)*r + e0 + z2 = ((((((f7*r+f6)*r+f5)*r+f4)*r+f3)*r+f2)*r+f1)*r + f0 + } + ans = z1 / z2 + } + + if sign { + return -ans + } + return ans +} + +// Erfcinv returns the inverse of Erfc(x). +// +// Special cases are: +// Erfcinv(0) = +Inf +// Erfcinv(2) = -Inf +// Erfcinv(x) = NaN if x < 0 or x > 2 +// Erfcinv(NaN) = NaN +func Erfcinv(x float64) float64 { + return Erfinv(1 - x) +} diff --git a/src/math/example_test.go b/src/math/example_test.go new file mode 100644 index 0000000..a26d8cb --- /dev/null +++ b/src/math/example_test.go @@ -0,0 +1,245 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package math_test + +import ( + "fmt" + "math" +) + +func ExampleAcos() { + fmt.Printf("%.2f", math.Acos(1)) + // Output: 0.00 +} + +func ExampleAcosh() { + fmt.Printf("%.2f", math.Acosh(1)) + // Output: 0.00 +} + +func ExampleAsin() { + fmt.Printf("%.2f", math.Asin(0)) + // Output: 0.00 +} + +func ExampleAsinh() { + fmt.Printf("%.2f", math.Asinh(0)) + // Output: 0.00 +} + +func ExampleAtan() { + fmt.Printf("%.2f", math.Atan(0)) + // Output: 0.00 +} + +func ExampleAtan2() { + fmt.Printf("%.2f", math.Atan2(0, 0)) + // Output: 0.00 +} + +func ExampleAtanh() { + fmt.Printf("%.2f", math.Atanh(0)) + // Output: 0.00 +} + +func ExampleCopysign() { + fmt.Printf("%.2f", math.Copysign(3.2, -1)) + // Output: -3.20 +} + +func ExampleCos() { + fmt.Printf("%.2f", math.Cos(math.Pi/2)) + // Output: 0.00 +} + +func ExampleCosh() { + fmt.Printf("%.2f", math.Cosh(0)) + // Output: 1.00 +} + +func ExampleSin() { + fmt.Printf("%.2f", math.Sin(math.Pi)) + // Output: 0.00 +} + +func ExampleSincos() { + sin, cos := math.Sincos(0) + fmt.Printf("%.2f, %.2f", sin, cos) + // Output: 0.00, 1.00 +} + +func ExampleSinh() { + fmt.Printf("%.2f", math.Sinh(0)) + // Output: 0.00 +} + +func ExampleTan() { + fmt.Printf("%.2f", math.Tan(0)) + // Output: 0.00 +} + +func ExampleTanh() { + fmt.Printf("%.2f", math.Tanh(0)) + // Output: 0.00 +} + +func ExampleSqrt() { + const ( + a = 3 + b = 4 + ) + c := math.Sqrt(a*a + b*b) + fmt.Printf("%.1f", c) + // Output: 5.0 +} + +func ExampleCeil() { + c := math.Ceil(1.49) + fmt.Printf("%.1f", c) + // Output: 2.0 +} + +func ExampleFloor() { + c := math.Floor(1.51) + fmt.Printf("%.1f", c) + // Output: 1.0 +} + +func ExamplePow() { + c := math.Pow(2, 3) + fmt.Printf("%.1f", c) + // Output: 8.0 +} + +func ExamplePow10() { + c := math.Pow10(2) + fmt.Printf("%.1f", c) + // Output: 100.0 +} + +func ExampleRound() { + p := math.Round(10.5) + fmt.Printf("%.1f\n", p) + + n := math.Round(-10.5) + fmt.Printf("%.1f\n", n) + // Output: + // 11.0 + // -11.0 +} + +func 
ExampleRoundToEven() { + u := math.RoundToEven(11.5) + fmt.Printf("%.1f\n", u) + + d := math.RoundToEven(12.5) + fmt.Printf("%.1f\n", d) + // Output: + // 12.0 + // 12.0 +} + +func ExampleLog() { + x := math.Log(1) + fmt.Printf("%.1f\n", x) + + y := math.Log(2.7183) + fmt.Printf("%.1f\n", y) + // Output: + // 0.0 + // 1.0 +} + +func ExampleLog2() { + fmt.Printf("%.1f", math.Log2(256)) + // Output: 8.0 +} + +func ExampleLog10() { + fmt.Printf("%.1f", math.Log10(100)) + // Output: 2.0 +} + +func ExampleRemainder() { + fmt.Printf("%.1f", math.Remainder(100, 30)) + // Output: 10.0 +} + +func ExampleMod() { + c := math.Mod(7, 4) + fmt.Printf("%.1f", c) + // Output: 3.0 +} + +func ExampleAbs() { + x := math.Abs(-2) + fmt.Printf("%.1f\n", x) + + y := math.Abs(2) + fmt.Printf("%.1f\n", y) + // Output: + // 2.0 + // 2.0 +} +func ExampleDim() { + fmt.Printf("%.2f\n", math.Dim(4, -2)) + fmt.Printf("%.2f\n", math.Dim(-4, 2)) + // Output: + // 6.00 + // 0.00 +} + +func ExampleExp() { + fmt.Printf("%.2f\n", math.Exp(1)) + fmt.Printf("%.2f\n", math.Exp(2)) + fmt.Printf("%.2f\n", math.Exp(-1)) + // Output: + // 2.72 + // 7.39 + // 0.37 +} + +func ExampleExp2() { + fmt.Printf("%.2f\n", math.Exp2(1)) + fmt.Printf("%.2f\n", math.Exp2(-3)) + // Output: + // 2.00 + // 0.12 +} + +func ExampleExpm1() { + fmt.Printf("%.6f\n", math.Expm1(0.01)) + fmt.Printf("%.6f\n", math.Expm1(-1)) + // Output: + // 0.010050 + // -0.632121 +} + +func ExampleTrunc() { + fmt.Printf("%.2f\n", math.Trunc(math.Pi)) + fmt.Printf("%.2f\n", math.Trunc(-1.2345)) + // Output: + // 3.00 + // -1.00 +} + +func ExampleCbrt() { + fmt.Printf("%.2f\n", math.Cbrt(8)) + fmt.Printf("%.2f\n", math.Cbrt(27)) + // Output: + // 2.00 + // 3.00 +} + +func ExampleModf() { + int, frac := math.Modf(3.14) + fmt.Printf("%.2f, %.2f\n", int, frac) + + int, frac = math.Modf(-2.71) + fmt.Printf("%.2f, %.2f\n", int, frac) + // Output: + // 3.00, 0.14 + // -2.00, -0.71 +} diff --git a/src/math/exp.go b/src/math/exp.go new file mode 100644 
index 0000000..d05eb91 --- /dev/null +++ b/src/math/exp.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Exp returns e**x, the base-e exponential of x. +// +// Special cases are: +// Exp(+Inf) = +Inf +// Exp(NaN) = NaN +// Very large values overflow to 0 or +Inf. +// Very small values underflow to 1. +func Exp(x float64) float64 { + if haveArchExp { + return archExp(x) + } + return exp(x) +} + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_exp.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. +// +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// exp(x) +// Returns the exponential of x. +// +// Method +// 1. Argument reduction: +// Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. +// Given x, find r and integer k such that +// +// x = k*ln2 + r, |r| <= 0.5*ln2. +// +// Here r will be represented as r = hi-lo for better +// accuracy. +// +// 2. Approximation of exp(r) by a special rational function on +// the interval [0,0.34658]: +// Write +// R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... +// We use a special Remez algorithm on [0,0.34658] to generate +// a polynomial of degree 5 to approximate R. The maximum error +// of this polynomial approximation is bounded by 2**-59. 
In +// other words, +// R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 +// (where z=r*r, and the values of P1 to P5 are listed below) +// and +// | 5 | -59 +// | 2.0+P1*z+...+P5*z - R(z) | <= 2 +// | | +// The computation of exp(r) thus becomes +// 2*r +// exp(r) = 1 + ------- +// R - r +// r*R1(r) +// = 1 + r + ----------- (for better accuracy) +// 2 - R1(r) +// where +// 2 4 10 +// R1(r) = r - (P1*r + P2*r + ... + P5*r ). +// +// 3. Scale back to obtain exp(x): +// From step 1, we have +// exp(x) = 2**k * exp(r) +// +// Special cases: +// exp(INF) is INF, exp(NaN) is NaN; +// exp(-INF) is 0, and +// for finite argument, only exp(0)=1 is exact. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Misc. info. +// For IEEE double +// if x > 7.09782712893383973096e+02 then exp(x) overflow +// if x < -7.45133219101941108420e+02 then exp(x) underflow +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. + +func exp(x float64) float64 { + const ( + Ln2Hi = 6.93147180369123816490e-01 + Ln2Lo = 1.90821492927058770002e-10 + Log2e = 1.44269504088896338700e+00 + + Overflow = 7.09782712893383973096e+02 + Underflow = -7.45133219101941108420e+02 + NearZero = 1.0 / (1 << 28) // 2**-28 + ) + + // special cases + switch { + case IsNaN(x) || IsInf(x, 1): + return x + case IsInf(x, -1): + return 0 + case x > Overflow: + return Inf(1) + case x < Underflow: + return 0 + case -NearZero < x && x < NearZero: + return 1 + x + } + + // reduce; computed as r = hi - lo for extra precision. 
+ var k int + switch { + case x < 0: + k = int(Log2e*x - 0.5) + case x > 0: + k = int(Log2e*x + 0.5) + } + hi := x - float64(k)*Ln2Hi + lo := float64(k) * Ln2Lo + + // compute + return expmulti(hi, lo, k) +} + +// Exp2 returns 2**x, the base-2 exponential of x. +// +// Special cases are the same as Exp. +func Exp2(x float64) float64 { + if haveArchExp2 { + return archExp2(x) + } + return exp2(x) +} + +func exp2(x float64) float64 { + const ( + Ln2Hi = 6.93147180369123816490e-01 + Ln2Lo = 1.90821492927058770002e-10 + + Overflow = 1.0239999999999999e+03 + Underflow = -1.0740e+03 + ) + + // special cases + switch { + case IsNaN(x) || IsInf(x, 1): + return x + case IsInf(x, -1): + return 0 + case x > Overflow: + return Inf(1) + case x < Underflow: + return 0 + } + + // argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2. + // computed as r = hi - lo for extra precision. + var k int + switch { + case x > 0: + k = int(x + 0.5) + case x < 0: + k = int(x - 0.5) + } + t := x - float64(k) + hi := t * Ln2Hi + lo := -t * Ln2Lo + + // compute + return expmulti(hi, lo, k) +} + +// exp1 returns e**r × 2**k where r = hi - lo and |r| ≤ ln(2)/2. +func expmulti(hi, lo float64, k int) float64 { + const ( + P1 = 1.66666666666666657415e-01 /* 0x3FC55555; 0x55555555 */ + P2 = -2.77777777770155933842e-03 /* 0xBF66C16C; 0x16BEBD93 */ + P3 = 6.61375632143793436117e-05 /* 0x3F11566A; 0xAF25DE2C */ + P4 = -1.65339022054652515390e-06 /* 0xBEBBBD41; 0xC5D26BF1 */ + P5 = 4.13813679705723846039e-08 /* 0x3E663769; 0x72BEA4D0 */ + ) + + r := hi - lo + t := r * r + c := r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))) + y := 1 - ((lo - (r*c)/(2-c)) - hi) + // TODO(rsc): make sure Ldexp can handle boundary k + return Ldexp(y, k) +} diff --git a/src/math/exp2_asm.go b/src/math/exp2_asm.go new file mode 100644 index 0000000..c26b2c3 --- /dev/null +++ b/src/math/exp2_asm.go @@ -0,0 +1,11 @@ +// Copyright 2021 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build arm64 + +package math + +const haveArchExp2 = true + +func archExp2(x float64) float64 diff --git a/src/math/exp2_noasm.go b/src/math/exp2_noasm.go new file mode 100644 index 0000000..c2b4093 --- /dev/null +++ b/src/math/exp2_noasm.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !arm64 + +package math + +const haveArchExp2 = false + +func archExp2(x float64) float64 { + panic("not implemented") +} diff --git a/src/math/exp_amd64.go b/src/math/exp_amd64.go new file mode 100644 index 0000000..0f701b1 --- /dev/null +++ b/src/math/exp_amd64.go @@ -0,0 +1,11 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 + +package math + +import "internal/cpu" + +var useFMA = cpu.X86.HasAVX && cpu.X86.HasFMA diff --git a/src/math/exp_amd64.s b/src/math/exp_amd64.s new file mode 100644 index 0000000..02b71c8 --- /dev/null +++ b/src/math/exp_amd64.s @@ -0,0 +1,159 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// The method is based on a paper by Naoki Shibata: "Efficient evaluation +// methods of elementary functions suitable for SIMD computation", Proc. +// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32 +// (May 2010). The paper is available at +// https://link.springer.com/article/10.1007/s00450-010-0108-2 +// +// The original code and the constants below are from the author's +// implementation available at http://freshmeat.net/projects/sleef. +// The README file says, "The software is in public domain. 
+// You can use the software without any obligation." +// +// This code is a simplified version of the original. + +#define LN2 0.6931471805599453094172321214581766 // log_e(2) +#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2 +#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2 +#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2 +#define PosInf 0x7FF0000000000000 +#define NegInf 0xFFF0000000000000 +#define Overflow 7.09782712893384e+02 + +DATA exprodata<>+0(SB)/8, $0.5 +DATA exprodata<>+8(SB)/8, $1.0 +DATA exprodata<>+16(SB)/8, $2.0 +DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1 +DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2 +DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3 +DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3 +DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4 +DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5 +GLOBL exprodata<>+0(SB), RODATA, $72 + +// func Exp(x float64) float64 +TEXT ·archExp(SB),NOSPLIT,$0 + // test bits for not-finite + MOVQ x+0(FP), BX + MOVQ $~(1<<63), AX // sign bit mask + MOVQ BX, DX + ANDQ AX, DX + MOVQ $PosInf, AX + CMPQ AX, DX + JLE notFinite + // check if argument will overflow + MOVQ BX, X0 + MOVSD $Overflow, X1 + COMISD X1, X0 + JA overflow + MOVSD $LOG2E, X1 + MULSD X0, X1 + CVTSD2SL X1, BX // BX = exponent + CVTSL2SD BX, X1 + CMPB ·useFMA(SB), $1 + JE avxfma + MOVSD $LN2U, X2 + MULSD X1, X2 + SUBSD X2, X0 + MOVSD $LN2L, X2 + MULSD X1, X2 + SUBSD X2, X0 + // reduce argument + MULSD $0.0625, X0 + // Taylor series evaluation + MOVSD exprodata<>+64(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+56(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+48(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+40(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+32(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+24(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+0(SB), X1 + MULSD X0, X1 + ADDSD exprodata<>+8(SB), X1 + MULSD X1, X0 + MOVSD exprodata<>+16(SB), X1 + ADDSD X0, 
X1 + MULSD X1, X0 + MOVSD exprodata<>+16(SB), X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD exprodata<>+16(SB), X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD exprodata<>+16(SB), X1 + ADDSD X0, X1 + MULSD X1, X0 + ADDSD exprodata<>+8(SB), X0 + // return fr * 2**exponent +ldexp: + ADDL $0x3FF, BX // add bias + JLE denormal + CMPL BX, $0x7FF + JGE overflow +lastStep: + SHLQ $52, BX + MOVQ BX, X1 + MULSD X1, X0 + MOVSD X0, ret+8(FP) + RET +notFinite: + // test bits for -Inf + MOVQ $NegInf, AX + CMPQ AX, BX + JNE notNegInf + // -Inf, return 0 +underflow: // return 0 + MOVQ $0, ret+8(FP) + RET +overflow: // return +Inf + MOVQ $PosInf, BX +notNegInf: // NaN or +Inf, return x + MOVQ BX, ret+8(FP) + RET +denormal: + CMPL BX, $-52 + JL underflow + ADDL $0x3FE, BX // add bias - 1 + SHLQ $52, BX + MOVQ BX, X1 + MULSD X1, X0 + MOVQ $1, BX + JMP lastStep + +avxfma: + MOVSD $LN2U, X2 + VFNMADD231SD X2, X1, X0 + MOVSD $LN2L, X2 + VFNMADD231SD X2, X1, X0 + // reduce argument + MULSD $0.0625, X0 + // Taylor series evaluation + MOVSD exprodata<>+64(SB), X1 + VFMADD213SD exprodata<>+56(SB), X0, X1 + VFMADD213SD exprodata<>+48(SB), X0, X1 + VFMADD213SD exprodata<>+40(SB), X0, X1 + VFMADD213SD exprodata<>+32(SB), X0, X1 + VFMADD213SD exprodata<>+24(SB), X0, X1 + VFMADD213SD exprodata<>+0(SB), X0, X1 + VFMADD213SD exprodata<>+8(SB), X0, X1 + MULSD X1, X0 + VADDSD exprodata<>+16(SB), X0, X1 + MULSD X1, X0 + VADDSD exprodata<>+16(SB), X0, X1 + MULSD X1, X0 + VADDSD exprodata<>+16(SB), X0, X1 + MULSD X1, X0 + VADDSD exprodata<>+16(SB), X0, X1 + VFMADD213SD exprodata<>+8(SB), X1, X0 + JMP ldexp diff --git a/src/math/exp_arm64.s b/src/math/exp_arm64.s new file mode 100644 index 0000000..44673ab --- /dev/null +++ b/src/math/exp_arm64.s @@ -0,0 +1,182 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#define Ln2Hi 6.93147180369123816490e-01 +#define Ln2Lo 1.90821492927058770002e-10 +#define Log2e 1.44269504088896338700e+00 +#define Overflow 7.09782712893383973096e+02 +#define Underflow -7.45133219101941108420e+02 +#define Overflow2 1.0239999999999999e+03 +#define Underflow2 -1.0740e+03 +#define NearZero 0x3e30000000000000 // 2**-28 +#define PosInf 0x7ff0000000000000 +#define FracMask 0x000fffffffffffff +#define C1 0x3cb0000000000000 // 2**-52 +#define P1 1.66666666666666657415e-01 // 0x3FC55555; 0x55555555 +#define P2 -2.77777777770155933842e-03 // 0xBF66C16C; 0x16BEBD93 +#define P3 6.61375632143793436117e-05 // 0x3F11566A; 0xAF25DE2C +#define P4 -1.65339022054652515390e-06 // 0xBEBBBD41; 0xC5D26BF1 +#define P5 4.13813679705723846039e-08 // 0x3E663769; 0x72BEA4D0 + +// Exp returns e**x, the base-e exponential of x. +// This is an assembly implementation of the method used for function Exp in file exp.go. +// +// func Exp(x float64) float64 +TEXT ·archExp(SB),$0-16 + FMOVD x+0(FP), F0 // F0 = x + FCMPD F0, F0 + BNE isNaN // x = NaN, return NaN + FMOVD $Overflow, F1 + FCMPD F1, F0 + BGT overflow // x > Overflow, return PosInf + FMOVD $Underflow, F1 + FCMPD F1, F0 + BLT underflow // x < Underflow, return 0 + MOVD $NearZero, R0 + FMOVD R0, F2 + FABSD F0, F3 + FMOVD $1.0, F1 // F1 = 1.0 + FCMPD F2, F3 + BLT nearzero // fabs(x) < NearZero, return 1 + x + // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2 + // computed as r = hi - lo for extra precision. 
+ FMOVD $Log2e, F2 + FMOVD $0.5, F3 + FNMSUBD F0, F3, F2, F4 // Log2e*x - 0.5 + FMADDD F0, F3, F2, F3 // Log2e*x + 0.5 + FCMPD $0.0, F0 + FCSELD LT, F4, F3, F3 // F3 = k + FCVTZSD F3, R1 // R1 = int(k) + SCVTFD R1, F3 // F3 = float64(int(k)) + FMOVD $Ln2Hi, F4 // F4 = Ln2Hi + FMOVD $Ln2Lo, F5 // F5 = Ln2Lo + FMSUBD F3, F0, F4, F4 // F4 = hi = x - float64(int(k))*Ln2Hi + FMULD F3, F5 // F5 = lo = float64(int(k)) * Ln2Lo + FSUBD F5, F4, F6 // F6 = r = hi - lo + FMULD F6, F6, F7 // F7 = t = r * r + // compute y + FMOVD $P5, F8 // F8 = P5 + FMOVD $P4, F9 // F9 = P4 + FMADDD F7, F9, F8, F13 // P4+t*P5 + FMOVD $P3, F10 // F10 = P3 + FMADDD F7, F10, F13, F13 // P3+t*(P4+t*P5) + FMOVD $P2, F11 // F11 = P2 + FMADDD F7, F11, F13, F13 // P2+t*(P3+t*(P4+t*P5)) + FMOVD $P1, F12 // F12 = P1 + FMADDD F7, F12, F13, F13 // P1+t*(P2+t*(P3+t*(P4+t*P5))) + FMSUBD F7, F6, F13, F13 // F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))) + FMOVD $2.0, F14 + FSUBD F13, F14 + FMULD F6, F13, F15 + FDIVD F14, F15 // F15 = (r*c)/(2-c) + FSUBD F15, F5, F15 // lo-(r*c)/(2-c) + FSUBD F4, F15, F15 // (lo-(r*c)/(2-c))-hi + FSUBD F15, F1, F16 // F16 = y = 1-((lo-(r*c)/(2-c))-hi) + // inline Ldexp(y, k), benefit: + // 1, no parameter pass overhead. + // 2, skip unnecessary checks for Inf/NaN/Zero + FMOVD F16, R0 + AND $FracMask, R0, R2 // fraction + LSR $52, R0, R5 // exponent + ADD R1, R5 // R1 = int(k) + CMP $1, R5 + BGE normal + ADD $52, R5 // denormal + MOVD $C1, R8 + FMOVD R8, F1 // m = 2**-52 +normal: + ORR R5<<52, R2, R0 + FMOVD R0, F0 + FMULD F1, F0 // return m * x + FMOVD F0, ret+8(FP) + RET +nearzero: + FADDD F1, F0 +isNaN: + FMOVD F0, ret+8(FP) + RET +underflow: + MOVD ZR, ret+8(FP) + RET +overflow: + MOVD $PosInf, R0 + MOVD R0, ret+8(FP) + RET + + +// Exp2 returns 2**x, the base-2 exponential of x. +// This is an assembly implementation of the method used for function Exp2 in file exp.go. 
+// +// func Exp2(x float64) float64 +TEXT ·archExp2(SB),$0-16 + FMOVD x+0(FP), F0 // F0 = x + FCMPD F0, F0 + BNE isNaN // x = NaN, return NaN + FMOVD $Overflow2, F1 + FCMPD F1, F0 + BGT overflow // x > Overflow, return PosInf + FMOVD $Underflow2, F1 + FCMPD F1, F0 + BLT underflow // x < Underflow, return 0 + // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2 + // computed as r = hi - lo for extra precision. + FMOVD $0.5, F2 + FSUBD F2, F0, F3 // x - 0.5 + FADDD F2, F0, F4 // x + 0.5 + FCMPD $0.0, F0 + FCSELD LT, F3, F4, F3 // F3 = k + FCVTZSD F3, R1 // R1 = int(k) + SCVTFD R1, F3 // F3 = float64(int(k)) + FSUBD F3, F0, F3 // t = x - float64(int(k)) + FMOVD $Ln2Hi, F4 // F4 = Ln2Hi + FMOVD $Ln2Lo, F5 // F5 = Ln2Lo + FMULD F3, F4 // F4 = hi = t * Ln2Hi + FNMULD F3, F5 // F5 = lo = -t * Ln2Lo + FSUBD F5, F4, F6 // F6 = r = hi - lo + FMULD F6, F6, F7 // F7 = t = r * r + // compute y + FMOVD $P5, F8 // F8 = P5 + FMOVD $P4, F9 // F9 = P4 + FMADDD F7, F9, F8, F13 // P4+t*P5 + FMOVD $P3, F10 // F10 = P3 + FMADDD F7, F10, F13, F13 // P3+t*(P4+t*P5) + FMOVD $P2, F11 // F11 = P2 + FMADDD F7, F11, F13, F13 // P2+t*(P3+t*(P4+t*P5)) + FMOVD $P1, F12 // F12 = P1 + FMADDD F7, F12, F13, F13 // P1+t*(P2+t*(P3+t*(P4+t*P5))) + FMSUBD F7, F6, F13, F13 // F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))) + FMOVD $2.0, F14 + FSUBD F13, F14 + FMULD F6, F13, F15 + FDIVD F14, F15 // F15 = (r*c)/(2-c) + FMOVD $1.0, F1 // F1 = 1.0 + FSUBD F15, F5, F15 // lo-(r*c)/(2-c) + FSUBD F4, F15, F15 // (lo-(r*c)/(2-c))-hi + FSUBD F15, F1, F16 // F16 = y = 1-((lo-(r*c)/(2-c))-hi) + // inline Ldexp(y, k), benefit: + // 1, no parameter pass overhead.
+ // 2, skip unnecessary checks for Inf/NaN/Zero + FMOVD F16, R0 + AND $FracMask, R0, R2 // fraction + LSR $52, R0, R5 // exponent + ADD R1, R5 // R1 = int(k) + CMP $1, R5 + BGE normal + ADD $52, R5 // denormal + MOVD $C1, R8 + FMOVD R8, F1 // m = 2**-52 +normal: + ORR R5<<52, R2, R0 + FMOVD R0, F0 + FMULD F1, F0 // return m * x +isNaN: + FMOVD F0, ret+8(FP) + RET +underflow: + MOVD ZR, ret+8(FP) + RET +overflow: + MOVD $PosInf, R0 + MOVD R0, ret+8(FP) + RET diff --git a/src/math/exp_asm.go b/src/math/exp_asm.go new file mode 100644 index 0000000..4244428 --- /dev/null +++ b/src/math/exp_asm.go @@ -0,0 +1,11 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 || arm64 || s390x + +package math + +const haveArchExp = true + +func archExp(x float64) float64 diff --git a/src/math/exp_noasm.go b/src/math/exp_noasm.go new file mode 100644 index 0000000..bd3f024 --- /dev/null +++ b/src/math/exp_noasm.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 && !arm64 && !s390x + +package math + +const haveArchExp = false + +func archExp(x float64) float64 { + panic("not implemented") +} diff --git a/src/math/exp_s390x.s b/src/math/exp_s390x.s new file mode 100644 index 0000000..e0ec823 --- /dev/null +++ b/src/math/exp_s390x.s @@ -0,0 +1,177 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial approximation and other constants +DATA ·exprodataL22<> + 0(SB)/8, $800.0E+00 +DATA ·exprodataL22<> + 8(SB)/8, $1.0000000000000022e+00 +DATA ·exprodataL22<> + 16(SB)/8, $0.500000000000004237e+00 +DATA ·exprodataL22<> + 24(SB)/8, $0.166666666630345592e+00 +DATA ·exprodataL22<> + 32(SB)/8, $0.138926439368309441e-02 +DATA ·exprodataL22<> + 40(SB)/8, $0.833349307718286047e-02 +DATA ·exprodataL22<> + 48(SB)/8, $0.416666664838056960e-01 +DATA ·exprodataL22<> + 56(SB)/8, $-.231904681384629956E-16 +DATA ·exprodataL22<> + 64(SB)/8, $-.693147180559945286E+00 +DATA ·exprodataL22<> + 72(SB)/8, $0.144269504088896339E+01 +DATA ·exprodataL22<> + 80(SB)/8, $704.0E+00 +GLOBL ·exprodataL22<> + 0(SB), RODATA, $88 + +DATA ·expxinf<> + 0(SB)/8, $0x7ff0000000000000 +GLOBL ·expxinf<> + 0(SB), RODATA, $8 +DATA ·expx4ff<> + 0(SB)/8, $0x4ff0000000000000 +GLOBL ·expx4ff<> + 0(SB), RODATA, $8 +DATA ·expx2ff<> + 0(SB)/8, $0x2ff0000000000000 +GLOBL ·expx2ff<> + 0(SB), RODATA, $8 +DATA ·expxaddexp<> + 0(SB)/8, $0xc2f0000100003fef +GLOBL ·expxaddexp<> + 0(SB), RODATA, $8 + +// Log multipliers table +DATA ·exptexp<> + 0(SB)/8, $0.442737824274138381E-01 +DATA ·exptexp<> + 8(SB)/8, $0.263602189790660309E-01 +DATA ·exptexp<> + 16(SB)/8, $0.122565642281703586E-01 +DATA ·exptexp<> + 24(SB)/8, $0.143757052860721398E-02 +DATA ·exptexp<> + 32(SB)/8, $-.651375034121276075E-02 +DATA ·exptexp<> + 40(SB)/8, $-.119317678849450159E-01 +DATA ·exptexp<> + 48(SB)/8, $-.150868749549871069E-01 +DATA ·exptexp<> + 56(SB)/8, $-.161992609578469234E-01 +DATA ·exptexp<> + 64(SB)/8, $-.154492360403337917E-01 +DATA ·exptexp<> + 72(SB)/8, $-.129850717389178721E-01 +DATA ·exptexp<> + 80(SB)/8, $-.892902649276657891E-02 +DATA ·exptexp<> + 88(SB)/8, $-.338202636596794887E-02 +DATA ·exptexp<> + 96(SB)/8, $0.357266307045684762E-02 +DATA ·exptexp<> + 104(SB)/8, $0.118665304327406698E-01 +DATA ·exptexp<> + 112(SB)/8, $0.214434994118118914E-01 +DATA ·exptexp<> + 120(SB)/8, 
$0.322580645161290314E-01 +GLOBL ·exptexp<> + 0(SB), RODATA, $128 + +// Exp returns e**x, the base-e exponential of x. +// +// Special cases are: +// Exp(+Inf) = +Inf +// Exp(NaN) = NaN +// Very large values overflow to 0 or +Inf. +// Very small values underflow to 1. +// The algorithm used is minimax polynomial approximation using a table of +// polynomial coefficients determined with a Remez exchange algorithm. + +TEXT ·expAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·exprodataL22<>+0(SB), R5 + LTDBR F0, F0 + BLTU L20 + FMOVD F0, F2 +L2: + WORD $0xED205050 //cdb %f2,.L23-.L22(%r5) + BYTE $0x00 + BYTE $0x19 + BGE L16 + BVS L16 + WFCEDBS V2, V2, V2 + BVS LEXITTAGexp + MOVD $·expxaddexp<>+0(SB), R1 + FMOVD 72(R5), F6 + FMOVD 0(R1), F2 + WFMSDB V0, V6, V2, V6 + FMOVD 64(R5), F4 + FADD F6, F2 + FMOVD 56(R5), F1 + FMADD F4, F2, F0 + FMOVD 48(R5), F3 + WFMADB V2, V1, V0, V2 + FMOVD 40(R5), F1 + FMOVD 32(R5), F4 + FMUL F0, F0 + WFMADB V2, V4, V1, V4 + LGDR F6, R1 + FMOVD 24(R5), F1 + WFMADB V2, V3, V1, V3 + FMOVD 16(R5), F1 + WFMADB V0, V4, V3, V4 + FMOVD 8(R5), F3 + WFMADB V2, V1, V3, V1 + RISBGZ $57, $60, $3, R1, R3 + WFMADB V0, V4, V1, V0 + MOVD $·exptexp<>+0(SB), R2 + WORD $0x68432000 //ld %f4,0(%r3,%r2) + FMADD F4, F2, F2 + SLD $48, R1, R2 + WFMADB V2, V0, V4, V2 + LDGR R2, F0 + FMADD F0, F2, F0 + FMOVD F0, ret+8(FP) + RET +L16: + WFCEDBS V2, V2, V4 + BVS LEXITTAGexp + WORD $0xED205000 //cdb %f2,.L33-.L22(%r5) + BYTE $0x00 + BYTE $0x19 + BLT L6 + WFCEDBS V2, V0, V0 + BVS L13 + MOVD $·expxinf<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET +L20: + WORD $0xB3130020 //lcdbr %f2,%f0 + BR L2 +L6: + MOVD $·expxaddexp<>+0(SB), R1 + FMOVD 72(R5), F3 + FMOVD 0(R1), F4 + WFMSDB V0, V3, V4, V3 + FMOVD 64(R5), F6 + FADD F3, F4 + FMOVD 56(R5), F5 + WFMADB V4, V6, V0, V6 + FMOVD 32(R5), F1 + WFMADB V4, V5, V6, V4 + FMOVD 40(R5), F5 + FMUL F6, F6 + WFMADB V4, V1, V5, V1 + FMOVD 48(R5), F7 + LGDR F3, R1 + FMOVD 24(R5), F5 + WFMADB V4, V7, V5, V7 + FMOVD 16(R5), F5 
+ WFMADB V6, V1, V7, V1 + FMOVD 8(R5), F7 + WFMADB V4, V5, V7, V5 + RISBGZ $57, $60, $3, R1, R3 + WFMADB V6, V1, V5, V6 + MOVD $·exptexp<>+0(SB), R2 + WFCHDBS V2, V0, V0 + WORD $0x68132000 //ld %f1,0(%r3,%r2) + FMADD F1, F4, F4 + MOVD $0x4086000000000000, R2 + WFMADB V4, V6, V1, V4 + BEQ L21 + ADDW $0xF000, R1 + RISBGN $0, $15, $48, R1, R2 + LDGR R2, F0 + FMADD F0, F4, F0 + MOVD $·expx4ff<>+0(SB), R3 + FMOVD 0(R3), F2 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET +L13: + FMOVD $0, F0 + FMOVD F0, ret+8(FP) + RET +L21: + ADDW $0x1000, R1 + RISBGN $0, $15, $48, R1, R2 + LDGR R2, F0 + FMADD F0, F4, F0 + MOVD $·expx2ff<>+0(SB), R3 + FMOVD 0(R3), F2 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET +LEXITTAGexp: + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/expm1.go b/src/math/expm1.go new file mode 100644 index 0000000..66d3421 --- /dev/null +++ b/src/math/expm1.go @@ -0,0 +1,242 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// expm1(x) +// Returns exp(x)-1, the exponential of x minus 1. +// +// Method +// 1. Argument reduction: +// Given x, find r and integer k such that +// +// x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 +// +// Here a correction term c will be computed to compensate +// the error in r when rounded to a floating-point number. +// +// 2. 
Approximating expm1(r) by a special rational function on +// the interval [0,0.34658]: +// Since +// r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ... +// we define R1(r*r) by +// r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r) +// That is, +// R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) +// = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) +// = 1 - r**2/60 + r**4/2520 - r**6/100800 + ... +// We use a special Remez algorithm on [0,0.347] to generate +// a polynomial of degree 5 in r*r to approximate R1. The +// maximum error of this polynomial approximation is bounded +// by 2**-61. In other words, +// R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 +// where Q1 = -1.6666666666666567384E-2, +// Q2 = 3.9682539681370365873E-4, +// Q3 = -9.9206344733435987357E-6, +// Q4 = 2.5051361420808517002E-7, +// Q5 = -6.2843505682382617102E-9; +// (where z=r*r, and the values of Q1 to Q5 are listed below) +// with error bounded by +// | P(z) - R1(z) | <= 2**-61, +// where P(z) is the polynomial +// 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 +// +// expm1(r) = exp(r)-1 is then computed in the following +// specific way, which minimizes the accumulated rounding error: +// expm1(r) = r + r**2/2 + (r**3/2) * N / D +// where +// N = 3 - (R1 + R1*r/2) +// D = 6 - r*(3 - R1*r/2) +// +// To compensate the error in the argument reduction, we use +// expm1(r+c) = expm1(r) + c + expm1(r)*c +// ~ expm1(r) + c + r*c +// Thus c+r*c will be added in as the correction terms for +// expm1(r+c). Now rearrange the term to avoid optimization +// screw up: +// writing +// A = (r**2/2) * [ R1 - (3 - R1*r/2) ] +// / [ 6 - r*(3 - R1*r/2) ], +// we get +// expm1(r+c) ~ r - ({r*(A - c) - c} - r**2/2) +// +// = r - E +// 3. Scale back to obtain expm1(x): +// From step 1, we have +// expm1(x) = either 2**k*[expm1(r)+1] - 1 +// = or 2**k*[expm1(r) + (1-2**-k)] +// 4. Implementation notes: +// (A). To save one multiplication, we scale the coefficient Qi +// to Qi*2**i, and replace z by (x**2)/2. +// (B).
To achieve maximum accuracy, we compute expm1(x) by +// (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) +// (ii) if k=0, return r-E +// (iii) if k=-1, return 0.5*(r-E)-0.5 +// (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) +// else return 1.0+2.0*(r-E); +// (v) if (k<=-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1) +// (vi) if k < 20, return 2**k((1-2**-k)-(E-r)), else +// (vii) return 2**k(1-((E+2**-k)-r)) +// +// Special cases: +// expm1(INF) is INF, expm1(NaN) is NaN; +// expm1(-INF) is -1, and +// for finite argument, only expm1(0)=0 is exact. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Misc. info. +// For IEEE double +// if x > 7.09782712893383973096e+02 then expm1(x) overflows +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// + +// Expm1 returns e**x - 1, the base-e exponential of x minus 1. +// It is more accurate than Exp(x) - 1 when x is near zero. +// +// Special cases are: +// Expm1(+Inf) = +Inf +// Expm1(-Inf) = -1 +// Expm1(NaN) = NaN +// Very large values overflow to -1 or +Inf.
+func Expm1(x float64) float64 { + if haveArchExpm1 { + return archExpm1(x) + } + return expm1(x) +} + +func expm1(x float64) float64 { + const ( + Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF + Ln2X56 = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1 + Ln2HalfX3 = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73 + Ln2Half = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef + Ln2Hi = 6.93147180369123816490e-01 // 0x3fe62e42fee00000 + Ln2Lo = 1.90821492927058770002e-10 // 0x3dea39ef35793c76 + InvLn2 = 1.44269504088896338700e+00 // 0x3ff71547652b82fe + Tiny = 1.0 / (1 << 54) // 2**-54 = 0x3c90000000000000 + // scaled coefficients related to expm1 + Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4 + Q2 = 1.58730158725481460165e-03 // 0x3F5A01A019FE5585 + Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7 + Q4 = 4.00821782732936239552e-06 // 0x3ED0CFCA86E65239 + Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D + ) + + // special cases + switch { + case IsInf(x, 1) || IsNaN(x): + return x + case IsInf(x, -1): + return -1 + } + + absx := x + sign := false + if x < 0 { + absx = -absx + sign = true + } + + // filter out huge argument + if absx >= Ln2X56 { // if |x| >= 56 * ln2 + if sign { + return -1 // x < -56*ln2, return -1 + } + if absx >= Othreshold { // if |x| >= 709.78... 
+ return Inf(1) + } + } + + // argument reduction + var c float64 + var k int + if absx > Ln2Half { // if |x| > 0.5 * ln2 + var hi, lo float64 + if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2 + if !sign { + hi = x - Ln2Hi + lo = Ln2Lo + k = 1 + } else { + hi = x + Ln2Hi + lo = -Ln2Lo + k = -1 + } + } else { + if !sign { + k = int(InvLn2*x + 0.5) + } else { + k = int(InvLn2*x - 0.5) + } + t := float64(k) + hi = x - t*Ln2Hi // t * Ln2Hi is exact here + lo = t * Ln2Lo + } + x = hi - lo + c = (hi - x) - lo + } else if absx < Tiny { // when |x| < 2**-54, return x + return x + } else { + k = 0 + } + + // x is now in primary range + hfx := 0.5 * x + hxs := x * hfx + r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))) + t := 3 - r1*hfx + e := hxs * ((r1 - t) / (6.0 - x*t)) + if k == 0 { + return x - (x*e - hxs) // c is 0 + } + e = (x*(e-c) - c) + e -= hxs + switch { + case k == -1: + return 0.5*(x-e) - 0.5 + case k == 1: + if x < -0.25 { + return -2 * (e - (x + 0.5)) + } + return 1 + 2*(x-e) + case k <= -2 || k > 56: // suffice to return exp(x)-1 + y := 1 - (e - x) + y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent + return y - 1 + } + if k < 20 { + t := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // t=1-2**-k + y := t - (e - x) + y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent + return y + } + t = Float64frombits(uint64(0x3ff-k) << 52) // 2**-k + y := x - (e + t) + y++ + y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent + return y +} diff --git a/src/math/expm1_s390x.s b/src/math/expm1_s390x.s new file mode 100644 index 0000000..16c861b --- /dev/null +++ b/src/math/expm1_s390x.s @@ -0,0 +1,194 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial approximation and other constants +DATA ·expm1rodataL22<> + 0(SB)/8, $-1.0 +DATA ·expm1rodataL22<> + 8(SB)/8, $800.0E+00 +DATA ·expm1rodataL22<> + 16(SB)/8, $1.0 +DATA ·expm1rodataL22<> + 24(SB)/8, $-.231904681384629956E-16 +DATA ·expm1rodataL22<> + 32(SB)/8, $0.50000000000000029671E+00 +DATA ·expm1rodataL22<> + 40(SB)/8, $0.16666666666666676570E+00 +DATA ·expm1rodataL22<> + 48(SB)/8, $0.83333333323590973444E-02 +DATA ·expm1rodataL22<> + 56(SB)/8, $0.13889096526400683566E-02 +DATA ·expm1rodataL22<> + 64(SB)/8, $0.41666666661701152924E-01 +DATA ·expm1rodataL22<> + 72(SB)/8, $0.19841562053987360264E-03 +DATA ·expm1rodataL22<> + 80(SB)/8, $-.693147180559945286E+00 +DATA ·expm1rodataL22<> + 88(SB)/8, $0.144269504088896339E+01 +DATA ·expm1rodataL22<> + 96(SB)/8, $704.0E+00 +GLOBL ·expm1rodataL22<> + 0(SB), RODATA, $104 + +DATA ·expm1xmone<> + 0(SB)/8, $0xbff0000000000000 +GLOBL ·expm1xmone<> + 0(SB), RODATA, $8 +DATA ·expm1xinf<> + 0(SB)/8, $0x7ff0000000000000 +GLOBL ·expm1xinf<> + 0(SB), RODATA, $8 +DATA ·expm1x4ff<> + 0(SB)/8, $0x4ff0000000000000 +GLOBL ·expm1x4ff<> + 0(SB), RODATA, $8 +DATA ·expm1x2ff<> + 0(SB)/8, $0x2ff0000000000000 +GLOBL ·expm1x2ff<> + 0(SB), RODATA, $8 +DATA ·expm1xaddexp<> + 0(SB)/8, $0xc2f0000100003ff0 +GLOBL ·expm1xaddexp<> + 0(SB), RODATA, $8 + +// Log multipliers table +DATA ·expm1tab<> + 0(SB)/8, $0.0 +DATA ·expm1tab<> + 8(SB)/8, $-.171540871271399150E-01 +DATA ·expm1tab<> + 16(SB)/8, $-.306597931864376363E-01 +DATA ·expm1tab<> + 24(SB)/8, $-.410200970469965021E-01 +DATA ·expm1tab<> + 32(SB)/8, $-.486343079978231466E-01 +DATA ·expm1tab<> + 40(SB)/8, $-.538226193725835820E-01 +DATA ·expm1tab<> + 48(SB)/8, $-.568439602538111520E-01 +DATA ·expm1tab<> + 56(SB)/8, $-.579091847395528847E-01 +DATA ·expm1tab<> + 64(SB)/8, $-.571909584179366341E-01 +DATA ·expm1tab<> + 72(SB)/8, $-.548312665987204407E-01 +DATA ·expm1tab<> + 80(SB)/8, $-.509471843643441085E-01 +DATA ·expm1tab<> + 88(SB)/8, 
$-.456353588448863359E-01 +DATA ·expm1tab<> + 96(SB)/8, $-.389755254243262365E-01 +DATA ·expm1tab<> + 104(SB)/8, $-.310332908285244231E-01 +DATA ·expm1tab<> + 112(SB)/8, $-.218623539150173528E-01 +DATA ·expm1tab<> + 120(SB)/8, $-.115062908917949451E-01 +GLOBL ·expm1tab<> + 0(SB), RODATA, $128 + +// Expm1 returns e**x - 1, the base-e exponential of x minus 1. +// It is more accurate than Exp(x) - 1 when x is near zero. +// +// Special cases are: +// Expm1(+Inf) = +Inf +// Expm1(-Inf) = -1 +// Expm1(NaN) = NaN +// Very large values overflow to -1 or +Inf. +// The algorithm used is minimax polynomial approximation using a table of +// polynomial coefficients determined with a Remez exchange algorithm. + +TEXT ·expm1Asm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·expm1rodataL22<>+0(SB), R5 + LTDBR F0, F0 + BLTU L20 + FMOVD F0, F2 +L2: + WORD $0xED205060 //cdb %f2,.L23-.L22(%r5) + BYTE $0x00 + BYTE $0x19 + BGE L16 + BVS L16 + WFCEDBS V2, V2, V2 + BVS LEXITTAGexpm1 + MOVD $·expm1xaddexp<>+0(SB), R1 + FMOVD 88(R5), F1 + FMOVD 0(R1), F2 + WFMSDB V0, V1, V2, V1 + FMOVD 80(R5), F6 + WFADB V1, V2, V4 + FMOVD 72(R5), F2 + FMADD F6, F4, F0 + FMOVD 64(R5), F3 + FMOVD 56(R5), F6 + FMOVD 48(R5), F5 + FMADD F2, F0, F6 + WFMADB V0, V5, V3, V5 + WFMDB V0, V0, V2 + LGDR F1, R1 + WFMADB V6, V2, V5, V6 + FMOVD 40(R5), F3 + FMOVD 32(R5), F5 + WFMADB V0, V3, V5, V3 + FMOVD 24(R5), F5 + WFMADB V2, V6, V3, V2 + FMADD F5, F4, F0 + FMOVD 16(R5), F6 + WFMADB V0, V2, V6, V2 + RISBGZ $57, $60, $3, R1, R3 + WORD $0xB3130022 //lcdbr %f2,%f2 + MOVD $·expm1tab<>+0(SB), R2 + WORD $0x68432000 //ld %f4,0(%r3,%r2) + FMADD F4, F0, F0 + SLD $48, R1, R2 + WFMSDB V2, V0, V4, V0 + LDGR R2, F4 + WORD $0xB3130000 //lcdbr %f0,%f0 + FSUB F4, F6 + WFMSDB V0, V4, V6, V0 + FMOVD F0, ret+8(FP) + RET +L16: + WFCEDBS V2, V2, V4 + BVS LEXITTAGexpm1 + WORD $0xED205008 //cdb %f2,.L34-.L22(%r5) + BYTE $0x00 + BYTE $0x19 + BLT L6 + WFCEDBS V2, V0, V0 + BVS L7 + MOVD $·expm1xinf<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD 
F0, ret+8(FP) + RET +L20: + WORD $0xB3130020 //lcdbr %f2,%f0 + BR L2 +L6: + MOVD $·expm1xaddexp<>+0(SB), R1 + FMOVD 88(R5), F5 + FMOVD 0(R1), F4 + WFMSDB V0, V5, V4, V5 + FMOVD 80(R5), F3 + WFADB V5, V4, V1 + VLEG $0, 48(R5), V16 + WFMADB V1, V3, V0, V3 + FMOVD 56(R5), F4 + FMOVD 64(R5), F7 + FMOVD 72(R5), F6 + WFMADB V3, V16, V7, V16 + WFMADB V3, V6, V4, V6 + WFMDB V3, V3, V4 + MOVD $·expm1tab<>+0(SB), R2 + WFMADB V6, V4, V16, V6 + VLEG $0, 32(R5), V16 + FMOVD 40(R5), F7 + WFMADB V3, V7, V16, V7 + VLEG $0, 24(R5), V16 + WFMADB V4, V6, V7, V4 + WFMADB V1, V16, V3, V1 + FMOVD 16(R5), F6 + FMADD F4, F1, F6 + LGDR F5, R1 + WORD $0xB3130066 //lcdbr %f6,%f6 + RISBGZ $57, $60, $3, R1, R3 + WORD $0x68432000 //ld %f4,0(%r3,%r2) + FMADD F4, F1, F1 + MOVD $0x4086000000000000, R2 + FMSUB F1, F6, F4 + WORD $0xB3130044 //lcdbr %f4,%f4 + WFCHDBS V2, V0, V0 + BEQ L21 + ADDW $0xF000, R1 + RISBGN $0, $15, $48, R1, R2 + LDGR R2, F0 + FMADD F0, F4, F0 + MOVD $·expm1x4ff<>+0(SB), R3 + FMOVD 0(R5), F4 + FMOVD 0(R3), F2 + WFMADB V2, V0, V4, V0 + FMOVD F0, ret+8(FP) + RET +L7: + MOVD $·expm1xmone<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET +L21: + ADDW $0x1000, R1 + RISBGN $0, $15, $48, R1, R2 + LDGR R2, F0 + FMADD F0, F4, F0 + MOVD $·expm1x2ff<>+0(SB), R3 + FMOVD 0(R5), F4 + FMOVD 0(R3), F2 + WFMADB V2, V0, V4, V0 + FMOVD F0, ret+8(FP) + RET +LEXITTAGexpm1: + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/export_s390x_test.go b/src/math/export_s390x_test.go new file mode 100644 index 0000000..827bf1c --- /dev/null +++ b/src/math/export_s390x_test.go @@ -0,0 +1,31 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Export internal functions and variable for testing. 
+var Log10NoVec = log10 +var CosNoVec = cos +var CoshNoVec = cosh +var SinNoVec = sin +var SinhNoVec = sinh +var TanhNoVec = tanh +var Log1pNovec = log1p +var AtanhNovec = atanh +var AcosNovec = acos +var AcoshNovec = acosh +var AsinNovec = asin +var AsinhNovec = asinh +var ErfNovec = erf +var ErfcNovec = erfc +var AtanNovec = atan +var Atan2Novec = atan2 +var CbrtNovec = cbrt +var LogNovec = log +var TanNovec = tan +var ExpNovec = exp +var Expm1Novec = expm1 +var PowNovec = pow +var HypotNovec = hypot +var HasVX = hasVX diff --git a/src/math/export_test.go b/src/math/export_test.go new file mode 100644 index 0000000..53d9205 --- /dev/null +++ b/src/math/export_test.go @@ -0,0 +1,14 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Export internal functions for testing. +var ExpGo = exp +var Exp2Go = exp2 +var HypotGo = hypot +var SqrtGo = sqrt +var TrigReduce = trigReduce + +const ReduceThreshold = reduceThreshold diff --git a/src/math/floor.go b/src/math/floor.go new file mode 100644 index 0000000..7913a90 --- /dev/null +++ b/src/math/floor.go @@ -0,0 +1,146 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Floor returns the greatest integer value less than or equal to x. +// +// Special cases are: +// Floor(±0) = ±0 +// Floor(±Inf) = ±Inf +// Floor(NaN) = NaN +func Floor(x float64) float64 { + if haveArchFloor { + return archFloor(x) + } + return floor(x) +} + +func floor(x float64) float64 { + if x == 0 || IsNaN(x) || IsInf(x, 0) { + return x + } + if x < 0 { + d, fract := Modf(-x) + if fract != 0.0 { + d = d + 1 + } + return -d + } + d, _ := Modf(x) + return d +} + +// Ceil returns the least integer value greater than or equal to x. 
+// +// Special cases are: +// Ceil(±0) = ±0 +// Ceil(±Inf) = ±Inf +// Ceil(NaN) = NaN +func Ceil(x float64) float64 { + if haveArchCeil { + return archCeil(x) + } + return ceil(x) +} + +func ceil(x float64) float64 { + return -Floor(-x) +} + +// Trunc returns the integer value of x. +// +// Special cases are: +// Trunc(±0) = ±0 +// Trunc(±Inf) = ±Inf +// Trunc(NaN) = NaN +func Trunc(x float64) float64 { + if haveArchTrunc { + return archTrunc(x) + } + return trunc(x) +} + +func trunc(x float64) float64 { + if x == 0 || IsNaN(x) || IsInf(x, 0) { + return x + } + d, _ := Modf(x) + return d +} + +// Round returns the nearest integer, rounding half away from zero. +// +// Special cases are: +// Round(±0) = ±0 +// Round(±Inf) = ±Inf +// Round(NaN) = NaN +func Round(x float64) float64 { + // Round is a faster implementation of: + // + // func Round(x float64) float64 { + // t := Trunc(x) + // if Abs(x-t) >= 0.5 { + // return t + Copysign(1, x) + // } + // return t + // } + bits := Float64bits(x) + e := uint(bits>>shift) & mask + if e < bias { + // Round abs(x) < 1 including denormals. + bits &= signMask // +-0 + if e == bias-1 { + bits |= uvone // +-1 + } + } else if e < bias+shift { + // Round any abs(x) >= 1 containing a fractional component [0,1). + // + // Numbers with larger exponents are returned unchanged since they + // must be either an integer, infinity, or NaN. + const half = 1 << (shift - 1) + e -= bias + bits += half >> e + bits &^= fracMask >> e + } + return Float64frombits(bits) +} + +// RoundToEven returns the nearest integer, rounding ties to even. 
+// +// Special cases are: +// RoundToEven(±0) = ±0 +// RoundToEven(±Inf) = ±Inf +// RoundToEven(NaN) = NaN +func RoundToEven(x float64) float64 { + // RoundToEven is a faster implementation of: + // + // func RoundToEven(x float64) float64 { + // t := math.Trunc(x) + // odd := math.Remainder(t, 2) != 0 + // if d := math.Abs(x - t); d > 0.5 || (d == 0.5 && odd) { + // return t + math.Copysign(1, x) + // } + // return t + // } + bits := Float64bits(x) + e := uint(bits>>shift) & mask + if e >= bias { + // Round abs(x) >= 1. + // - Large numbers without fractional components, infinity, and NaN are unchanged. + // - Add 0.499.. or 0.5 before truncating depending on whether the truncated + // number is even or odd (respectively). + const halfMinusULP = (1 << (shift - 1)) - 1 + e -= bias + bits += (halfMinusULP + (bits>>(shift-e))&1) >> e + bits &^= fracMask >> e + } else if e == bias-1 && bits&fracMask != 0 { + // Round 0.5 < abs(x) < 1. + bits = bits&signMask | uvone // +-1 + } else { + // Round abs(x) <= 0.5 including denormals. + bits &= signMask // +-0 + } + return Float64frombits(bits) +} diff --git a/src/math/floor_386.s b/src/math/floor_386.s new file mode 100644 index 0000000..1990cb0 --- /dev/null +++ b/src/math/floor_386.s @@ -0,0 +1,46 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func archCeil(x float64) float64 +TEXT ·archCeil(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ANDW $0xf3ff, AX + ORW $0x0800, AX // Rounding Control set to +Inf + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=Ceil(x) + FLDCW -2(SP) // load old Control Word + FMOVDP F0, ret+8(FP) + RET + +// func archFloor(x float64) float64 +TEXT ·archFloor(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ANDW $0xf3ff, AX + ORW $0x0400, AX // Rounding Control set to -Inf + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=Floor(x) + FLDCW -2(SP) // load old Control Word + FMOVDP F0, ret+8(FP) + RET + +// func archTrunc(x float64) float64 +TEXT ·archTrunc(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ORW $0x0c00, AX // Rounding Control set to truncate + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=Trunc(x) + FLDCW -2(SP) // load old Control Word + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/floor_amd64.s b/src/math/floor_amd64.s new file mode 100644 index 0000000..0880499 --- /dev/null +++ b/src/math/floor_amd64.s @@ -0,0 +1,76 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define Big 0x4330000000000000 // 2**52 + +// func archFloor(x float64) float64 +TEXT ·archFloor(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + ANDQ AX,DX // DX = |x| + SUBQ $1,DX + MOVQ $(Big - 1), CX // if |x| >= 2**52-1 or IsNaN(x) or |x| == 0, return x + CMPQ DX,CX + JAE isBig_floor + MOVQ AX, X0 // X0 = x + CVTTSD2SQ X0, AX + CVTSQ2SD AX, X1 // X1 = float(int(x)) + CMPSD X1, X0, 1 // compare LT; X0 = 0xffffffffffffffff or 0 + MOVSD $(-1.0), X2 + ANDPD X2, X0 // if x < float(int(x)) {X0 = -1} else {X0 = 0} + ADDSD X1, X0 + MOVSD X0, ret+8(FP) + RET +isBig_floor: + MOVQ AX, ret+8(FP) // return x + RET + +// func archCeil(x float64) float64 +TEXT ·archCeil(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + MOVQ AX, BX // BX = copy of x + ANDQ DX, BX // BX = |x| + MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x + CMPQ BX, CX + JAE isBig_ceil + MOVQ AX, X0 // X0 = x + MOVQ DX, X2 // X2 = sign bit mask + CVTTSD2SQ X0, AX + ANDNPD X0, X2 // X2 = sign + CVTSQ2SD AX, X1 // X1 = float(int(x)) + CMPSD X1, X0, 2 // compare LE; X0 = 0xffffffffffffffff or 0 + ORPD X2, X1 // if X1 = 0.0, incorporate sign + MOVSD $1.0, X3 + ANDNPD X3, X0 + ORPD X2, X0 // if float(int(x)) <= x {X0 = 1} else {X0 = -0} + ADDSD X1, X0 + MOVSD X0, ret+8(FP) + RET +isBig_ceil: + MOVQ AX, ret+8(FP) + RET + +// func archTrunc(x float64) float64 +TEXT ·archTrunc(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + MOVQ AX, BX // BX = copy of x + ANDQ DX, BX // BX = |x| + MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x + CMPQ BX, CX + JAE isBig_trunc + MOVQ AX, X0 + MOVQ DX, X2 // X2 = sign bit mask + CVTTSD2SQ X0, AX + ANDNPD X0, X2 // X2 = sign + CVTSQ2SD AX, X0 // X0 = float(int(x)) + ORPD X2, X0 // if X0 = 0.0, incorporate sign + MOVSD X0, ret+8(FP) + RET +isBig_trunc: + MOVQ AX, ret+8(FP) // return x + RET diff --git a/src/math/floor_arm64.s b/src/math/floor_arm64.s new file mode 100644 
index 0000000..d9c5df7 --- /dev/null +++ b/src/math/floor_arm64.s @@ -0,0 +1,26 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func archFloor(x float64) float64 +TEXT ·archFloor(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FRINTMD F0, F0 + FMOVD F0, ret+8(FP) + RET + +// func archCeil(x float64) float64 +TEXT ·archCeil(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FRINTPD F0, F0 + FMOVD F0, ret+8(FP) + RET + +// func archTrunc(x float64) float64 +TEXT ·archTrunc(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FRINTZD F0, F0 + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go new file mode 100644 index 0000000..fb419d6 --- /dev/null +++ b/src/math/floor_asm.go @@ -0,0 +1,19 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || s390x || wasm + +package math + +const haveArchFloor = true + +func archFloor(x float64) float64 + +const haveArchCeil = true + +func archCeil(x float64) float64 + +const haveArchTrunc = true + +func archTrunc(x float64) float64 diff --git a/src/math/floor_noasm.go b/src/math/floor_noasm.go new file mode 100644 index 0000000..5641c7e --- /dev/null +++ b/src/math/floor_noasm.go @@ -0,0 +1,25 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !s390x && !wasm + +package math + +const haveArchFloor = false + +func archFloor(x float64) float64 { + panic("not implemented") +} + +const haveArchCeil = false + +func archCeil(x float64) float64 { + panic("not implemented") +} + +const haveArchTrunc = false + +func archTrunc(x float64) float64 { + panic("not implemented") +} diff --git a/src/math/floor_ppc64x.s b/src/math/floor_ppc64x.s new file mode 100644 index 0000000..584c27e --- /dev/null +++ b/src/math/floor_ppc64x.s @@ -0,0 +1,26 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ppc64 || ppc64le +// +build ppc64 ppc64le + +#include "textflag.h" + +TEXT ·archFloor(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FRIM F0, F0 + FMOVD F0, ret+8(FP) + RET + +TEXT ·archCeil(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FRIP F0, F0 + FMOVD F0, ret+8(FP) + RET + +TEXT ·archTrunc(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FRIZ F0, F0 + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/floor_s390x.s b/src/math/floor_s390x.s new file mode 100644 index 0000000..b5dd462 --- /dev/null +++ b/src/math/floor_s390x.s @@ -0,0 +1,26 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func archFloor(x float64) float64 +TEXT ·archFloor(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FIDBR $7, F0, F0 + FMOVD F0, ret+8(FP) + RET + +// func archCeil(x float64) float64 +TEXT ·archCeil(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FIDBR $6, F0, F0 + FMOVD F0, ret+8(FP) + RET + +// func archTrunc(x float64) float64 +TEXT ·archTrunc(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 + FIDBR $5, F0, F0 + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/floor_wasm.s b/src/math/floor_wasm.s new file mode 100644 index 0000000..3751471 --- /dev/null +++ b/src/math/floor_wasm.s @@ -0,0 +1,26 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·archFloor(SB),NOSPLIT,$0 + Get SP + F64Load x+0(FP) + F64Floor + F64Store ret+8(FP) + RET + +TEXT ·archCeil(SB),NOSPLIT,$0 + Get SP + F64Load x+0(FP) + F64Ceil + F64Store ret+8(FP) + RET + +TEXT ·archTrunc(SB),NOSPLIT,$0 + Get SP + F64Load x+0(FP) + F64Trunc + F64Store ret+8(FP) + RET diff --git a/src/math/fma.go b/src/math/fma.go new file mode 100644 index 0000000..ca0bf99 --- /dev/null +++ b/src/math/fma.go @@ -0,0 +1,170 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package math + +import "math/bits" + +func zero(x uint64) uint64 { + if x == 0 { + return 1 + } + return 0 + // branchless: + // return ((x>>1 | x&1) - 1) >> 63 +} + +func nonzero(x uint64) uint64 { + if x != 0 { + return 1 + } + return 0 + // branchless: + // return 1 - ((x>>1|x&1)-1)>>63 +} + +func shl(u1, u2 uint64, n uint) (r1, r2 uint64) { + r1 = u1<<n | u2>>(64-n) | u2<<(n-64) + r2 = u2 << n + return +} + +func shr(u1, u2 uint64, n uint) (r1, r2 uint64) { + r2 = u2>>n | u1<<(64-n) | u1>>(n-64) + r1 = u1 >> n + return +} + +// shrcompress compresses the bottom n+1 bits of the two-word +// value into a single bit. the result is equal to the value +// shifted to the right by n, except the result's 0th bit is +// set to the bitwise OR of the bottom n+1 bits. +func shrcompress(u1, u2 uint64, n uint) (r1, r2 uint64) { + // TODO: Performance here is really sensitive to the + // order/placement of these branches. n == 0 is common + // enough to be in the fast path. Perhaps more measurement + // needs to be done to find the optimal order/placement? + switch { + case n == 0: + return u1, u2 + case n == 64: + return 0, u1 | nonzero(u2) + case n >= 128: + return 0, nonzero(u1 | u2) + case n < 64: + r1, r2 = shr(u1, u2, n) + r2 |= nonzero(u2 & (1<<n - 1)) + case n < 128: + r1, r2 = shr(u1, u2, n) + r2 |= nonzero(u1&(1<<(n-64)-1) | u2) + } + return +} + +func lz(u1, u2 uint64) (l int32) { + l = int32(bits.LeadingZeros64(u1)) + if l == 64 { + l += int32(bits.LeadingZeros64(u2)) + } + return l +} + +// split splits b into sign, biased exponent, and mantissa. +// It adds the implicit 1 bit to the mantissa for normal values, +// and normalizes subnormal values. +func split(b uint64) (sign uint32, exp int32, mantissa uint64) { + sign = uint32(b >> 63) + exp = int32(b>>52) & mask + mantissa = b & fracMask + + if exp == 0 { + // Normalize value if subnormal. 
+ shift := uint(bits.LeadingZeros64(mantissa) - 11) + mantissa <<= shift + exp = 1 - int32(shift) + } else { + // Add implicit 1 bit + mantissa |= 1 << 52 + } + return +} + +// FMA returns x * y + z, computed with only one rounding. +// (That is, FMA returns the fused multiply-add of x, y, and z.) +func FMA(x, y, z float64) float64 { + bx, by, bz := Float64bits(x), Float64bits(y), Float64bits(z) + + // Inf, NaN, or a zero factor involved. At most one rounding will occur. + if x == 0.0 || y == 0.0 || bx&uvinf == uvinf || by&uvinf == uvinf { + return x*y + z + } + // Handle z == 0 separately. x and y are finite and nonzero here, so x*y + // is already the single-rounded result. Adding the zero is not harmless: + // a negative product that underflows yields (-0) + (+0) = (+0), which + // drops the sign of the exact result. + if z == 0 { + return x * y + } + // Handle non-finite z separately. Evaluating x*y+z where + // x and y are finite, but z is infinite, should always result in z. + if bz&uvinf == uvinf { + return z + } + + // Inputs are (sub)normal. + // Split x, y, z into sign, exponent, mantissa. + xs, xe, xm := split(bx) + ys, ye, ym := split(by) + zs, ze, zm := split(bz) + + // Compute product p = x*y as sign, exponent, two-word mantissa. + // Start with exponent. "is normal" bit isn't subtracted yet. + pe := xe + ye - bias + 1 + + // pm1:pm2 is the double-word mantissa for the product p. + // Shift left to leave top bit in product. Effectively + // shifts the 106-bit product to the left by 21. + pm1, pm2 := bits.Mul64(xm<<10, ym<<11) + zm1, zm2 := zm<<10, uint64(0) + ps := xs ^ ys // product sign + + // normalize to 62nd bit + is62zero := uint((^pm1 >> 62) & 1) + pm1, pm2 = shl(pm1, pm2, is62zero) + pe -= int32(is62zero) + + // Swap addition operands so |p| >= |z| + if pe < ze || pe == ze && pm1 < zm1 { + ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2 + } + + // Special case: p == -z. Neither operand is zero, so the exact sum is + // zero and must be returned as +0 rather than a zero carrying p's sign. + if ps != zs && pe == ze && pm1 == zm1 && pm2 == zm2 { + return 0 + } + + // Align significands + zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze)) + + // Compute resulting significands, normalizing if necessary. 
+ var m, c uint64 + if ps == zs { + // Adding (pm1:pm2) + (zm1:zm2) + pm2, c = bits.Add64(pm2, zm2, 0) + pm1, _ = bits.Add64(pm1, zm1, c) + pe -= int32(^pm1 >> 63) + pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63)) + } else { + // Subtracting (pm1:pm2) - (zm1:zm2) + // TODO: should we special-case cancellation? + pm2, c = bits.Sub64(pm2, zm2, 0) + pm1, _ = bits.Sub64(pm1, zm1, c) + nz := lz(pm1, pm2) + pe -= nz + m, pm2 = shl(pm1, pm2, uint(nz-1)) + m |= nonzero(pm2) + } + + // Round and break ties to even + if pe > 1022+bias || pe == 1022+bias && (m+1<<9)>>63 == 1 { + // rounded value overflows exponent range + return Float64frombits(uint64(ps)<<63 | uvinf) + } + if pe < 0 { + n := uint(-pe) + m = m>>n | nonzero(m&(1<<n-1)) + pe = 0 + } + m = ((m + 1<<9) >> 10) & ^zero((m&(1<<10-1))^1<<9) + pe &= -int32(nonzero(m)) + return Float64frombits(uint64(ps)<<63 + uint64(pe)<<52 + m) +} diff --git a/src/math/frexp.go b/src/math/frexp.go new file mode 100644 index 0000000..3c8a909 --- /dev/null +++ b/src/math/frexp.go @@ -0,0 +1,38 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Frexp breaks f into a normalized fraction +// and an integral power of two. +// It returns frac and exp satisfying f == frac × 2**exp, +// with the absolute value of frac in the interval [½, 1). 
+// +// Special cases are: +// Frexp(±0) = ±0, 0 +// Frexp(±Inf) = ±Inf, 0 +// Frexp(NaN) = NaN, 0 +func Frexp(f float64) (frac float64, exp int) { + if haveArchFrexp { + return archFrexp(f) + } + return frexp(f) +} + +func frexp(f float64) (frac float64, exp int) { + // special cases + switch { + case f == 0: + return f, 0 // correctly return -0 + case IsInf(f, 0) || IsNaN(f): + return f, 0 + } + f, exp = normalize(f) + x := Float64bits(f) + exp += int((x>>shift)&mask) - bias + 1 + x &^= mask << shift + x |= (-1 + bias) << shift + frac = Float64frombits(x) + return +} diff --git a/src/math/gamma.go b/src/math/gamma.go new file mode 100644 index 0000000..cc9e869 --- /dev/null +++ b/src/math/gamma.go @@ -0,0 +1,221 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/cprob/gamma.c. +// The go code is a simplified version of the original C. +// +// tgamma.c +// +// Gamma function +// +// SYNOPSIS: +// +// double x, y, tgamma(); +// extern int signgam; +// +// y = tgamma( x ); +// +// DESCRIPTION: +// +// Returns gamma function of the argument. The result is +// correctly signed, and the sign (+1 or -1) is also +// returned in a global (extern) variable named signgam. +// This variable is also filled in by the logarithmic gamma +// function lgamma(). +// +// Arguments |x| <= 34 are reduced by recurrence and the function +// approximated by a rational function of degree 6/7 in the +// interval (2,3). Large arguments are handled by Stirling's +// formula. Large negative arguments are made positive using +// a reflection formula. 
+// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -34, 34 10000 1.3e-16 2.5e-17 +// IEEE -170,-33 20000 2.3e-15 3.3e-16 +// IEEE -33, 33 20000 9.4e-16 2.2e-16 +// IEEE 33, 171.6 20000 2.3e-15 3.2e-16 +// +// Error for arguments outside the test range will be larger +// owing to error amplification by the exponential function. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +var _gamP = [...]float64{ + 1.60119522476751861407e-04, + 1.19135147006586384913e-03, + 1.04213797561761569935e-02, + 4.76367800457137231464e-02, + 2.07448227648435975150e-01, + 4.94214826801497100753e-01, + 9.99999999999999996796e-01, +} +var _gamQ = [...]float64{ + -2.31581873324120129819e-05, + 5.39605580493303397842e-04, + -4.45641913851797240494e-03, + 1.18139785222060435552e-02, + 3.58236398605498653373e-02, + -2.34591795718243348568e-01, + 7.14304917030273074085e-02, + 1.00000000000000000320e+00, +} +var _gamS = [...]float64{ + 7.87311395793093628397e-04, + -2.29549961613378126380e-04, + -2.68132617805781232825e-03, + 3.47222221605458667310e-03, + 8.33333333333482257126e-02, +} + +// Gamma function computed by Stirling's formula. +// The pair of results must be multiplied together to get the actual answer. 
+// The multiplication is left to the caller so that, if careful, the caller can avoid +// infinity for 172 <= x <= 180. +// The polynomial is valid for 33 <= x <= 172; larger values are only used +// in reciprocal and produce denormalized floats. The lower precision there +// masks any imprecision in the polynomial. +func stirling(x float64) (float64, float64) { + if x > 200 { + return Inf(1), 1 + } + const ( + SqrtTwoPi = 2.506628274631000502417 + MaxStirling = 143.01608 + ) + w := 1 / x + w = 1 + w*((((_gamS[0]*w+_gamS[1])*w+_gamS[2])*w+_gamS[3])*w+_gamS[4]) + y1 := Exp(x) + y2 := 1.0 + if x > MaxStirling { // avoid Pow() overflow + v := Pow(x, 0.5*x-0.25) + y1, y2 = v, v/y1 + } else { + y1 = Pow(x, x-0.5) / y1 + } + return y1, SqrtTwoPi * w * y2 +} + +// Gamma returns the Gamma function of x. +// +// Special cases are: +// Gamma(+Inf) = +Inf +// Gamma(+0) = +Inf +// Gamma(-0) = -Inf +// Gamma(x) = NaN for integer x < 0 +// Gamma(-Inf) = NaN +// Gamma(NaN) = NaN +func Gamma(x float64) float64 { + const Euler = 0.57721566490153286060651209008240243104215933593992 // A001620 + // special cases + switch { + case isNegInt(x) || IsInf(x, -1) || IsNaN(x): + return NaN() + case IsInf(x, 1): + return Inf(1) + case x == 0: + if Signbit(x) { + return Inf(-1) + } + return Inf(1) + } + q := Abs(x) + p := Floor(q) + if q > 33 { + if x >= 0 { + y1, y2 := stirling(x) + return y1 * y2 + } + // Note: x is negative but (checked above) not a negative integer, + // so x must be small enough to be in range for conversion to int64. + // If |x| were >= 2⁶³ it would have to be an integer. 
+ signgam := 1 + if ip := int64(p); ip&1 == 0 { + signgam = -1 + } + z := q - p + if z > 0.5 { + p = p + 1 + z = q - p + } + z = q * Sin(Pi*z) + if z == 0 { + return Inf(signgam) + } + sq1, sq2 := stirling(q) + absz := Abs(z) + d := absz * sq1 * sq2 + if IsInf(d, 0) { + z = Pi / absz / sq1 / sq2 + } else { + z = Pi / d + } + return float64(signgam) * z + } + + // Reduce argument + z := 1.0 + for x >= 3 { + x = x - 1 + z = z * x + } + for x < 0 { + if x > -1e-09 { + goto small + } + z = z / x + x = x + 1 + } + for x < 2 { + if x < 1e-09 { + goto small + } + z = z / x + x = x + 1 + } + + if x == 2 { + return z + } + + x = x - 2 + p = (((((x*_gamP[0]+_gamP[1])*x+_gamP[2])*x+_gamP[3])*x+_gamP[4])*x+_gamP[5])*x + _gamP[6] + q = ((((((x*_gamQ[0]+_gamQ[1])*x+_gamQ[2])*x+_gamQ[3])*x+_gamQ[4])*x+_gamQ[5])*x+_gamQ[6])*x + _gamQ[7] + return z * p / q + +small: + if x == 0 { + return Inf(1) + } + return z / ((1 + Euler*x) * x) +} + +func isNegInt(x float64) bool { + if x < 0 { + _, xf := Modf(x) + return xf == 0 + } + return false +} diff --git a/src/math/huge_test.go b/src/math/huge_test.go new file mode 100644 index 0000000..bc28c6f --- /dev/null +++ b/src/math/huge_test.go @@ -0,0 +1,115 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Disabled for s390x because it uses assembly routines that are not +// accurate for huge arguments. + +//go:build !s390x + +package math_test + +import ( + . "math" + "testing" +) + +// Inputs to test trig_reduce +var trigHuge = []float64{ + 1 << 28, + 1 << 29, + 1 << 30, + 1 << 35, + 1 << 120, + 1 << 240, + 1 << 480, + 1234567891234567 << 180, + 1234567891234567 << 300, + MaxFloat64, +} + +// Results for trigHuge[i] calculated with https://github.com/robpike/ivy +// using 4096 bits of working precision. 
Values requiring less than +// 102 decimal digits (1 << 120, 1 << 240, 1 << 480, 1234567891234567 << 180) +// were confirmed via https://keisan.casio.com/ +var cosHuge = []float64{ + -0.16556897949057876, + -0.94517382606089662, + 0.78670712294118812, + -0.76466301249635305, + -0.92587902285483787, + 0.93601042593353793, + -0.28282777640193788, + -0.14616431394103619, + -0.79456058210671406, + -0.99998768942655994, +} + +var sinHuge = []float64{ + -0.98619821183697566, + 0.32656766301856334, + -0.61732641504604217, + -0.64443035102329113, + 0.37782010936075202, + -0.35197227524865778, + 0.95917070894368716, + 0.98926032637023618, + -0.60718488235646949, + 0.00496195478918406, +} + +var tanHuge = []float64{ + 5.95641897939639421, + -0.34551069233430392, + -0.78469661331920043, + 0.84276385870875983, + -0.40806638884180424, + -0.37603456702698076, + -3.39135965054779932, + -6.76813854009065030, + 0.76417695016604922, + -0.00496201587444489, +} + +// Check that trig values of huge angles return accurate results. +// This confirms that argument reduction works for very large values +// up to MaxFloat64. 
+func TestHugeCos(t *testing.T) { + for i := 0; i < len(trigHuge); i++ { + f1 := cosHuge[i] + f2 := Cos(trigHuge[i]) + if !close(f1, f2) { + t.Errorf("Cos(%g) = %g, want %g", trigHuge[i], f2, f1) + } + } +} + +func TestHugeSin(t *testing.T) { + for i := 0; i < len(trigHuge); i++ { + f1 := sinHuge[i] + f2 := Sin(trigHuge[i]) + if !close(f1, f2) { + t.Errorf("Sin(%g) = %g, want %g", trigHuge[i], f2, f1) + } + } +} + +func TestHugeSinCos(t *testing.T) { + for i := 0; i < len(trigHuge); i++ { + f1, g1 := sinHuge[i], cosHuge[i] + f2, g2 := Sincos(trigHuge[i]) + if !close(f1, f2) || !close(g1, g2) { + t.Errorf("Sincos(%g) = %g, %g, want %g, %g", trigHuge[i], f2, g2, f1, g1) + } + } +} + +func TestHugeTan(t *testing.T) { + for i := 0; i < len(trigHuge); i++ { + f1 := tanHuge[i] + f2 := Tan(trigHuge[i]) + if !close(f1, f2) { + t.Errorf("Tan(%g) = %g, want %g", trigHuge[i], f2, f1) + } + } +} diff --git a/src/math/hypot.go b/src/math/hypot.go new file mode 100644 index 0000000..12af177 --- /dev/null +++ b/src/math/hypot.go @@ -0,0 +1,43 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Hypot -- sqrt(p*p + q*q), but overflows only if the result does. +*/ + +// Hypot returns Sqrt(p*p + q*q), taking care to avoid +// unnecessary overflow and underflow. 
+// +// Special cases are: +// Hypot(±Inf, q) = +Inf +// Hypot(p, ±Inf) = +Inf +// Hypot(NaN, q) = NaN +// Hypot(p, NaN) = NaN +func Hypot(p, q float64) float64 { + if haveArchHypot { + return archHypot(p, q) + } + return hypot(p, q) +} + +func hypot(p, q float64) float64 { + // special cases + switch { + case IsInf(p, 0) || IsInf(q, 0): + return Inf(1) + case IsNaN(p) || IsNaN(q): + return NaN() + } + p, q = Abs(p), Abs(q) + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * Sqrt(1+q*q) +} diff --git a/src/math/hypot_386.s b/src/math/hypot_386.s new file mode 100644 index 0000000..80a8fd3 --- /dev/null +++ b/src/math/hypot_386.s @@ -0,0 +1,59 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func archHypot(p, q float64) float64 +TEXT ·archHypot(SB),NOSPLIT,$0 +// test bits for not-finite + MOVL p_hi+4(FP), AX // high word p + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + MOVL q_hi+12(FP), AX // high word q + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + FMOVD p+0(FP), F0 // F0=p + FABS // F0=|p| + FMOVD q+8(FP), F0 // F0=q, F1=|p| + FABS // F0=|q|, F1=|p| + FUCOMI F0, F1 // compare F0 to F1 + JCC 2(PC) // jump if F0 >= F1 + FXCHD F0, F1 // F0=|p| (larger), F1=|q| (smaller) + FTST // compare F0 to 0 + FSTSW AX + ANDW $0x4000, AX + JNE 10(PC) // jump if F0 = 0 + FXCHD F0, F1 // F0=q (smaller), F1=p (larger) + FDIVD F1, F0 // F0=q(=q/p), F1=p + FMULD F0, F0 // F0=q*q, F1=p + FLD1 // F0=1, F1=q*q, F2=p + FADDDP F0, F1 // F0=1+q*q, F1=p + FSQRT // F0=sqrt(1+q*q), F1=p + FMULDP F0, F1 // F0=p*sqrt(1+q*q) + FMOVDP F0, ret+16(FP) + RET + FMOVDP F0, F1 // F0=0 + FMOVDP F0, ret+16(FP) + RET +not_finite: +// test bits for -Inf or +Inf + MOVL p_hi+4(FP), AX // high word p + ORL p_lo+0(FP), AX // low word p + ANDL $0x7fffffff, AX + CMPL AX, $0x7ff00000 + JEQ is_inf + 
MOVL q_hi+12(FP), AX // high word q + ORL q_lo+8(FP), AX // low word q + ANDL $0x7fffffff, AX + CMPL AX, $0x7ff00000 + JEQ is_inf + MOVL $0x7ff80000, ret_hi+20(FP) // return NaN = 0x7FF8000000000001 + MOVL $0x00000001, ret_lo+16(FP) + RET +is_inf: + MOVL AX, ret_hi+20(FP) // return +Inf = 0x7FF0000000000000 + MOVL $0x00000000, ret_lo+16(FP) + RET diff --git a/src/math/hypot_amd64.s b/src/math/hypot_amd64.s new file mode 100644 index 0000000..fe326c9 --- /dev/null +++ b/src/math/hypot_amd64.s @@ -0,0 +1,52 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 + +// func archHypot(p, q float64) float64 +TEXT ·archHypot(SB),NOSPLIT,$0 + // test bits for special cases + MOVQ p+0(FP), BX + MOVQ $~(1<<63), AX + ANDQ AX, BX // p = |p| + MOVQ q+8(FP), CX + ANDQ AX, CX // q = |q| + MOVQ $PosInf, AX + CMPQ AX, BX + JLE isInfOrNaN + CMPQ AX, CX + JLE isInfOrNaN + // hypot = max * sqrt(1 + (min/max)**2) + MOVQ BX, X0 + MOVQ CX, X1 + ORQ CX, BX + JEQ isZero + MOVAPD X0, X2 + MAXSD X1, X0 + MINSD X2, X1 + DIVSD X0, X1 + MULSD X1, X1 + ADDSD $1.0, X1 + SQRTSD X1, X1 + MULSD X1, X0 + MOVSD X0, ret+16(FP) + RET +isInfOrNaN: + CMPQ AX, BX + JEQ isInf + CMPQ AX, CX + JEQ isInf + MOVQ $NaN, AX + MOVQ AX, ret+16(FP) // return NaN + RET +isInf: + MOVQ AX, ret+16(FP) // return +Inf + RET +isZero: + MOVQ $0, AX + MOVQ AX, ret+16(FP) // return 0 + RET diff --git a/src/math/hypot_asm.go b/src/math/hypot_asm.go new file mode 100644 index 0000000..8526910 --- /dev/null +++ b/src/math/hypot_asm.go @@ -0,0 +1,11 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build 386 || amd64 + +package math + +const haveArchHypot = true + +func archHypot(p, q float64) float64 diff --git a/src/math/hypot_noasm.go b/src/math/hypot_noasm.go new file mode 100644 index 0000000..8b64812 --- /dev/null +++ b/src/math/hypot_noasm.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !386 && !amd64 + +package math + +const haveArchHypot = false + +func archHypot(p, q float64) float64 { + panic("not implemented") +} diff --git a/src/math/j0.go b/src/math/j0.go new file mode 100644 index 0000000..cb5f07b --- /dev/null +++ b/src/math/j0.go @@ -0,0 +1,427 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Bessel function of the first and second kinds of order zero. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_j0.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_j0(x), __ieee754_y0(x) +// Bessel function of the first and second kinds of order zero. +// Method -- j0(x): +// 1. For tiny x, we use j0(x) = 1 - x**2/4 + x**4/64 - ... +// 2. 
Reduce x to |x| since j0(x)=j0(-x), and +// for x in (0,2) +// j0(x) = 1-z/4+ z**2*R0/S0, where z = x*x; +// (precision: |j0-1+z/4-z**2*R0/S0 |<2**-63.67 ) +// for x in (2,inf) +// j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) +// where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) +// as follows: +// cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) +// = 1/sqrt(2) * (cos(x) + sin(x)) +// sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) +// = 1/sqrt(2) * (sin(x) - cos(x)) +// (To avoid cancellation, use +// sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) +// to compute the worse one.) +// +// 3. Special cases +// j0(nan)= nan +// j0(0) = 1 +// j0(inf) = 0 +// +// Method -- y0(x): +// 1. For x<2. +// Since +// y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x**2/4 - ...) +// therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. +// We use the following function to approximate y0, +// y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x**2 +// where +// U(z) = u00 + u01*z + ... + u06*z**6 +// V(z) = 1 + v01*z + ... + v04*z**4 +// with absolute approximation error bounded by 2**-72. +// Note: For tiny x, U/V = u00 and j0(x)~1, hence +// y0(tiny) = u00 + (2/pi)*ln(tiny), (choose tiny<2**-27) +// 2. For x>=2. +// y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)) +// where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) +// by the method mentioned above. +// 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. +// + +// J0 returns the order-zero Bessel function of the first kind. 
+// +// Special cases are: +// J0(±Inf) = 0 +// J0(0) = 1 +// J0(NaN) = NaN +func J0(x float64) float64 { + const ( + Huge = 1e300 + TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000 + TwoM13 = 1.0 / (1 << 13) // 2**-13 0x3f20000000000000 + Two129 = 1 << 129 // 2**129 0x4800000000000000 + // R0/S0 on [0, 2] + R02 = 1.56249999999999947958e-02 // 0x3F8FFFFFFFFFFFFD + R03 = -1.89979294238854721751e-04 // 0xBF28E6A5B61AC6E9 + R04 = 1.82954049532700665670e-06 // 0x3EBEB1D10C503919 + R05 = -4.61832688532103189199e-09 // 0xBE33D5E773D63FCE + S01 = 1.56191029464890010492e-02 // 0x3F8FFCE882C8C2A4 + S02 = 1.16926784663337450260e-04 // 0x3F1EA6D2DD57DBF4 + S03 = 5.13546550207318111446e-07 // 0x3EA13B54CE84D5A9 + S04 = 1.16614003333790000205e-09 // 0x3E1408BCF4745D8F + ) + // special cases + switch { + case IsNaN(x): + return x + case IsInf(x, 0): + return 0 + case x == 0: + return 1 + } + + x = Abs(x) + if x >= 2 { + s, c := Sincos(x) + ss := s - c + cc := s + c + + // make sure x+x does not overflow + if x < MaxFloat64/2 { + z := -Cos(x + x) + if s*c < 0 { + cc = z / ss + } else { + ss = z / cc + } + } + + // j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + // y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + + var z float64 + if x > Two129 { // |x| > ~6.8056e+38 + z = (1 / SqrtPi) * cc / Sqrt(x) + } else { + u := pzero(x) + v := qzero(x) + z = (1 / SqrtPi) * (u*cc - v*ss) / Sqrt(x) + } + return z // |x| >= 2.0 + } + if x < TwoM13 { // |x| < ~1.2207e-4 + if x < TwoM27 { + return 1 // |x| < ~7.4506e-9 + } + return 1 - 0.25*x*x // ~7.4506e-9 < |x| < ~1.2207e-4 + } + z := x * x + r := z * (R02 + z*(R03+z*(R04+z*R05))) + s := 1 + z*(S01+z*(S02+z*(S03+z*S04))) + if x < 1 { + return 1 + z*(-0.25+(r/s)) // |x| < 1.00 + } + u := 0.5 * x + return (1+u)*(1-u) + z*(r/s) // 1.0 < |x| < 2.0 +} + +// Y0 returns the order-zero Bessel function of the second kind. 
+// +// Special cases are: +// Y0(+Inf) = 0 +// Y0(0) = -Inf +// Y0(x < 0) = NaN +// Y0(NaN) = NaN +func Y0(x float64) float64 { + const ( + TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000 + Two129 = 1 << 129 // 2**129 0x4800000000000000 + U00 = -7.38042951086872317523e-02 // 0xBFB2E4D699CBD01F + U01 = 1.76666452509181115538e-01 // 0x3FC69D019DE9E3FC + U02 = -1.38185671945596898896e-02 // 0xBF8C4CE8B16CFA97 + U03 = 3.47453432093683650238e-04 // 0x3F36C54D20B29B6B + U04 = -3.81407053724364161125e-06 // 0xBECFFEA773D25CAD + U05 = 1.95590137035022920206e-08 // 0x3E5500573B4EABD4 + U06 = -3.98205194132103398453e-11 // 0xBDC5E43D693FB3C8 + V01 = 1.27304834834123699328e-02 // 0x3F8A127091C9C71A + V02 = 7.60068627350353253702e-05 // 0x3F13ECBBF578C6C1 + V03 = 2.59150851840457805467e-07 // 0x3E91642D7FF202FD + V04 = 4.41110311332675467403e-10 // 0x3DFE50183BD6D9EF + ) + // special cases + switch { + case x < 0 || IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + case x == 0: + return Inf(-1) + } + + if x >= 2 { // |x| >= 2.0 + + // y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)) + // where x0 = x-pi/4 + // Better formula: + // cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + // = 1/sqrt(2) * (sin(x) + cos(x)) + // sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + // = 1/sqrt(2) * (sin(x) - cos(x)) + // To avoid cancellation, use + // sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + // to compute the worse one. 
+ + s, c := Sincos(x) + ss := s - c + cc := s + c + + // j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + // y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + + // make sure x+x does not overflow + if x < MaxFloat64/2 { + z := -Cos(x + x) + if s*c < 0 { + cc = z / ss + } else { + ss = z / cc + } + } + var z float64 + if x > Two129 { // |x| > ~6.8056e+38 + z = (1 / SqrtPi) * ss / Sqrt(x) + } else { + u := pzero(x) + v := qzero(x) + z = (1 / SqrtPi) * (u*ss + v*cc) / Sqrt(x) + } + return z // |x| >= 2.0 + } + if x <= TwoM27 { + return U00 + (2/Pi)*Log(x) // |x| < ~7.4506e-9 + } + z := x * x + u := U00 + z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06))))) + v := 1 + z*(V01+z*(V02+z*(V03+z*V04))) + return u/v + (2/Pi)*J0(x)*Log(x) // ~7.4506e-9 < |x| < 2.0 +} + +// The asymptotic expansions of pzero is +// 1 - 9/128 s**2 + 11025/98304 s**4 - ..., where s = 1/x. +// For x >= 2, We approximate pzero by +// pzero(x) = 1 + (R/S) +// where R = pR0 + pR1*s**2 + pR2*s**4 + ... + pR5*s**10 +// S = 1 + pS0*s**2 + ... 
+ pS4*s**10 +// and +// | pzero(x)-1-R/S | <= 2 ** ( -60.26) + +// for x in [inf, 8]=1/[0,0.125] +var p0R8 = [6]float64{ + 0.00000000000000000000e+00, // 0x0000000000000000 + -7.03124999999900357484e-02, // 0xBFB1FFFFFFFFFD32 + -8.08167041275349795626e+00, // 0xC02029D0B44FA779 + -2.57063105679704847262e+02, // 0xC07011027B19E863 + -2.48521641009428822144e+03, // 0xC0A36A6ECD4DCAFC + -5.25304380490729545272e+03, // 0xC0B4850B36CC643D +} +var p0S8 = [5]float64{ + 1.16534364619668181717e+02, // 0x405D223307A96751 + 3.83374475364121826715e+03, // 0x40ADF37D50596938 + 4.05978572648472545552e+04, // 0x40E3D2BB6EB6B05F + 1.16752972564375915681e+05, // 0x40FC810F8F9FA9BD + 4.76277284146730962675e+04, // 0x40E741774F2C49DC +} + +// for x in [8,4.5454]=1/[0.125,0.22001] +var p0R5 = [6]float64{ + -1.14125464691894502584e-11, // 0xBDA918B147E495CC + -7.03124940873599280078e-02, // 0xBFB1FFFFE69AFBC6 + -4.15961064470587782438e+00, // 0xC010A370F90C6BBF + -6.76747652265167261021e+01, // 0xC050EB2F5A7D1783 + -3.31231299649172967747e+02, // 0xC074B3B36742CC63 + -3.46433388365604912451e+02, // 0xC075A6EF28A38BD7 +} +var p0S5 = [5]float64{ + 6.07539382692300335975e+01, // 0x404E60810C98C5DE + 1.05125230595704579173e+03, // 0x40906D025C7E2864 + 5.97897094333855784498e+03, // 0x40B75AF88FBE1D60 + 9.62544514357774460223e+03, // 0x40C2CCB8FA76FA38 + 2.40605815922939109441e+03, // 0x40A2CC1DC70BE864 +} + +// for x in [4.547,2.8571]=1/[0.2199,0.35001] +var p0R3 = [6]float64{ + -2.54704601771951915620e-09, // 0xBE25E1036FE1AA86 + -7.03119616381481654654e-02, // 0xBFB1FFF6F7C0E24B + -2.40903221549529611423e+00, // 0xC00345B2AEA48074 + -2.19659774734883086467e+01, // 0xC035F74A4CB94E14 + -5.80791704701737572236e+01, // 0xC04D0A22420A1A45 + -3.14479470594888503854e+01, // 0xC03F72ACA892D80F +} +var p0S3 = [5]float64{ + 3.58560338055209726349e+01, // 0x4041ED9284077DD3 + 3.61513983050303863820e+02, // 0x40769839464A7C0E + 1.19360783792111533330e+03, // 0x4092A66E6D1061D6 + 
1.12799679856907414432e+03, // 0x40919FFCB8C39B7E + 1.73580930813335754692e+02, // 0x4065B296FC379081 +} + +// for x in [2.8570,2]=1/[0.3499,0.5] +var p0R2 = [6]float64{ + -8.87534333032526411254e-08, // 0xBE77D316E927026D + -7.03030995483624743247e-02, // 0xBFB1FF62495E1E42 + -1.45073846780952986357e+00, // 0xBFF736398A24A843 + -7.63569613823527770791e+00, // 0xC01E8AF3EDAFA7F3 + -1.11931668860356747786e+01, // 0xC02662E6C5246303 + -3.23364579351335335033e+00, // 0xC009DE81AF8FE70F +} +var p0S2 = [5]float64{ + 2.22202997532088808441e+01, // 0x40363865908B5959 + 1.36206794218215208048e+02, // 0x4061069E0EE8878F + 2.70470278658083486789e+02, // 0x4070E78642EA079B + 1.53875394208320329881e+02, // 0x40633C033AB6FAFF + 1.46576176948256193810e+01, // 0x402D50B344391809 +} + +func pzero(x float64) float64 { + var p *[6]float64 + var q *[5]float64 + if x >= 8 { + p = &p0R8 + q = &p0S8 + } else if x >= 4.5454 { + p = &p0R5 + q = &p0S5 + } else if x >= 2.8571 { + p = &p0R3 + q = &p0S3 + } else if x >= 2 { + p = &p0R2 + q = &p0S2 + } + z := 1 / (x * x) + r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) + s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))) + return 1 + r/s +} + +// For x >= 8, the asymptotic expansions of qzero is +// -1/8 s + 75/1024 s**3 - ..., where s = 1/x. +// We approximate pzero by +// qzero(x) = s*(-1.25 + (R/S)) +// where R = qR0 + qR1*s**2 + qR2*s**4 + ... + qR5*s**10 +// S = 1 + qS0*s**2 + ... 
+ qS5*s**12 +// and +// | qzero(x)/s +1.25-R/S | <= 2**(-61.22) + +// for x in [inf, 8]=1/[0,0.125] +var q0R8 = [6]float64{ + 0.00000000000000000000e+00, // 0x0000000000000000 + 7.32421874999935051953e-02, // 0x3FB2BFFFFFFFFE2C + 1.17682064682252693899e+01, // 0x402789525BB334D6 + 5.57673380256401856059e+02, // 0x40816D6315301825 + 8.85919720756468632317e+03, // 0x40C14D993E18F46D + 3.70146267776887834771e+04, // 0x40E212D40E901566 +} +var q0S8 = [6]float64{ + 1.63776026895689824414e+02, // 0x406478D5365B39BC + 8.09834494656449805916e+03, // 0x40BFA2584E6B0563 + 1.42538291419120476348e+05, // 0x4101665254D38C3F + 8.03309257119514397345e+05, // 0x412883DA83A52B43 + 8.40501579819060512818e+05, // 0x4129A66B28DE0B3D + -3.43899293537866615225e+05, // 0xC114FD6D2C9530C5 +} + +// for x in [8,4.5454]=1/[0.125,0.22001] +var q0R5 = [6]float64{ + 1.84085963594515531381e-11, // 0x3DB43D8F29CC8CD9 + 7.32421766612684765896e-02, // 0x3FB2BFFFD172B04C + 5.83563508962056953777e+00, // 0x401757B0B9953DD3 + 1.35111577286449829671e+02, // 0x4060E3920A8788E9 + 1.02724376596164097464e+03, // 0x40900CF99DC8C481 + 1.98997785864605384631e+03, // 0x409F17E953C6E3A6 +} +var q0S5 = [6]float64{ + 8.27766102236537761883e+01, // 0x4054B1B3FB5E1543 + 2.07781416421392987104e+03, // 0x40A03BA0DA21C0CE + 1.88472887785718085070e+04, // 0x40D267D27B591E6D + 5.67511122894947329769e+04, // 0x40EBB5E397E02372 + 3.59767538425114471465e+04, // 0x40E191181F7A54A0 + -5.35434275601944773371e+03, // 0xC0B4EA57BEDBC609 +} + +// for x in [4.547,2.8571]=1/[0.2199,0.35001] +var q0R3 = [6]float64{ + 4.37741014089738620906e-09, // 0x3E32CD036ADECB82 + 7.32411180042911447163e-02, // 0x3FB2BFEE0E8D0842 + 3.34423137516170720929e+00, // 0x400AC0FC61149CF5 + 4.26218440745412650017e+01, // 0x40454F98962DAEDD + 1.70808091340565596283e+02, // 0x406559DBE25EFD1F + 1.66733948696651168575e+02, // 0x4064D77C81FA21E0 +} +var q0S3 = [6]float64{ + 4.87588729724587182091e+01, // 0x40486122BFE343A6 + 7.09689221056606015736e+02, // 
0x40862D8386544EB3 + 3.70414822620111362994e+03, // 0x40ACF04BE44DFC63 + 6.46042516752568917582e+03, // 0x40B93C6CD7C76A28 + 2.51633368920368957333e+03, // 0x40A3A8AAD94FB1C0 + -1.49247451836156386662e+02, // 0xC062A7EB201CF40F +} + +// for x in [2.8570,2]=1/[0.3499,0.5] +var q0R2 = [6]float64{ + 1.50444444886983272379e-07, // 0x3E84313B54F76BDB + 7.32234265963079278272e-02, // 0x3FB2BEC53E883E34 + 1.99819174093815998816e+00, // 0x3FFFF897E727779C + 1.44956029347885735348e+01, // 0x402CFDBFAAF96FE5 + 3.16662317504781540833e+01, // 0x403FAA8E29FBDC4A + 1.62527075710929267416e+01, // 0x403040B171814BB4 +} +var q0S2 = [6]float64{ + 3.03655848355219184498e+01, // 0x403E5D96F7C07AED + 2.69348118608049844624e+02, // 0x4070D591E4D14B40 + 8.44783757595320139444e+02, // 0x408A664522B3BF22 + 8.82935845112488550512e+02, // 0x408B977C9C5CC214 + 2.12666388511798828631e+02, // 0x406A95530E001365 + -5.31095493882666946917e+00, // 0xC0153E6AF8B32931 +} + +func qzero(x float64) float64 { + var p, q *[6]float64 + if x >= 8 { + p = &q0R8 + q = &q0S8 + } else if x >= 4.5454 { + p = &q0R5 + q = &q0S5 + } else if x >= 2.8571 { + p = &q0R3 + q = &q0S3 + } else if x >= 2 { + p = &q0R2 + q = &q0S2 + } + z := 1 / (x * x) + r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) + s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))) + return (-0.125 + r/s) / x +} diff --git a/src/math/j1.go b/src/math/j1.go new file mode 100644 index 0000000..7c7d279 --- /dev/null +++ b/src/math/j1.go @@ -0,0 +1,422 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Bessel function of the first and second kinds of order one. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_j1.c and +// came with this notice. The go code is a simplified +// version of the original C. 
+// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_j1(x), __ieee754_y1(x) +// Bessel function of the first and second kinds of order one. +// Method -- j1(x): +// 1. For tiny x, we use j1(x) = x/2 - x**3/16 + x**5/384 - ... +// 2. Reduce x to |x| since j1(x)=-j1(-x), and +// for x in (0,2) +// j1(x) = x/2 + x*z*R0/S0, where z = x*x; +// (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) +// for x in (2,inf) +// j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) +// y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) +// where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) +// as follow: +// cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) +// = 1/sqrt(2) * (sin(x) - cos(x)) +// sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) +// = -1/sqrt(2) * (sin(x) + cos(x)) +// (To avoid cancellation, use +// sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) +// to compute the worse one.) +// +// 3 Special cases +// j1(nan)= nan +// j1(0) = 0 +// j1(inf) = 0 +// +// Method -- y1(x): +// 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN +// 2. For x<2. +// Since +// y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x**3-...) +// therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. +// We use the following function to approximate y1, +// y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x**2 +// where for x in [0,2] (abs err less than 2**-65.89) +// U(z) = U0[0] + U0[1]*z + ... + U0[4]*z**4 +// V(z) = 1 + v0[0]*z + ... + v0[4]*z**5 +// Note: For tiny x, 1/x dominate y1 and hence +// y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) +// 3. For x>=2. +// y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) +// where x1 = x-3*pi/4. 
It is better to compute sin(x1),cos(x1) +// by method mentioned above. + +// J1 returns the order-one Bessel function of the first kind. +// +// Special cases are: +// J1(±Inf) = 0 +// J1(NaN) = NaN +func J1(x float64) float64 { + const ( + TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000 + Two129 = 1 << 129 // 2**129 0x4800000000000000 + // R0/S0 on [0, 2] + R00 = -6.25000000000000000000e-02 // 0xBFB0000000000000 + R01 = 1.40705666955189706048e-03 // 0x3F570D9F98472C61 + R02 = -1.59955631084035597520e-05 // 0xBEF0C5C6BA169668 + R03 = 4.96727999609584448412e-08 // 0x3E6AAAFA46CA0BD9 + S01 = 1.91537599538363460805e-02 // 0x3F939D0B12637E53 + S02 = 1.85946785588630915560e-04 // 0x3F285F56B9CDF664 + S03 = 1.17718464042623683263e-06 // 0x3EB3BFF8333F8498 + S04 = 5.04636257076217042715e-09 // 0x3E35AC88C97DFF2C + S05 = 1.23542274426137913908e-11 // 0x3DAB2ACFCFB97ED8 + ) + // special cases + switch { + case IsNaN(x): + return x + case IsInf(x, 0) || x == 0: + return 0 + } + + sign := false + if x < 0 { + x = -x + sign = true + } + if x >= 2 { + s, c := Sincos(x) + ss := -s - c + cc := s - c + + // make sure x+x does not overflow + if x < MaxFloat64/2 { + z := Cos(x + x) + if s*c > 0 { + cc = z / ss + } else { + ss = z / cc + } + } + + // j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x) + // y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x) + + var z float64 + if x > Two129 { + z = (1 / SqrtPi) * cc / Sqrt(x) + } else { + u := pone(x) + v := qone(x) + z = (1 / SqrtPi) * (u*cc - v*ss) / Sqrt(x) + } + if sign { + return -z + } + return z + } + if x < TwoM27 { // |x|<2**-27 + return 0.5 * x // inexact if x!=0 necessary + } + z := x * x + r := z * (R00 + z*(R01+z*(R02+z*R03))) + s := 1.0 + z*(S01+z*(S02+z*(S03+z*(S04+z*S05)))) + r *= x + z = 0.5*x + r/s + if sign { + return -z + } + return z +} + +// Y1 returns the order-one Bessel function of the second kind. 
+// +// Special cases are: +// Y1(+Inf) = 0 +// Y1(0) = -Inf +// Y1(x < 0) = NaN +// Y1(NaN) = NaN +func Y1(x float64) float64 { + const ( + TwoM54 = 1.0 / (1 << 54) // 2**-54 0x3c90000000000000 + Two129 = 1 << 129 // 2**129 0x4800000000000000 + U00 = -1.96057090646238940668e-01 // 0xBFC91866143CBC8A + U01 = 5.04438716639811282616e-02 // 0x3FA9D3C776292CD1 + U02 = -1.91256895875763547298e-03 // 0xBF5F55E54844F50F + U03 = 2.35252600561610495928e-05 // 0x3EF8AB038FA6B88E + U04 = -9.19099158039878874504e-08 // 0xBE78AC00569105B8 + V00 = 1.99167318236649903973e-02 // 0x3F94650D3F4DA9F0 + V01 = 2.02552581025135171496e-04 // 0x3F2A8C896C257764 + V02 = 1.35608801097516229404e-06 // 0x3EB6C05A894E8CA6 + V03 = 6.22741452364621501295e-09 // 0x3E3ABF1D5BA69A86 + V04 = 1.66559246207992079114e-11 // 0x3DB25039DACA772A + ) + // special cases + switch { + case x < 0 || IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + case x == 0: + return Inf(-1) + } + + if x >= 2 { + s, c := Sincos(x) + ss := -s - c + cc := s - c + + // make sure x+x does not overflow + if x < MaxFloat64/2 { + z := Cos(x + x) + if s*c > 0 { + cc = z / ss + } else { + ss = z / cc + } + } + // y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0)) + // where x0 = x-3pi/4 + // Better formula: + // cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + // = 1/sqrt(2) * (sin(x) - cos(x)) + // sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + // = -1/sqrt(2) * (cos(x) + sin(x)) + // To avoid cancellation, use + // sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + // to compute the worse one. 
+ + var z float64 + if x > Two129 { + z = (1 / SqrtPi) * ss / Sqrt(x) + } else { + u := pone(x) + v := qone(x) + z = (1 / SqrtPi) * (u*ss + v*cc) / Sqrt(x) + } + return z + } + if x <= TwoM54 { // x < 2**-54 + return -(2 / Pi) / x + } + z := x * x + u := U00 + z*(U01+z*(U02+z*(U03+z*U04))) + v := 1 + z*(V00+z*(V01+z*(V02+z*(V03+z*V04)))) + return x*(u/v) + (2/Pi)*(J1(x)*Log(x)-1/x) +} + +// For x >= 8, the asymptotic expansions of pone is +// 1 + 15/128 s**2 - 4725/2**15 s**4 - ..., where s = 1/x. +// We approximate pone by +// pone(x) = 1 + (R/S) +// where R = pr0 + pr1*s**2 + pr2*s**4 + ... + pr5*s**10 +// S = 1 + ps0*s**2 + ... + ps4*s**10 +// and +// | pone(x)-1-R/S | <= 2**(-60.06) + +// for x in [inf, 8]=1/[0,0.125] +var p1R8 = [6]float64{ + 0.00000000000000000000e+00, // 0x0000000000000000 + 1.17187499999988647970e-01, // 0x3FBDFFFFFFFFFCCE + 1.32394806593073575129e+01, // 0x402A7A9D357F7FCE + 4.12051854307378562225e+02, // 0x4079C0D4652EA590 + 3.87474538913960532227e+03, // 0x40AE457DA3A532CC + 7.91447954031891731574e+03, // 0x40BEEA7AC32782DD +} +var p1S8 = [5]float64{ + 1.14207370375678408436e+02, // 0x405C8D458E656CAC + 3.65093083420853463394e+03, // 0x40AC85DC964D274F + 3.69562060269033463555e+04, // 0x40E20B8697C5BB7F + 9.76027935934950801311e+04, // 0x40F7D42CB28F17BB + 3.08042720627888811578e+04, // 0x40DE1511697A0B2D +} + +// for x in [8,4.5454] = 1/[0.125,0.22001] +var p1R5 = [6]float64{ + 1.31990519556243522749e-11, // 0x3DAD0667DAE1CA7D + 1.17187493190614097638e-01, // 0x3FBDFFFFE2C10043 + 6.80275127868432871736e+00, // 0x401B36046E6315E3 + 1.08308182990189109773e+02, // 0x405B13B9452602ED + 5.17636139533199752805e+02, // 0x40802D16D052D649 + 5.28715201363337541807e+02, // 0x408085B8BB7E0CB7 +} +var p1S5 = [5]float64{ + 5.92805987221131331921e+01, // 0x404DA3EAA8AF633D + 9.91401418733614377743e+02, // 0x408EFB361B066701 + 5.35326695291487976647e+03, // 0x40B4E9445706B6FB + 7.84469031749551231769e+03, // 0x40BEA4B0B8A5BB15 + 
1.50404688810361062679e+03, // 0x40978030036F5E51 +} + +// for x in[4.5453,2.8571] = 1/[0.2199,0.35001] +var p1R3 = [6]float64{ + 3.02503916137373618024e-09, // 0x3E29FC21A7AD9EDD + 1.17186865567253592491e-01, // 0x3FBDFFF55B21D17B + 3.93297750033315640650e+00, // 0x400F76BCE85EAD8A + 3.51194035591636932736e+01, // 0x40418F489DA6D129 + 9.10550110750781271918e+01, // 0x4056C3854D2C1837 + 4.85590685197364919645e+01, // 0x4048478F8EA83EE5 +} +var p1S3 = [5]float64{ + 3.47913095001251519989e+01, // 0x40416549A134069C + 3.36762458747825746741e+02, // 0x40750C3307F1A75F + 1.04687139975775130551e+03, // 0x40905B7C5037D523 + 8.90811346398256432622e+02, // 0x408BD67DA32E31E9 + 1.03787932439639277504e+02, // 0x4059F26D7C2EED53 +} + +// for x in [2.8570,2] = 1/[0.3499,0.5] +var p1R2 = [6]float64{ + 1.07710830106873743082e-07, // 0x3E7CE9D4F65544F4 + 1.17176219462683348094e-01, // 0x3FBDFF42BE760D83 + 2.36851496667608785174e+00, // 0x4002F2B7F98FAEC0 + 1.22426109148261232917e+01, // 0x40287C377F71A964 + 1.76939711271687727390e+01, // 0x4031B1A8177F8EE2 + 5.07352312588818499250e+00, // 0x40144B49A574C1FE +} +var p1S2 = [5]float64{ + 2.14364859363821409488e+01, // 0x40356FBD8AD5ECDC + 1.25290227168402751090e+02, // 0x405F529314F92CD5 + 2.32276469057162813669e+02, // 0x406D08D8D5A2DBD9 + 1.17679373287147100768e+02, // 0x405D6B7ADA1884A9 + 8.36463893371618283368e+00, // 0x4020BAB1F44E5192 +} + +func pone(x float64) float64 { + var p *[6]float64 + var q *[5]float64 + if x >= 8 { + p = &p1R8 + q = &p1S8 + } else if x >= 4.5454 { + p = &p1R5 + q = &p1S5 + } else if x >= 2.8571 { + p = &p1R3 + q = &p1S3 + } else if x >= 2 { + p = &p1R2 + q = &p1S2 + } + z := 1 / (x * x) + r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) + s := 1.0 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))) + return 1 + r/s +} + +// For x >= 8, the asymptotic expansions of qone is +// 3/8 s - 105/1024 s**3 - ..., where s = 1/x. 
+// We approximate qone by +// qone(x) = s*(0.375 + (R/S)) +// where R = qr1*s**2 + qr2*s**4 + ... + qr5*s**10 +// S = 1 + qs1*s**2 + ... + qs6*s**12 +// and +// | qone(x)/s -0.375-R/S | <= 2**(-61.13) + +// for x in [inf, 8] = 1/[0,0.125] +var q1R8 = [6]float64{ + 0.00000000000000000000e+00, // 0x0000000000000000 + -1.02539062499992714161e-01, // 0xBFBA3FFFFFFFFDF3 + -1.62717534544589987888e+01, // 0xC0304591A26779F7 + -7.59601722513950107896e+02, // 0xC087BCD053E4B576 + -1.18498066702429587167e+04, // 0xC0C724E740F87415 + -4.84385124285750353010e+04, // 0xC0E7A6D065D09C6A +} +var q1S8 = [6]float64{ + 1.61395369700722909556e+02, // 0x40642CA6DE5BCDE5 + 7.82538599923348465381e+03, // 0x40BE9162D0D88419 + 1.33875336287249578163e+05, // 0x4100579AB0B75E98 + 7.19657723683240939863e+05, // 0x4125F65372869C19 + 6.66601232617776375264e+05, // 0x412457D27719AD5C + -2.94490264303834643215e+05, // 0xC111F9690EA5AA18 +} + +// for x in [8,4.5454] = 1/[0.125,0.22001] +var q1R5 = [6]float64{ + -2.08979931141764104297e-11, // 0xBDB6FA431AA1A098 + -1.02539050241375426231e-01, // 0xBFBA3FFFCB597FEF + -8.05644828123936029840e+00, // 0xC0201CE6CA03AD4B + -1.83669607474888380239e+02, // 0xC066F56D6CA7B9B0 + -1.37319376065508163265e+03, // 0xC09574C66931734F + -2.61244440453215656817e+03, // 0xC0A468E388FDA79D +} +var q1S5 = [6]float64{ + 8.12765501384335777857e+01, // 0x405451B2FF5A11B2 + 1.99179873460485964642e+03, // 0x409F1F31E77BF839 + 1.74684851924908907677e+04, // 0x40D10F1F0D64CE29 + 4.98514270910352279316e+04, // 0x40E8576DAABAD197 + 2.79480751638918118260e+04, // 0x40DB4B04CF7C364B + -4.71918354795128470869e+03, // 0xC0B26F2EFCFFA004 +} + +// for x in [4.5454,2.8571] = 1/[0.2199,0.35001] ??? 
+var q1R3 = [6]float64{ + -5.07831226461766561369e-09, // 0xBE35CFA9D38FC84F + -1.02537829820837089745e-01, // 0xBFBA3FEB51AEED54 + -4.61011581139473403113e+00, // 0xC01270C23302D9FF + -5.78472216562783643212e+01, // 0xC04CEC71C25D16DA + -2.28244540737631695038e+02, // 0xC06C87D34718D55F + -2.19210128478909325622e+02, // 0xC06B66B95F5C1BF6 +} +var q1S3 = [6]float64{ + 4.76651550323729509273e+01, // 0x4047D523CCD367E4 + 6.73865112676699709482e+02, // 0x40850EEBC031EE3E + 3.38015286679526343505e+03, // 0x40AA684E448E7C9A + 5.54772909720722782367e+03, // 0x40B5ABBAA61D54A6 + 1.90311919338810798763e+03, // 0x409DBC7A0DD4DF4B + -1.35201191444307340817e+02, // 0xC060E670290A311F +} + +// for x in [2.8570,2] = 1/[0.3499,0.5] +var q1R2 = [6]float64{ + -1.78381727510958865572e-07, // 0xBE87F12644C626D2 + -1.02517042607985553460e-01, // 0xBFBA3E8E9148B010 + -2.75220568278187460720e+00, // 0xC006048469BB4EDA + -1.96636162643703720221e+01, // 0xC033A9E2C168907F + -4.23253133372830490089e+01, // 0xC04529A3DE104AAA + -2.13719211703704061733e+01, // 0xC0355F3639CF6E52 +} +var q1S2 = [6]float64{ + 2.95333629060523854548e+01, // 0x403D888A78AE64FF + 2.52981549982190529136e+02, // 0x406F9F68DB821CBA + 7.57502834868645436472e+02, // 0x4087AC05CE49A0F7 + 7.39393205320467245656e+02, // 0x40871B2548D4C029 + 1.55949003336666123687e+02, // 0x40637E5E3C3ED8D4 + -4.95949898822628210127e+00, // 0xC013D686E71BE86B +} + +func qone(x float64) float64 { + var p, q *[6]float64 + if x >= 8 { + p = &q1R8 + q = &q1S8 + } else if x >= 4.5454 { + p = &q1R5 + q = &q1S5 + } else if x >= 2.8571 { + p = &q1R3 + q = &q1S3 + } else if x >= 2 { + p = &q1R2 + q = &q1S2 + } + z := 1 / (x * x) + r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) + s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))) + return (0.375 + r/s) / x +} diff --git a/src/math/jn.go b/src/math/jn.go new file mode 100644 index 0000000..b1aca8f --- /dev/null +++ b/src/math/jn.go @@ -0,0 +1,304 @@ +// Copyright 2010 The Go 
Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Bessel function of the first and second kinds of order n. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_jn.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_jn(n, x), __ieee754_yn(n, x) +// floating point Bessel's function of the 1st and 2nd kind +// of order n +// +// Special cases: +// y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; +// y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. +// Note 2. About jn(n,x), yn(n,x) +// For n=0, j0(x) is called, +// for n=1, j1(x) is called, +// for n<x, forward recursion is used starting +// from values of j0(x) and j1(x). +// for n>x, a continued fraction approximation to +// j(n,x)/j(n-1,x) is evaluated and then backward +// recursion is used starting from a supposed value +// for j(n,x). The resulting value of j(0,x) is +// compared with the actual value to correct the +// supposed value of j(n,x). +// +// yn(n,x) is similar in all respects, except +// that forward recursion is used for all +// values of n>1. + +// Jn returns the order-n Bessel function of the first kind. 
+// +// Special cases are: +// Jn(n, ±Inf) = 0 +// Jn(n, NaN) = NaN +func Jn(n int, x float64) float64 { + const ( + TwoM29 = 1.0 / (1 << 29) // 2**-29 0x3e10000000000000 + Two302 = 1 << 302 // 2**302 0x52D0000000000000 + ) + // special cases + switch { + case IsNaN(x): + return x + case IsInf(x, 0): + return 0 + } + // J(-n, x) = (-1)**n * J(n, x), J(n, -x) = (-1)**n * J(n, x) + // Thus, J(-n, x) = J(n, -x) + + if n == 0 { + return J0(x) + } + if x == 0 { + return 0 + } + if n < 0 { + n, x = -n, -x + } + if n == 1 { + return J1(x) + } + sign := false + if x < 0 { + x = -x + if n&1 == 1 { + sign = true // odd n and negative x + } + } + var b float64 + if float64(n) <= x { + // Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + if x >= Two302 { // x > 2**302 + + // (x >> n**2) + // Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + // Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + // Let s=sin(x), c=cos(x), + // xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + // + // n sin(xn)*sqt2 cos(xn)*sqt2 + // ---------------------------------- + // 0 s-c c+s + // 1 -s-c -c+s + // 2 -s+c -c-s + // 3 s+c c-s + + var temp float64 + switch s, c := Sincos(x); n & 3 { + case 0: + temp = c + s + case 1: + temp = -c + s + case 2: + temp = -c - s + case 3: + temp = c - s + } + b = (1 / SqrtPi) * temp / Sqrt(x) + } else { + b = J1(x) + for i, a := 1, J0(x); i < n; i++ { + a, b = b, b*(float64(i+i)/x)-a // avoid underflow + } + } + } else { + if x < TwoM29 { // x < 2**-29 + // x is tiny, return the first Taylor expansion of J(n,x) + // J(n,x) = 1/n!*(x/2)**n - ... + + if n > 33 { // underflow + b = 0 + } else { + temp := x * 0.5 + b = temp + a := 1.0 + for i := 2; i <= n; i++ { + a *= float64(i) // a = n! + b *= temp // b = (x/2)**n + } + b /= a + } + } else { + // use backward recurrence + // x x**2 x**2 + // J(n,x)/J(n-1,x) = ---- ------ ------ ..... + // 2n - 2(n+1) - 2(n+2) + // + // 1 1 1 + // (for large x) = ---- ------ ------ ..... 
+ // 2n 2(n+1) 2(n+2) + // -- - ------ - ------ - + // x x x + // + // Let w = 2n/x and h=2/x, then the above quotient + // is equal to the continued fraction: + // 1 + // = ----------------------- + // 1 + // w - ----------------- + // 1 + // w+h - --------- + // w+2h - ... + // + // To determine how many terms needed, let + // Q(0) = w, Q(1) = w(w+h) - 1, + // Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + // When Q(k) > 1e4 good for single + // When Q(k) > 1e9 good for double + // When Q(k) > 1e17 good for quadruple + + // determine k + w := float64(n+n) / x + h := 2 / x + q0 := w + z := w + h + q1 := w*z - 1 + k := 1 + for q1 < 1e9 { + k++ + z += h + q0, q1 = q1, z*q1-q0 + } + m := n + n + t := 0.0 + for i := 2 * (n + k); i >= m; i -= 2 { + t = 1 / (float64(i)/x - t) + } + a := t + b = 1 + // estimate log((2/x)**n*n!) = n*log(2/x)+n*ln(n) + // Hence, if n*(log(2n/x)) > ... + // single 8.8722839355e+01 + // double 7.09782712893383973096e+02 + // long double 1.1356523406294143949491931077970765006170e+04 + // then recurrent value may overflow and the result is + // likely underflow to zero + + tmp := float64(n) + v := 2 / x + tmp = tmp * Log(Abs(v*tmp)) + if tmp < 7.09782712893383973096e+02 { + for i := n - 1; i > 0; i-- { + di := float64(i + i) + a, b = b, b*di/x-a + } + } else { + for i := n - 1; i > 0; i-- { + di := float64(i + i) + a, b = b, b*di/x-a + // scale b to avoid spurious overflow + if b > 1e100 { + a /= b + t /= b + b = 1 + } + } + } + b = t * J0(x) / b + } + } + if sign { + return -b + } + return b +} + +// Yn returns the order-n Bessel function of the second kind. 
+// +// Special cases are: +// Yn(n, +Inf) = 0 +// Yn(n ≥ 0, 0) = -Inf +// Yn(n < 0, 0) = +Inf if n is odd, -Inf if n is even +// Yn(n, x < 0) = NaN +// Yn(n, NaN) = NaN +func Yn(n int, x float64) float64 { + const Two302 = 1 << 302 // 2**302 0x52D0000000000000 + // special cases + switch { + case x < 0 || IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + } + + if n == 0 { + return Y0(x) + } + if x == 0 { + if n < 0 && n&1 == 1 { + return Inf(1) + } + return Inf(-1) + } + sign := false + if n < 0 { + n = -n + if n&1 == 1 { + sign = true // sign true if n < 0 && |n| odd + } + } + if n == 1 { + if sign { + return -Y1(x) + } + return Y1(x) + } + var b float64 + if x >= Two302 { // x > 2**302 + // (x >> n**2) + // Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + // Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + // Let s=sin(x), c=cos(x), + // xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + // + // n sin(xn)*sqt2 cos(xn)*sqt2 + // ---------------------------------- + // 0 s-c c+s + // 1 -s-c -c+s + // 2 -s+c -c-s + // 3 s+c c-s + + var temp float64 + switch s, c := Sincos(x); n & 3 { + case 0: + temp = s - c + case 1: + temp = -s - c + case 2: + temp = -s + c + case 3: + temp = s + c + } + b = (1 / SqrtPi) * temp / Sqrt(x) + } else { + a := Y0(x) + b = Y1(x) + // quit if b is -inf + for i := 1; i < n && !IsInf(b, -1); i++ { + a, b = b, (float64(i+i)/x)*b-a + } + } + if sign { + return -b + } + return b +} diff --git a/src/math/ldexp.go b/src/math/ldexp.go new file mode 100644 index 0000000..55c82f1 --- /dev/null +++ b/src/math/ldexp.go @@ -0,0 +1,50 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Ldexp is the inverse of Frexp. +// It returns frac × 2**exp. 
+// +// Special cases are: +// Ldexp(±0, exp) = ±0 +// Ldexp(±Inf, exp) = ±Inf +// Ldexp(NaN, exp) = NaN +func Ldexp(frac float64, exp int) float64 { + if haveArchLdexp { + return archLdexp(frac, exp) + } + return ldexp(frac, exp) +} + +func ldexp(frac float64, exp int) float64 { + // special cases + switch { + case frac == 0: + return frac // correctly return -0 + case IsInf(frac, 0) || IsNaN(frac): + return frac + } + frac, e := normalize(frac) + exp += e + x := Float64bits(frac) + exp += int(x>>shift)&mask - bias + if exp < -1075 { + return Copysign(0, frac) // underflow + } + if exp > 1023 { // overflow + if frac < 0 { + return Inf(-1) + } + return Inf(1) + } + var m float64 = 1 + if exp < -1022 { // denormal + exp += 53 + m = 1.0 / (1 << 53) // 2**-53 + } + x &^= mask << shift + x |= uint64(exp+bias) << shift + return m * Float64frombits(x) +} diff --git a/src/math/lgamma.go b/src/math/lgamma.go new file mode 100644 index 0000000..7af5871 --- /dev/null +++ b/src/math/lgamma.go @@ -0,0 +1,365 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point logarithm of the Gamma function. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_lgamma_r.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_lgamma_r(x, signgamp) +// Reentrant version of the logarithm of the Gamma function +// with user provided pointer for the sign of Gamma(x). 
+// +// Method: +// 1. Argument Reduction for 0 < x <= 8 +// Since gamma(1+s)=s*gamma(s), for x in [0,8], we may +// reduce x to a number in [1.5,2.5] by +// lgamma(1+s) = log(s) + lgamma(s) +// for example, +// lgamma(7.3) = log(6.3) + lgamma(6.3) +// = log(6.3*5.3) + lgamma(5.3) +// = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) +// 2. Polynomial approximation of lgamma around its +// minimum (ymin=1.461632144968362245) to maintain monotonicity. +// On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use +// Let z = x-ymin; +// lgamma(x) = -1.214862905358496078218 + z**2*poly(z) +// poly(z) is a 14 degree polynomial. +// 2. Rational approximation in the primary interval [2,3] +// We use the following approximation: +// s = x-2.0; +// lgamma(x) = 0.5*s + s*P(s)/Q(s) +// with accuracy +// |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 +// Our algorithms are based on the following observation +// +// zeta(2)-1 2 zeta(3)-1 3 +// lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... +// 2 3 +// +// where Euler = 0.5772156649... is the Euler constant, which +// is very close to 0.5. +// +// 3. For x>=8, we have +// lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... +// (better formula: +// lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) +// Let z = 1/x, then we approximation +// f(z) = lgamma(x) - (x-0.5)(log(x)-1) +// by +// 3 5 11 +// w = w0 + w1*z + w2*z + w3*z + ... + w6*z +// where +// |w - f(z)| < 2**-58.74 +// +// 4. For negative x, since (G is gamma function) +// -x*G(-x)*G(x) = pi/sin(pi*x), +// we have +// G(x) = pi/(sin(pi*x)*(-x)*G(-x)) +// since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0 +// Hence, for x<0, signgam = sign(sin(pi*x)) and +// lgamma(x) = log(|Gamma(x)|) +// = log(pi/(|x*sin(pi*x)|)) - lgamma(-x); +// Note: one should avoid computing pi*(-x) directly in the +// computation of sin(pi*(-x)). +// +// 5. 
Special Cases +// lgamma(2+s) ~ s*(1-Euler) for tiny s +// lgamma(1)=lgamma(2)=0 +// lgamma(x) ~ -log(x) for tiny x +// lgamma(0) = lgamma(inf) = inf +// lgamma(-integer) = +-inf +// +// + +var _lgamA = [...]float64{ + 7.72156649015328655494e-02, // 0x3FB3C467E37DB0C8 + 3.22467033424113591611e-01, // 0x3FD4A34CC4A60FAD + 6.73523010531292681824e-02, // 0x3FB13E001A5562A7 + 2.05808084325167332806e-02, // 0x3F951322AC92547B + 7.38555086081402883957e-03, // 0x3F7E404FB68FEFE8 + 2.89051383673415629091e-03, // 0x3F67ADD8CCB7926B + 1.19270763183362067845e-03, // 0x3F538A94116F3F5D + 5.10069792153511336608e-04, // 0x3F40B6C689B99C00 + 2.20862790713908385557e-04, // 0x3F2CF2ECED10E54D + 1.08011567247583939954e-04, // 0x3F1C5088987DFB07 + 2.52144565451257326939e-05, // 0x3EFA7074428CFA52 + 4.48640949618915160150e-05, // 0x3F07858E90A45837 +} +var _lgamR = [...]float64{ + 1.0, // placeholder + 1.39200533467621045958e+00, // 0x3FF645A762C4AB74 + 7.21935547567138069525e-01, // 0x3FE71A1893D3DCDC + 1.71933865632803078993e-01, // 0x3FC601EDCCFBDF27 + 1.86459191715652901344e-02, // 0x3F9317EA742ED475 + 7.77942496381893596434e-04, // 0x3F497DDACA41A95B + 7.32668430744625636189e-06, // 0x3EDEBAF7A5B38140 +} +var _lgamS = [...]float64{ + -7.72156649015328655494e-02, // 0xBFB3C467E37DB0C8 + 2.14982415960608852501e-01, // 0x3FCB848B36E20878 + 3.25778796408930981787e-01, // 0x3FD4D98F4F139F59 + 1.46350472652464452805e-01, // 0x3FC2BB9CBEE5F2F7 + 2.66422703033638609560e-02, // 0x3F9B481C7E939961 + 1.84028451407337715652e-03, // 0x3F5E26B67368F239 + 3.19475326584100867617e-05, // 0x3F00BFECDD17E945 +} +var _lgamT = [...]float64{ + 4.83836122723810047042e-01, // 0x3FDEF72BC8EE38A2 + -1.47587722994593911752e-01, // 0xBFC2E4278DC6C509 + 6.46249402391333854778e-02, // 0x3FB08B4294D5419B + -3.27885410759859649565e-02, // 0xBFA0C9A8DF35B713 + 1.79706750811820387126e-02, // 0x3F9266E7970AF9EC + -1.03142241298341437450e-02, // 0xBF851F9FBA91EC6A + 6.10053870246291332635e-03, // 0x3F78FCE0E370E344 
+ -3.68452016781138256760e-03, // 0xBF6E2EFFB3E914D7 + 2.25964780900612472250e-03, // 0x3F6282D32E15C915 + -1.40346469989232843813e-03, // 0xBF56FE8EBF2D1AF1 + 8.81081882437654011382e-04, // 0x3F4CDF0CEF61A8E9 + -5.38595305356740546715e-04, // 0xBF41A6109C73E0EC + 3.15632070903625950361e-04, // 0x3F34AF6D6C0EBBF7 + -3.12754168375120860518e-04, // 0xBF347F24ECC38C38 + 3.35529192635519073543e-04, // 0x3F35FD3EE8C2D3F4 +} +var _lgamU = [...]float64{ + -7.72156649015328655494e-02, // 0xBFB3C467E37DB0C8 + 6.32827064025093366517e-01, // 0x3FE4401E8B005DFF + 1.45492250137234768737e+00, // 0x3FF7475CD119BD6F + 9.77717527963372745603e-01, // 0x3FEF497644EA8450 + 2.28963728064692451092e-01, // 0x3FCD4EAEF6010924 + 1.33810918536787660377e-02, // 0x3F8B678BBF2BAB09 +} +var _lgamV = [...]float64{ + 1.0, + 2.45597793713041134822e+00, // 0x4003A5D7C2BD619C + 2.12848976379893395361e+00, // 0x40010725A42B18F5 + 7.69285150456672783825e-01, // 0x3FE89DFBE45050AF + 1.04222645593369134254e-01, // 0x3FBAAE55D6537C88 + 3.21709242282423911810e-03, // 0x3F6A5ABB57D0CF61 +} +var _lgamW = [...]float64{ + 4.18938533204672725052e-01, // 0x3FDACFE390C97D69 + 8.33333333333329678849e-02, // 0x3FB555555555553B + -2.77777777728775536470e-03, // 0xBF66C16C16B02E5C + 7.93650558643019558500e-04, // 0x3F4A019F98CF38B6 + -5.95187557450339963135e-04, // 0xBF4380CB8C0FE741 + 8.36339918996282139126e-04, // 0x3F4B67BA4CDAD5D1 + -1.63092934096575273989e-03, // 0xBF5AB89D0B9E43E4 +} + +// Lgamma returns the natural logarithm and sign (-1 or +1) of Gamma(x). 
+// +// Special cases are: +// Lgamma(+Inf) = +Inf +// Lgamma(0) = +Inf +// Lgamma(-integer) = +Inf +// Lgamma(-Inf) = -Inf +// Lgamma(NaN) = NaN +func Lgamma(x float64) (lgamma float64, sign int) { + const ( + Ymin = 1.461632144968362245 + Two52 = 1 << 52 // 0x4330000000000000 ~4.5036e+15 + Two53 = 1 << 53 // 0x4340000000000000 ~9.0072e+15 + Two58 = 1 << 58 // 0x4390000000000000 ~2.8823e+17 + Tiny = 1.0 / (1 << 70) // 0x3b90000000000000 ~8.47033e-22 + Tc = 1.46163214496836224576e+00 // 0x3FF762D86356BE3F + Tf = -1.21486290535849611461e-01 // 0xBFBF19B9BCC38A42 + // Tt = -(tail of Tf) + Tt = -3.63867699703950536541e-18 // 0xBC50C7CAA48A971F + ) + // special cases + sign = 1 + switch { + case IsNaN(x): + lgamma = x + return + case IsInf(x, 0): + lgamma = x + return + case x == 0: + lgamma = Inf(1) + return + } + + neg := false + if x < 0 { + x = -x + neg = true + } + + if x < Tiny { // if |x| < 2**-70, return -log(|x|) + if neg { + sign = -1 + } + lgamma = -Log(x) + return + } + var nadj float64 + if neg { + if x >= Two52 { // |x| >= 2**52, must be -integer + lgamma = Inf(1) + return + } + t := sinPi(x) + if t == 0 { + lgamma = Inf(1) // -integer + return + } + nadj = Log(Pi / Abs(t*x)) + if t < 0 { + sign = -1 + } + } + + switch { + case x == 1 || x == 2: // purge off 1 and 2 + lgamma = 0 + return + case x < 2: // use lgamma(x) = lgamma(x+1) - log(x) + var y float64 + var i int + if x <= 0.9 { + lgamma = -Log(x) + switch { + case x >= (Ymin - 1 + 0.27): // 0.7316 <= x <= 0.9 + y = 1 - x + i = 0 + case x >= (Ymin - 1 - 0.27): // 0.2316 <= x < 0.7316 + y = x - (Tc - 1) + i = 1 + default: // 0 < x < 0.2316 + y = x + i = 2 + } + } else { + lgamma = 0 + switch { + case x >= (Ymin + 0.27): // 1.7316 <= x < 2 + y = 2 - x + i = 0 + case x >= (Ymin - 0.27): // 1.2316 <= x < 1.7316 + y = x - Tc + i = 1 + default: // 0.9 < x < 1.2316 + y = x - 1 + i = 2 + } + } + switch i { + case 0: + z := y * y + p1 := _lgamA[0] + 
z*(_lgamA[2]+z*(_lgamA[4]+z*(_lgamA[6]+z*(_lgamA[8]+z*_lgamA[10])))) + p2 := z * (_lgamA[1] + z*(+_lgamA[3]+z*(_lgamA[5]+z*(_lgamA[7]+z*(_lgamA[9]+z*_lgamA[11]))))) + p := y*p1 + p2 + lgamma += (p - 0.5*y) + case 1: + z := y * y + w := z * y + p1 := _lgamT[0] + w*(_lgamT[3]+w*(_lgamT[6]+w*(_lgamT[9]+w*_lgamT[12]))) // parallel comp + p2 := _lgamT[1] + w*(_lgamT[4]+w*(_lgamT[7]+w*(_lgamT[10]+w*_lgamT[13]))) + p3 := _lgamT[2] + w*(_lgamT[5]+w*(_lgamT[8]+w*(_lgamT[11]+w*_lgamT[14]))) + p := z*p1 - (Tt - w*(p2+y*p3)) + lgamma += (Tf + p) + case 2: + p1 := y * (_lgamU[0] + y*(_lgamU[1]+y*(_lgamU[2]+y*(_lgamU[3]+y*(_lgamU[4]+y*_lgamU[5]))))) + p2 := 1 + y*(_lgamV[1]+y*(_lgamV[2]+y*(_lgamV[3]+y*(_lgamV[4]+y*_lgamV[5])))) + lgamma += (-0.5*y + p1/p2) + } + case x < 8: // 2 <= x < 8 + i := int(x) + y := x - float64(i) + p := y * (_lgamS[0] + y*(_lgamS[1]+y*(_lgamS[2]+y*(_lgamS[3]+y*(_lgamS[4]+y*(_lgamS[5]+y*_lgamS[6])))))) + q := 1 + y*(_lgamR[1]+y*(_lgamR[2]+y*(_lgamR[3]+y*(_lgamR[4]+y*(_lgamR[5]+y*_lgamR[6]))))) + lgamma = 0.5*y + p/q + z := 1.0 // Lgamma(1+s) = Log(s) + Lgamma(s) + switch i { + case 7: + z *= (y + 6) + fallthrough + case 6: + z *= (y + 5) + fallthrough + case 5: + z *= (y + 4) + fallthrough + case 4: + z *= (y + 3) + fallthrough + case 3: + z *= (y + 2) + lgamma += Log(z) + } + case x < Two58: // 8 <= x < 2**58 + t := Log(x) + z := 1 / x + y := z * z + w := _lgamW[0] + z*(_lgamW[1]+y*(_lgamW[2]+y*(_lgamW[3]+y*(_lgamW[4]+y*(_lgamW[5]+y*_lgamW[6]))))) + lgamma = (x-0.5)*(t-1) + w + default: // 2**58 <= x <= Inf + lgamma = x * (Log(x) - 1) + } + if neg { + lgamma = nadj - lgamma + } + return +} + +// sinPi(x) is a helper function for negative x +func sinPi(x float64) float64 { + const ( + Two52 = 1 << 52 // 0x4330000000000000 ~4.5036e+15 + Two53 = 1 << 53 // 0x4340000000000000 ~9.0072e+15 + ) + if x < 0.25 { + return -Sin(Pi * x) + } + + // argument reduction + z := Floor(x) + var n int + if z != x { // inexact + x = Mod(x, 2) + n = int(x * 4) + } else { + 
if x >= Two53 { // x must be even + x = 0 + n = 0 + } else { + if x < Two52 { + z = x + Two52 // exact + } + n = int(1 & Float64bits(z)) + x = float64(n) + n <<= 2 + } + } + switch n { + case 0: + x = Sin(Pi * x) + case 1, 2: + x = Cos(Pi * (0.5 - x)) + case 3, 4: + x = Sin(Pi * (1 - x)) + case 5, 6: + x = -Cos(Pi * (x - 1.5)) + default: + x = Sin(Pi * (x - 2)) + } + return -x +} diff --git a/src/math/log.go b/src/math/log.go new file mode 100644 index 0000000..1b3e306 --- /dev/null +++ b/src/math/log.go @@ -0,0 +1,128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point logarithm. +*/ + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_log.c +// and came with this notice. The go code is a simpler +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_log(x) +// Return the logarithm of x +// +// Method : +// 1. Argument Reduction: find k and f such that +// x = 2**k * (1+f), +// where sqrt(2)/2 < 1+f < sqrt(2) . +// +// 2. Approximation of log(1+f). +// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) +// = 2s + 2/3 s**3 + 2/5 s**5 + ....., +// = 2s + s*R +// We use a special Reme algorithm on [0,0.1716] to generate +// a polynomial of degree 14 to approximate R. The maximum error +// of this polynomial approximation is bounded by 2**-58.45. 
In +// other words, +// R(z) ~ L1*s**2 + L2*s**4 + L3*s**6 + L4*s**8 +// + L5*s**10 + L6*s**12 + L7*s**14 +// (the values of L1 to L7 are listed in the program) and +// +// | L1*s**2 +...+ L7*s**14 - R(z) | <= 2**-58.45 +// +// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. +// In order to guarantee error in log below 1ulp, we compute log by +// log(1+f) = f - s*(f - R) (if f is not too large) +// log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) +// +// 3. Finally, log(x) = k*Ln2 + log(1+f). +// = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) +// Here Ln2 is split into two floating point numbers: +// Ln2_hi + Ln2_lo, +// where n*Ln2_hi is always exact for |n| < 2000. +// +// Special cases: +// log(x) is NaN with signal if x < 0 (including -INF) ; +// log(+INF) is +INF; log(0) is -INF with signal; +// log(NaN) is that NaN with no signal. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. + +// Log returns the natural logarithm of x.
+// +// Special cases are: +// Log(+Inf) = +Inf +// Log(0) = -Inf +// Log(x < 0) = NaN +// Log(NaN) = NaN +func Log(x float64) float64 { + if haveArchLog { + return archLog(x) + } + return log(x) +} + +func log(x float64) float64 { + const ( + Ln2Hi = 6.93147180369123816490e-01 /* 3fe62e42 fee00000 */ + Ln2Lo = 1.90821492927058770002e-10 /* 3dea39ef 35793c76 */ + L1 = 6.666666666666735130e-01 /* 3FE55555 55555593 */ + L2 = 3.999999999940941908e-01 /* 3FD99999 9997FA04 */ + L3 = 2.857142874366239149e-01 /* 3FD24924 94229359 */ + L4 = 2.222219843214978396e-01 /* 3FCC71C5 1D8E78AF */ + L5 = 1.818357216161805012e-01 /* 3FC74664 96CB03DE */ + L6 = 1.531383769920937332e-01 /* 3FC39A09 D078C69F */ + L7 = 1.479819860511658591e-01 /* 3FC2F112 DF3E5244 */ + ) + + // special cases + switch { + case IsNaN(x) || IsInf(x, 1): + return x + case x < 0: + return NaN() + case x == 0: + return Inf(-1) + } + + // reduce + f1, ki := Frexp(x) + if f1 < Sqrt2/2 { + f1 *= 2 + ki-- + } + f := f1 - 1 + k := float64(ki) + + // compute + s := f / (2 + f) + s2 := s * s + s4 := s2 * s2 + t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) + t2 := s4 * (L2 + s4*(L4+s4*L6)) + R := t1 + t2 + hfsq := 0.5 * f * f + return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f) +} diff --git a/src/math/log10.go b/src/math/log10.go new file mode 100644 index 0000000..e6916a5 --- /dev/null +++ b/src/math/log10.go @@ -0,0 +1,37 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Log10 returns the decimal logarithm of x. +// The special cases are the same as for Log. +func Log10(x float64) float64 { + if haveArchLog10 { + return archLog10(x) + } + return log10(x) +} + +func log10(x float64) float64 { + return Log(x) * (1 / Ln10) +} + +// Log2 returns the binary logarithm of x. +// The special cases are the same as for Log. 
+func Log2(x float64) float64 { + if haveArchLog2 { + return archLog2(x) + } + return log2(x) +} + +func log2(x float64) float64 { + frac, exp := Frexp(x) + // Make sure exact powers of two give an exact answer. + // Don't depend on Log(0.5)*(1/Ln2)+exp being exactly exp-1. + if frac == 0.5 { + return float64(exp - 1) + } + return Log(frac)*(1/Ln2) + float64(exp) +} diff --git a/src/math/log10_s390x.s b/src/math/log10_s390x.s new file mode 100644 index 0000000..3638afe --- /dev/null +++ b/src/math/log10_s390x.s @@ -0,0 +1,156 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA log10rodataL19<>+0(SB)/8, $0.000000000000000000E+00 +DATA log10rodataL19<>+8(SB)/8, $-1.0 +DATA log10rodataL19<>+16(SB)/8, $0x7FF8000000000000 //+NanN +DATA log10rodataL19<>+24(SB)/8, $.15375570329280596749 +DATA log10rodataL19<>+32(SB)/8, $.60171950900703668594E+04 +DATA log10rodataL19<>+40(SB)/8, $-1.9578460454940795898 +DATA log10rodataL19<>+48(SB)/8, $0.78962633073318517310E-01 +DATA log10rodataL19<>+56(SB)/8, $-.71784211884836937993E-02 +DATA log10rodataL19<>+64(SB)/8, $0.87011165920689940661E-03 +DATA log10rodataL19<>+72(SB)/8, $-.11865158981621437541E-03 +DATA log10rodataL19<>+80(SB)/8, $0.17258413403018680410E-04 +DATA log10rodataL19<>+88(SB)/8, $0.40752932047883484315E-06 +DATA log10rodataL19<>+96(SB)/8, $-.26149194688832680410E-05 +DATA log10rodataL19<>+104(SB)/8, $0.92453396963875026759E-08 +DATA log10rodataL19<>+112(SB)/8, $-.64572084905921579630E-07 +DATA log10rodataL19<>+120(SB)/8, $-5.5 +DATA log10rodataL19<>+128(SB)/8, $18446744073709551616. 
+GLOBL log10rodataL19<>+0(SB), RODATA, $136 + +// Table of log10 correction terms +DATA log10tab2074<>+0(SB)/8, $0.254164497922885069E-01 +DATA log10tab2074<>+8(SB)/8, $0.179018857989381839E-01 +DATA log10tab2074<>+16(SB)/8, $0.118926768029048674E-01 +DATA log10tab2074<>+24(SB)/8, $0.722595568238080033E-02 +DATA log10tab2074<>+32(SB)/8, $0.376393570022739135E-02 +DATA log10tab2074<>+40(SB)/8, $0.138901135928814326E-02 +DATA log10tab2074<>+48(SB)/8, $0 +DATA log10tab2074<>+56(SB)/8, $-0.490780466387818203E-03 +DATA log10tab2074<>+64(SB)/8, $-0.159811431402137571E-03 +DATA log10tab2074<>+72(SB)/8, $0.925796337165100494E-03 +DATA log10tab2074<>+80(SB)/8, $0.270683176738357035E-02 +DATA log10tab2074<>+88(SB)/8, $0.513079030821304758E-02 +DATA log10tab2074<>+96(SB)/8, $0.815089785397996303E-02 +DATA log10tab2074<>+104(SB)/8, $0.117253060262419215E-01 +DATA log10tab2074<>+112(SB)/8, $0.158164239345343963E-01 +DATA log10tab2074<>+120(SB)/8, $0.203903595489229786E-01 +GLOBL log10tab2074<>+0(SB), RODATA, $128 + +// Log10 returns the decimal logarithm of the argument. +// +// Special cases are: +// Log(+Inf) = +Inf +// Log(0) = -Inf +// Log(x < 0) = NaN +// Log(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·log10Asm(SB),NOSPLIT,$8-16 + FMOVD x+0(FP), F0 + MOVD $log10rodataL19<>+0(SB), R9 + FMOVD F0, x-8(SP) + WORD $0xC0298006 //iilf %r2,2147909631 + BYTE $0x7F + BYTE $0xFF + WORD $0x5840F008 //l %r4, 8(%r15) + SUBW R4, R2, R3 + RISBGZ $32, $47, $0, R3, R5 + MOVH $0x0, R1 + RISBGN $0, $31, $32, R5, R1 + WORD $0xC0590016 //iilf %r5,1507327 + BYTE $0xFF + BYTE $0xFF + MOVW R4, R10 + MOVW R5, R11 + CMPBLE R10, R11, L2 + WORD $0xC0297FEF //iilf %r2,2146435071 + BYTE $0xFF + BYTE $0xFF + MOVW R4, R10 + MOVW R2, R11 + CMPBLE R10, R11, L16 +L3: +L1: + FMOVD F0, ret+8(FP) + RET + +L2: + LTDBR F0, F0 + BLEU L13 + WORD $0xED009080 //mdb %f0,.L20-.L19(%r9) + BYTE $0x00 + BYTE $0x1C + FMOVD F0, x-8(SP) + WORD $0x5B20F008 //s %r2, 8(%r15) + RISBGZ $57, $60, $51, R2, R3 + ANDW $0xFFFF0000, R2 + RISBGN $0, $31, $32, R2, R1 + ADDW $0x4000000, R2 + BLEU L17 +L8: + SRW $8, R2, R2 + ORW $0x45000000, R2 +L4: + FMOVD log10rodataL19<>+120(SB), F2 + LDGR R1, F4 + WFMADB V4, V0, V2, V0 + FMOVD log10rodataL19<>+112(SB), F4 + FMOVD log10rodataL19<>+104(SB), F6 + WFMADB V0, V6, V4, V6 + FMOVD log10rodataL19<>+96(SB), F4 + FMOVD log10rodataL19<>+88(SB), F1 + WFMADB V0, V1, V4, V1 + WFMDB V0, V0, V4 + FMOVD log10rodataL19<>+80(SB), F2 + WFMADB V6, V4, V1, V6 + FMOVD log10rodataL19<>+72(SB), F1 + WFMADB V0, V2, V1, V2 + FMOVD log10rodataL19<>+64(SB), F1 + RISBGZ $57, $60, $0, R3, R3 + WFMADB V4, V6, V2, V6 + FMOVD log10rodataL19<>+56(SB), F2 + WFMADB V0, V1, V2, V1 + VLVGF $0, R2, V2 + WFMADB V4, V6, V1, V4 + LDEBR F2, F2 + FMOVD log10rodataL19<>+48(SB), F6 + WFMADB V0, V4, V6, V4 + FMOVD log10rodataL19<>+40(SB), F1 + FMOVD log10rodataL19<>+32(SB), F6 + MOVD $log10tab2074<>+0(SB), R1 + WFMADB V2, V1, V6, V2 + WORD $0x68331000 //ld %f3,0(%r3,%r1) + WFMADB V0, V4, V3, V0 + FMOVD log10rodataL19<>+24(SB), F4 + FMADD F4, F2, F0 + FMOVD F0, ret+8(FP) + RET + +L16: + RISBGZ $40, $55, $56, R3, R2 + RISBGZ $57, $60, $51, R3, R3 + ORW $0x45000000, R2 + BR L4 +L13: + BGE L18 //jnl .L18 + BVS L18 + 
FMOVD log10rodataL19<>+16(SB), F0 + BR L1 +L17: + SRAW $1, R2, R2 + SUBW $0x40000000, R2 + BR L8 +L18: + FMOVD log10rodataL19<>+8(SB), F0 + WORD $0xED009000 //ddb %f0,.L36-.L19(%r9) + BYTE $0x00 + BYTE $0x1D + BR L1 diff --git a/src/math/log1p.go b/src/math/log1p.go new file mode 100644 index 0000000..c117f72 --- /dev/null +++ b/src/math/log1p.go @@ -0,0 +1,202 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// double log1p(double x) +// +// Method : +// 1. Argument Reduction: find k and f such that +// 1+x = 2**k * (1+f), +// where sqrt(2)/2 < 1+f < sqrt(2) . +// +// Note. If k=0, then f=x is exact. However, if k!=0, then f +// may not be representable exactly. In that case, a correction +// term is need. Let u=1+x rounded. Let c = (1+x)-u, then +// log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), +// and add back the correction term c/u. +// (Note: when x > 2**53, one can simply return log(x)) +// +// 2. Approximation of log1p(f). 
+// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) +// = 2s + 2/3 s**3 + 2/5 s**5 + ....., +// = 2s + s*R +// We use a special Remez algorithm on [0,0.1716] to generate +// a polynomial of degree 14 to approximate R. The maximum error +// of this polynomial approximation is bounded by 2**-58.45. In +// other words, +// R(z) ~ Lp1*s**2 + Lp2*s**4 + Lp3*s**6 + Lp4*s**8 +// + Lp5*s**10 + Lp6*s**12 + Lp7*s**14 +// (the values of Lp1 to Lp7 are listed in the program) +// and +// +// | Lp1*s**2 +...+ Lp7*s**14 - R(z) | <= 2**-58.45 +// +// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. +// In order to guarantee error in log below 1ulp, we compute log +// by +// log1p(f) = f - (hfsq - s*(hfsq+R)). +// +// 3. Finally, log1p(x) = k*ln2 + log1p(f). +// = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) +// Here ln2 is split into two floating point numbers: +// ln2_hi + ln2_lo, +// where n*ln2_hi is always exact for |n| < 2000. +// +// Special cases: +// log1p(x) is NaN with signal if x < -1 (including -INF) ; +// log1p(+INF) is +INF; log1p(-1) is -INF with signal; +// log1p(NaN) is that NaN with no signal. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// +// Note: Assuming log() returns an accurate answer, the following +// algorithm can be used to compute log1p(x) to within a few ULP: +// +// u = 1+x; +// if(u==1.0) return x ; else +// return log(u)*(x/(u-1.0)); +// +// See HP-15C Advanced Functions Handbook, p.193. + +// Log1p returns the natural logarithm of 1 plus its argument x. +// It is more accurate than Log(1 + x) when x is near zero.
+// +// Special cases are: +// Log1p(+Inf) = +Inf +// Log1p(±0) = ±0 +// Log1p(-1) = -Inf +// Log1p(x < -1) = NaN +// Log1p(NaN) = NaN +func Log1p(x float64) float64 { + if haveArchLog1p { + return archLog1p(x) + } + return log1p(x) +} + +func log1p(x float64) float64 { + const ( + Sqrt2M1 = 4.142135623730950488017e-01 // Sqrt(2)-1 = 0x3fda827999fcef34 + Sqrt2HalfM1 = -2.928932188134524755992e-01 // Sqrt(2)/2-1 = 0xbfd2bec333018866 + Small = 1.0 / (1 << 29) // 2**-29 = 0x3e20000000000000 + Tiny = 1.0 / (1 << 54) // 2**-54 + Two53 = 1 << 53 // 2**53 + Ln2Hi = 6.93147180369123816490e-01 // 3fe62e42fee00000 + Ln2Lo = 1.90821492927058770002e-10 // 3dea39ef35793c76 + Lp1 = 6.666666666666735130e-01 // 3FE5555555555593 + Lp2 = 3.999999999940941908e-01 // 3FD999999997FA04 + Lp3 = 2.857142874366239149e-01 // 3FD2492494229359 + Lp4 = 2.222219843214978396e-01 // 3FCC71C51D8E78AF + Lp5 = 1.818357216161805012e-01 // 3FC7466496CB03DE + Lp6 = 1.531383769920937332e-01 // 3FC39A09D078C69F + Lp7 = 1.479819860511658591e-01 // 3FC2F112DF3E5244 + ) + + // special cases + switch { + case x < -1 || IsNaN(x): // includes -Inf + return NaN() + case x == -1: + return Inf(-1) + case IsInf(x, 1): + return Inf(1) + } + + absx := Abs(x) + + var f float64 + var iu uint64 + k := 1 + if absx < Sqrt2M1 { // |x| < Sqrt(2)-1 + if absx < Small { // |x| < 2**-29 + if absx < Tiny { // |x| < 2**-54 + return x + } + return x - x*x*0.5 + } + if x > Sqrt2HalfM1 { // Sqrt(2)/2-1 < x + // (Sqrt(2)/2-1) < x < (Sqrt(2)-1) + k = 0 + f = x + iu = 1 + } + } + var c float64 + if k != 0 { + var u float64 + if absx < Two53 { // 1<<53 + u = 1.0 + x + iu = Float64bits(u) + k = int((iu >> 52) - 1023) + // correction term + if k > 0 { + c = 1.0 - (u - x) + } else { + c = x - (u - 1.0) + } + c /= u + } else { + u = x + iu = Float64bits(u) + k = int((iu >> 52) - 1023) + c = 0 + } + iu &= 0x000fffffffffffff + if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2) + u = Float64frombits(iu | 0x3ff0000000000000) // normalize u + 
} else { + k++ + u = Float64frombits(iu | 0x3fe0000000000000) // normalize u/2 + iu = (0x0010000000000000 - iu) >> 2 + } + f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2) + } + hfsq := 0.5 * f * f + var s, R, z float64 + if iu == 0 { // |f| < 2**-20 + if f == 0 { + if k == 0 { + return 0 + } + c += float64(k) * Ln2Lo + return float64(k)*Ln2Hi + c + } + R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division + if k == 0 { + return f - R + } + return float64(k)*Ln2Hi - ((R - (float64(k)*Ln2Lo + c)) - f) + } + s = f / (2.0 + f) + z = s * s + R = z * (Lp1 + z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))) + if k == 0 { + return f - (hfsq - s*(hfsq+R)) + } + return float64(k)*Ln2Hi - ((hfsq - (s*(hfsq+R) + (float64(k)*Ln2Lo + c))) - f) +} diff --git a/src/math/log1p_s390x.s b/src/math/log1p_s390x.s new file mode 100644 index 0000000..00eb374 --- /dev/null +++ b/src/math/log1p_s390x.s @@ -0,0 +1,180 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Constants +DATA ·log1pxlim<> + 0(SB)/4, $0xfff00000 +GLOBL ·log1pxlim<> + 0(SB), RODATA, $4 +DATA ·log1pxzero<> + 0(SB)/8, $0.0 +GLOBL ·log1pxzero<> + 0(SB), RODATA, $8 +DATA ·log1pxminf<> + 0(SB)/8, $0xfff0000000000000 +GLOBL ·log1pxminf<> + 0(SB), RODATA, $8 +DATA ·log1pxnan<> + 0(SB)/8, $0x7ff8000000000000 +GLOBL ·log1pxnan<> + 0(SB), RODATA, $8 +DATA ·log1pyout<> + 0(SB)/8, $0x40fce621e71da000 +GLOBL ·log1pyout<> + 0(SB), RODATA, $8 +DATA ·log1pxout<> + 0(SB)/8, $0x40f1000000000000 +GLOBL ·log1pxout<> + 0(SB), RODATA, $8 +DATA ·log1pxl2<> + 0(SB)/8, $0xbfda7aecbeba4e46 +GLOBL ·log1pxl2<> + 0(SB), RODATA, $8 +DATA ·log1pxl1<> + 0(SB)/8, $0x3ffacde700000000 +GLOBL ·log1pxl1<> + 0(SB), RODATA, $8 +DATA ·log1pxa<> + 0(SB)/8, $5.5 +GLOBL ·log1pxa<> + 0(SB), RODATA, $8 +DATA ·log1pxmone<> + 0(SB)/8, $-1.0 +GLOBL ·log1pxmone<> + 0(SB), RODATA, $8 + +// Minimax polynomial approximations +DATA ·log1pc8<> + 0(SB)/8, $0.212881813645679599E-07 +GLOBL ·log1pc8<> + 0(SB), RODATA, $8 +DATA ·log1pc7<> + 0(SB)/8, $-.148682720127920854E-06 +GLOBL ·log1pc7<> + 0(SB), RODATA, $8 +DATA ·log1pc6<> + 0(SB)/8, $0.938370938292558173E-06 +GLOBL ·log1pc6<> + 0(SB), RODATA, $8 +DATA ·log1pc5<> + 0(SB)/8, $-.602107458843052029E-05 +GLOBL ·log1pc5<> + 0(SB), RODATA, $8 +DATA ·log1pc4<> + 0(SB)/8, $0.397389654305194527E-04 +GLOBL ·log1pc4<> + 0(SB), RODATA, $8 +DATA ·log1pc3<> + 0(SB)/8, $-.273205381970859341E-03 +GLOBL ·log1pc3<> + 0(SB), RODATA, $8 +DATA ·log1pc2<> + 0(SB)/8, $0.200350613573012186E-02 +GLOBL ·log1pc2<> + 0(SB), RODATA, $8 +DATA ·log1pc1<> + 0(SB)/8, $-.165289256198351540E-01 +GLOBL ·log1pc1<> + 0(SB), RODATA, $8 +DATA ·log1pc0<> + 0(SB)/8, $0.181818181818181826E+00 +GLOBL ·log1pc0<> + 0(SB), RODATA, $8 + + +// Table of log10 correction terms +DATA ·log1ptab<> + 0(SB)/8, $0.585235384085551248E-01 +DATA ·log1ptab<> + 8(SB)/8, $0.412206153771168640E-01 +DATA ·log1ptab<> + 16(SB)/8, $0.273839003221648339E-01 +DATA ·log1ptab<> + 24(SB)/8, 
$0.166383778368856480E-01 +DATA ·log1ptab<> + 32(SB)/8, $0.866678223433169637E-02 +DATA ·log1ptab<> + 40(SB)/8, $0.319831684989627514E-02 +DATA ·log1ptab<> + 48(SB)/8, $-.000000000000000000E+00 +DATA ·log1ptab<> + 56(SB)/8, $-.113006378583725549E-02 +DATA ·log1ptab<> + 64(SB)/8, $-.367979419636602491E-03 +DATA ·log1ptab<> + 72(SB)/8, $0.213172484510484979E-02 +DATA ·log1ptab<> + 80(SB)/8, $0.623271047682013536E-02 +DATA ·log1ptab<> + 88(SB)/8, $0.118140812789696885E-01 +DATA ·log1ptab<> + 96(SB)/8, $0.187681358930914206E-01 +DATA ·log1ptab<> + 104(SB)/8, $0.269985148668178992E-01 +DATA ·log1ptab<> + 112(SB)/8, $0.364186619761331328E-01 +DATA ·log1ptab<> + 120(SB)/8, $0.469505379381388441E-01 +GLOBL ·log1ptab<> + 0(SB), RODATA, $128 + +// Log1p returns the natural logarithm of 1 plus its argument x. +// It is more accurate than Log(1 + x) when x is near zero. +// +// Special cases are: +// Log1p(+Inf) = +Inf +// Log1p(±0) = ±0 +// Log1p(-1) = -Inf +// Log1p(x < -1) = NaN +// Log1p(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·log1pAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·log1pxmone<>+0(SB), R1 + MOVD ·log1pxout<>+0(SB), R2 + FMOVD 0(R1), F3 + MOVD $·log1pxa<>+0(SB), R1 + MOVWZ ·log1pxlim<>+0(SB), R0 + FMOVD 0(R1), F1 + MOVD $·log1pc8<>+0(SB), R1 + FMOVD 0(R1), F5 + MOVD $·log1pc7<>+0(SB), R1 + VLEG $0, 0(R1), V20 + MOVD $·log1pc6<>+0(SB), R1 + WFSDB V0, V3, V4 + VLEG $0, 0(R1), V18 + MOVD $·log1pc5<>+0(SB), R1 + VLEG $0, 0(R1), V16 + MOVD R2, R5 + LGDR F4, R3 + WORD $0xC0190006 //iilf %r1,425983 + BYTE $0x7F + BYTE $0xFF + SRAD $32, R3, R3 + SUBW R3, R1 + SRW $16, R1, R1 + BYTE $0x18 //lr %r4,%r1 + BYTE $0x41 + RISBGN $0, $15, $48, R4, R2 + RISBGN $16, $31, $32, R4, R5 + MOVW R0, R6 + MOVW R3, R7 + CMPBGT R6, R7, L8 + WFCEDBS V4, V4, V6 + MOVD $·log1pxzero<>+0(SB), R1 + FMOVD 0(R1), F2 + BVS LEXITTAGlog1p + WORD $0xB3130044 // lcdbr %f4,%f4 + WFCEDBS V2, V4, V6 + BEQ L9 + WFCHDBS V4, V2, V2 + BEQ LEXITTAGlog1p + MOVD $·log1pxnan<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET + +L8: + LDGR R2, F2 + FSUB F4, F3 + FMADD F2, F4, F1 + MOVD $·log1pc4<>+0(SB), R2 + WORD $0xB3130041 // lcdbr %f4,%f1 + FMOVD 0(R2), F7 + FSUB F3, F0 + MOVD $·log1pc3<>+0(SB), R2 + FMOVD 0(R2), F3 + MOVD $·log1pc2<>+0(SB), R2 + WFMDB V1, V1, V6 + FMADD F7, F4, F3 + WFMSDB V0, V2, V1, V0 + FMOVD 0(R2), F7 + WFMADB V4, V5, V20, V5 + MOVD $·log1pc1<>+0(SB), R2 + FMOVD 0(R2), F2 + FMADD F7, F4, F2 + WFMADB V4, V18, V16, V4 + FMADD F3, F6, F2 + WFMADB V5, V6, V4, V5 + FMUL F6, F6 + MOVD $·log1pc0<>+0(SB), R2 + WFMADB V6, V5, V2, V6 + FMOVD 0(R2), F4 + WFMADB V0, V6, V4, V6 + RISBGZ $57, $60, $3, R1, R1 + MOVD $·log1ptab<>+0(SB), R2 + MOVD $·log1pxl1<>+0(SB), R3 + WORD $0x68112000 //ld %f1,0(%r1,%r2) + FMOVD 0(R3), F2 + WFMADB V0, V6, V1, V0 + MOVD $·log1pyout<>+0(SB), R1 + LDGR R5, F6 + FMOVD 0(R1), F4 + WFMSDB V2, V6, V4, V2 + MOVD $·log1pxl2<>+0(SB), R1 + FMOVD 0(R1), F4 + FMADD F4, F2, F0 + FMOVD F0, ret+8(FP) + RET + +L9: + MOVD $·log1pxminf<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, 
ret+8(FP) + RET + + +LEXITTAGlog1p: + FMOVD F0, ret+8(FP) + RET + diff --git a/src/math/log_amd64.s b/src/math/log_amd64.s new file mode 100644 index 0000000..d84091f --- /dev/null +++ b/src/math/log_amd64.s @@ -0,0 +1,112 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +#define HSqrt2 7.07106781186547524401e-01 // sqrt(2)/2 +#define Ln2Hi 6.93147180369123816490e-01 // 0x3fe62e42fee00000 +#define Ln2Lo 1.90821492927058770002e-10 // 0x3dea39ef35793c76 +#define L1 6.666666666666735130e-01 // 0x3FE5555555555593 +#define L2 3.999999999940941908e-01 // 0x3FD999999997FA04 +#define L3 2.857142874366239149e-01 // 0x3FD2492494229359 +#define L4 2.222219843214978396e-01 // 0x3FCC71C51D8E78AF +#define L5 1.818357216161805012e-01 // 0x3FC7466496CB03DE +#define L6 1.531383769920937332e-01 // 0x3FC39A09D078C69F +#define L7 1.479819860511658591e-01 // 0x3FC2F112DF3E5244 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 +#define PosInf 0x7FF0000000000000 + +// func Log(x float64) float64 +TEXT ·archLog(SB),NOSPLIT,$0 + // test bits for special cases + MOVQ x+0(FP), BX + MOVQ $~(1<<63), AX // sign bit mask + ANDQ BX, AX + JEQ isZero + MOVQ $0, AX + CMPQ AX, BX + JGT isNegative + MOVQ $PosInf, AX + CMPQ AX, BX + JLE isInfOrNaN + // f1, ki := math.Frexp(x); k := float64(ki) + MOVQ BX, X0 + MOVQ $0x000FFFFFFFFFFFFF, AX + MOVQ AX, X2 + ANDPD X0, X2 + MOVSD $0.5, X0 // 0x3FE0000000000000 + ORPD X0, X2 // X2= f1 + SHRQ $52, BX + ANDL $0x7FF, BX + SUBL $0x3FE, BX + XORPS X1, X1 // break dependency for CVTSL2SD + CVTSL2SD BX, X1 // x1= k, x2= f1 + // if f1 < math.Sqrt2/2 { k -= 1; f1 *= 2 } + MOVSD $HSqrt2, X0 // x0= 0.7071, x1= k, x2= f1 + CMPSD X2, X0, 5 // cmpnlt; x0= 0 or ^0, x1= k, x2 = f1 + MOVSD $1.0, X3 // x0= 0 or ^0, x1= k, x2 = f1, x3= 1 + ANDPD X0, X3 // x0= 0 or ^0, x1= k, x2 = f1, x3= 0 or 1 + SUBSD X3, X1 // x0= 0 or 
^0, x1= k, x2 = f1, x3= 0 or 1 + MOVSD $1.0, X0 // x0= 1, x1= k, x2= f1, x3= 0 or 1 + ADDSD X0, X3 // x0= 1, x1= k, x2= f1, x3= 1 or 2 + MULSD X3, X2 // x0= 1, x1= k, x2= f1 + // f := f1 - 1 + SUBSD X0, X2 // x1= k, x2= f + // s := f / (2 + f) + MOVSD $2.0, X0 + ADDSD X2, X0 + MOVAPD X2, X3 + DIVSD X0, X3 // x1=k, x2= f, x3= s + // s2 := s * s + MOVAPD X3, X4 // x1= k, x2= f, x3= s + MULSD X4, X4 // x1= k, x2= f, x3= s, x4= s2 + // s4 := s2 * s2 + MOVAPD X4, X5 // x1= k, x2= f, x3= s, x4= s2 + MULSD X5, X5 // x1= k, x2= f, x3= s, x4= s2, x5= s4 + // t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) + MOVSD $L7, X6 + MULSD X5, X6 + ADDSD $L5, X6 + MULSD X5, X6 + ADDSD $L3, X6 + MULSD X5, X6 + ADDSD $L1, X6 + MULSD X6, X4 // x1= k, x2= f, x3= s, x4= t1, x5= s4 + // t2 := s4 * (L2 + s4*(L4+s4*L6)) + MOVSD $L6, X6 + MULSD X5, X6 + ADDSD $L4, X6 + MULSD X5, X6 + ADDSD $L2, X6 + MULSD X6, X5 // x1= k, x2= f, x3= s, x4= t1, x5= t2 + // R := t1 + t2 + ADDSD X5, X4 // x1= k, x2= f, x3= s, x4= R + // hfsq := 0.5 * f * f + MOVSD $0.5, X0 + MULSD X2, X0 + MULSD X2, X0 // x0= hfsq, x1= k, x2= f, x3= s, x4= R + // return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f) + ADDSD X0, X4 // x0= hfsq, x1= k, x2= f, x3= s, x4= hfsq+R + MULSD X4, X3 // x0= hfsq, x1= k, x2= f, x3= s*(hfsq+R) + MOVSD $Ln2Lo, X4 + MULSD X1, X4 // x4= k*Ln2Lo + ADDSD X4, X3 // x0= hfsq, x1= k, x2= f, x3= s*(hfsq+R)+k*Ln2Lo + SUBSD X3, X0 // x0= hfsq-(s*(hfsq+R)+k*Ln2Lo), x1= k, x2= f + SUBSD X2, X0 // x0= (hfsq-(s*(hfsq+R)+k*Ln2Lo))-f, x1= k + MULSD $Ln2Hi, X1 // x0= (hfsq-(s*(hfsq+R)+k*Ln2Lo))-f, x1= k*Ln2Hi + SUBSD X0, X1 // x1= k*Ln2Hi-((hfsq-(s*(hfsq+R)+k*Ln2Lo))-f) + MOVSD X1, ret+8(FP) + RET +isInfOrNaN: + MOVQ BX, ret+8(FP) // +Inf or NaN, return x + RET +isNegative: + MOVQ $NaN, AX + MOVQ AX, ret+8(FP) // return NaN + RET +isZero: + MOVQ $NegInf, AX + MOVQ AX, ret+8(FP) // return -Inf + RET diff --git a/src/math/log_asm.go b/src/math/log_asm.go new file mode 100644 index 0000000..848cce1 --- /dev/null +++ 
b/src/math/log_asm.go @@ -0,0 +1,11 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 || s390x + +package math + +// haveArchLog is true because this file is built only for amd64 and s390x +// (see the build constraint above). +const haveArchLog = true + +// archLog is declared without a Go body; log_amd64.s defines ·archLog for amd64. +// NOTE(review): log_s390x.s defines ·logAsm rather than ·archLog - the s390x +// binding is not visible here; confirm where it is wired up. +func archLog(x float64) float64 diff --git a/src/math/log_s390x.s b/src/math/log_s390x.s new file mode 100644 index 0000000..4b514f3 --- /dev/null +++ b/src/math/log_s390x.s @@ -0,0 +1,168 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial approximations +DATA ·logrodataL21<> + 0(SB)/8, $-.499999999999999778E+00 +DATA ·logrodataL21<> + 8(SB)/8, $0.333333333333343751E+00 +DATA ·logrodataL21<> + 16(SB)/8, $-.250000000001606881E+00 +DATA ·logrodataL21<> + 24(SB)/8, $0.199999999971603032E+00 +DATA ·logrodataL21<> + 32(SB)/8, $-.166666663114122038E+00 +DATA ·logrodataL21<> + 40(SB)/8, $-.125002923782692399E+00 +DATA ·logrodataL21<> + 48(SB)/8, $0.111142014580396256E+00 +DATA ·logrodataL21<> + 56(SB)/8, $0.759438932618934220E-01 +DATA ·logrodataL21<> + 64(SB)/8, $0.142857144267212549E+00 +DATA ·logrodataL21<> + 72(SB)/8, $-.993038938793590759E-01 +DATA ·logrodataL21<> + 80(SB)/8, $-1.0 +GLOBL ·logrodataL21<> + 0(SB), RODATA, $88 + +// Constants +DATA ·logxminf<> + 0(SB)/8, $0xfff0000000000000 +GLOBL ·logxminf<> + 0(SB), RODATA, $8 +DATA ·logxnan<> + 0(SB)/8, $0x7ff8000000000000 +GLOBL ·logxnan<> + 0(SB), RODATA, $8 +DATA ·logx43f<> + 0(SB)/8, $0x43f0000000000000 +GLOBL ·logx43f<> + 0(SB), RODATA, $8 +DATA ·logxl2<> + 0(SB)/8, $0x3fda7aecbeba4e46 +GLOBL ·logxl2<> + 0(SB), RODATA, $8 +DATA ·logxl1<> + 0(SB)/8, $0x3ffacde700000000 +GLOBL ·logxl1<> + 0(SB), RODATA, $8 + +/* Input transform scale and add constants */ +DATA ·logxm<> + 0(SB)/8, $0x3fc77604e63c84b1 +DATA ·logxm<> + 8(SB)/8, $0x40fb39456ab53250 +DATA ·logxm<> + 16(SB)/8, 
$0x3fc9ee358b945f3f +DATA ·logxm<> + 24(SB)/8, $0x40fb39418bf3b137 +DATA ·logxm<> + 32(SB)/8, $0x3fccfb2e1304f4b6 +DATA ·logxm<> + 40(SB)/8, $0x40fb393d3eda3022 +DATA ·logxm<> + 48(SB)/8, $0x3fd0000000000000 +DATA ·logxm<> + 56(SB)/8, $0x40fb393969e70000 +DATA ·logxm<> + 64(SB)/8, $0x3fd11117aafbfe04 +DATA ·logxm<> + 72(SB)/8, $0x40fb3936eaefafcf +DATA ·logxm<> + 80(SB)/8, $0x3fd2492af5e658b2 +DATA ·logxm<> + 88(SB)/8, $0x40fb39343ff01715 +DATA ·logxm<> + 96(SB)/8, $0x3fd3b50c622a43dd +DATA ·logxm<> + 104(SB)/8, $0x40fb39315adae2f3 +DATA ·logxm<> + 112(SB)/8, $0x3fd56bbeea918777 +DATA ·logxm<> + 120(SB)/8, $0x40fb392e21698552 +GLOBL ·logxm<> + 0(SB), RODATA, $128 + +// logAsm returns the natural logarithm of the argument. +// +// Special cases are: +// Log(+Inf) = +Inf +// Log(0) = -Inf +// Log(x < 0) = NaN +// Log(NaN) = NaN +// The algorithm used is minimax polynomial approximation using a table of +// polynomial coefficients determined with a Remez exchange algorithm. + +TEXT ·logAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + MOVD $·logrodataL21<>+0(SB), R9 + MOVH $0x8006, R4 + LGDR F0, R1 + MOVD $0x3FF0000000000000, R6 + SRAD $48, R1, R1 + MOVD $0x40F03E8000000000, R8 + SUBW R1, R4 + RISBGZ $32, $59, $0, R4, R2 + RISBGN $0, $15, $48, R2, R6 + RISBGN $16, $31, $32, R2, R8 + MOVW R1, R7 + CMPBGT R7, $22, L17 + LTDBR F0, F0 + MOVD $·logx43f<>+0(SB), R1 + FMOVD 0(R1), F2 + BLEU L3 + MOVH $0x8005, R12 + MOVH $0x8405, R0 + BR L15 +L7: + LTDBR F0, F0 + BLEU L3 +L15: + FMUL F2, F0 + LGDR F0, R1 + SRAD $48, R1, R1 + SUBW R1, R0, R2 + SUBW R1, R12, R3 + BYTE $0x18 //lr %r4,%r2 + BYTE $0x42 + ANDW $0xFFFFFFF0, R3 + ANDW $0xFFFFFFF0, R2 + BYTE $0x18 //lr %r5,%r1 + BYTE $0x51 + MOVW R1, R7 + CMPBLE R7, $22, L7 + RISBGN $0, $15, $48, R3, R6 + RISBGN $16, $31, $32, R2, R8 +L2: + MOVH R5, R5 + MOVH $0x7FEF, R1 + CMPW R5, R1 + BGT L1 + LDGR R6, F2 + FMUL F2, F0 + RISBGZ $57, $59, $3, R4, R4 + FMOVD 80(R9), F2 + MOVD $·logxm<>+0(SB), R7 + ADD R7, R4 + FMOVD 72(R9), F4 + WORD 
$0xED004000 //madb %f2,%f0,0(%r4) + BYTE $0x20 + BYTE $0x1E + FMOVD 64(R9), F1 + FMOVD F2, F0 + FMOVD 56(R9), F2 + WFMADB V0, V2, V4, V2 + WFMDB V0, V0, V6 + FMOVD 48(R9), F4 + WFMADB V0, V2, V4, V2 + FMOVD 40(R9), F4 + WFMADB V2, V6, V1, V2 + FMOVD 32(R9), F1 + WFMADB V6, V4, V1, V4 + FMOVD 24(R9), F1 + WFMADB V6, V2, V1, V2 + FMOVD 16(R9), F1 + WFMADB V6, V4, V1, V4 + MOVD $·logxl1<>+0(SB), R1 + FMOVD 8(R9), F1 + WFMADB V6, V2, V1, V2 + FMOVD 0(R9), F1 + WFMADB V6, V4, V1, V4 + FMOVD 8(R4), F1 + WFMADB V0, V2, V4, V2 + LDGR R8, F4 + WFMADB V6, V2, V0, V2 + WORD $0xED401000 //msdb %f1,%f4,0(%r1) + BYTE $0x10 + BYTE $0x1F + MOVD ·logxl2<>+0(SB), R1 + WORD $0xB3130001 //lcdbr %f0,%f1 + LDGR R1, F4 + WFMADB V0, V4, V2, V0 +L1: + // common exit: F0 holds the result + FMOVD F0, ret+8(FP) + RET +L3: + // special inputs, reached through BLEU after LTDBR (x <= 0 or NaN) + LTDBR F0, F0 + BEQ L20 + BGE L1 + BVS L1 // unordered (NaN input): return x unchanged + + // remaining case: load the NaN constant as the result + MOVD $·logxnan<>+0(SB), R1 + FMOVD 0(R1), F0 + BR L1 +L20: + // x is zero: return the -Inf constant + MOVD $·logxminf<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET +L17: + BYTE $0x18 //lr %r5,%r1 + BYTE $0x51 + BR L2 diff --git a/src/math/log_stub.go b/src/math/log_stub.go new file mode 100644 index 0000000..d35992b --- /dev/null +++ b/src/math/log_stub.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 && !s390x + +package math + +// haveArchLog is false on every architecture other than amd64 and s390x +// (see the build constraint above). +const haveArchLog = false + +// archLog is a placeholder that panics if it is ever called. +func archLog(x float64) float64 { + panic("not implemented") +} diff --git a/src/math/logb.go b/src/math/logb.go new file mode 100644 index 0000000..f2769d4 --- /dev/null +++ b/src/math/logb.go @@ -0,0 +1,50 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Logb returns the binary exponent of x. 
+// +// Special cases are: +// Logb(±Inf) = +Inf +// Logb(0) = -Inf +// Logb(NaN) = NaN +func Logb(x float64) float64 { + // Zero, infinities and NaN are answered directly; only finite + // non-zero values are handed to ilogb. + if x == 0 { + return Inf(-1) + } + if IsInf(x, 0) { + return Inf(1) + } + if IsNaN(x) { + return x + } + return float64(ilogb(x)) +} + +// Ilogb returns the binary exponent of x as an integer. +// +// Special cases are: +// Ilogb(±Inf) = MaxInt32 +// Ilogb(0) = MinInt32 +// Ilogb(NaN) = MaxInt32 +func Ilogb(x float64) int { + if x == 0 { + return MinInt32 + } + // NaN and both infinities share the same sentinel. + if IsNaN(x) || IsInf(x, 0) { + return MaxInt32 + } + return ilogb(x) +} + +// ilogb returns the binary exponent of x. It assumes x is finite and +// non-zero. +func ilogb(x float64) int { + norm, adjust := normalize(x) + biased := int((Float64bits(norm) >> shift) & mask) + return biased - bias + adjust +} diff --git a/src/math/mod.go b/src/math/mod.go new file mode 100644 index 0000000..6bc5f28 --- /dev/null +++ b/src/math/mod.go @@ -0,0 +1,51 @@ +// Copyright 2009-2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point mod function. +*/ + +// Mod returns the floating-point remainder of x/y. +// The magnitude of the result is less than y and its +// sign agrees with that of x. 
+// +// Special cases are: +// Mod(±Inf, y) = NaN +// Mod(NaN, y) = NaN +// Mod(x, 0) = NaN +// Mod(x, ±Inf) = x +// Mod(x, NaN) = NaN +func Mod(x, y float64) float64 { + if !haveArchMod { + return mod(x, y) + } + return archMod(x, y) +} + +// mod is the portable implementation of Mod. It repeatedly subtracts +// y scaled by a power of two from |x| until the remainder drops below |y|, +// then restores the sign of x. +func mod(x, y float64) float64 { + switch { + case y == 0, IsInf(x, 0), IsNaN(x), IsNaN(y): + return NaN() + } + y = Abs(y) + yfr, yexp := Frexp(y) + + // Work on the magnitude of x; the sign is put back at the end. + negative := x < 0 + rem := x + if negative { + rem = -x + } + + for rem >= y { + rfr, rexp := Frexp(rem) + if rfr < yfr { + rexp-- + } + rem -= Ldexp(y, rexp-yexp) + } + if negative { + return -rem + } + return rem +} diff --git a/src/math/modf.go b/src/math/modf.go new file mode 100644 index 0000000..bf08dc6 --- /dev/null +++ b/src/math/modf.go @@ -0,0 +1,42 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Modf returns integer and fractional floating-point numbers +// that sum to f. Both values have the same sign as f. +// +// Special cases are: +// Modf(±Inf) = ±Inf, NaN +// Modf(NaN) = NaN, NaN +func Modf(f float64) (int float64, frac float64) { + if !haveArchModf { + return modf(f) + } + return archModf(f) +} + +// modf is the portable implementation of Modf. +func modf(f float64) (int float64, frac float64) { + switch { + case f < 0: + // Split the magnitude, then negate both parts. + int, frac = Modf(-f) + return -int, -frac + case f == 0: + return f, f // Return -0, -0 when f == -0 + case f < 1: + return 0, f + } + + bits := Float64bits(f) + e := uint(bits>>shift)&mask - bias + + // Keep the top 12+e bits, the integer part; clear the rest. + if e < 64-12 { + bits &^= 1<<(64-12-e) - 1 + } + int = Float64frombits(bits) + return int, f - int +} diff --git a/src/math/modf_arm64.s b/src/math/modf_arm64.s new file mode 100644 index 0000000..1e4a329 --- /dev/null +++ b/src/math/modf_arm64.s @@ -0,0 +1,18 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func archModf(f float64) (int float64, frac float64) +// +// The integer part is f rounded toward zero; the fractional part is f minus +// that value, with the sign bit of f ORed in. +TEXT ·archModf(SB),NOSPLIT,$0 + MOVD f+0(FP), R0 + FMOVD R0, F0 + FRINTZD F0, F1 // F1 = f rounded toward zero + FMOVD F1, int+8(FP) + FSUBD F1, F0 // F0 = f - F1 + FMOVD F0, R1 + AND $(1<<63), R0 // keep only the sign bit of f + ORR R0, R1 // must have same sign + MOVD R1, frac+16(FP) + RET diff --git a/src/math/modf_asm.go b/src/math/modf_asm.go new file mode 100644 index 0000000..c63be6c --- /dev/null +++ b/src/math/modf_asm.go @@ -0,0 +1,11 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build arm64 || ppc64 || ppc64le + +package math + +// haveArchModf is true on arm64, ppc64 and ppc64le (see the build +// constraint above). +const haveArchModf = true + +// archModf is declared without a Go body; modf_arm64.s and modf_ppc64x.s +// each define ·archModf. +func archModf(f float64) (int float64, frac float64) diff --git a/src/math/modf_noasm.go b/src/math/modf_noasm.go new file mode 100644 index 0000000..55c6a7f --- /dev/null +++ b/src/math/modf_noasm.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !arm64 && !ppc64 && !ppc64le + +package math + +// haveArchModf is false on every architecture other than arm64, ppc64 and +// ppc64le, so Modf takes the pure Go modf path there. +const haveArchModf = false + +// archModf is a placeholder that panics if it is ever called. +func archModf(f float64) (int float64, frac float64) { + panic("not implemented") +} diff --git a/src/math/modf_ppc64x.s b/src/math/modf_ppc64x.s new file mode 100644 index 0000000..1303067 --- /dev/null +++ b/src/math/modf_ppc64x.s @@ -0,0 +1,18 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build ppc64 || ppc64le +// +build ppc64 ppc64le + +#include "textflag.h" + +// func archModf(f float64) (int float64, frac float64) +TEXT ·archModf(SB),NOSPLIT,$0 + FMOVD f+0(FP), F0 + FRIZ F0, F1 + FMOVD F1, int+8(FP) + FSUB F1, F0, F2 + FCPSGN F2, F0, F2 + FMOVD F2, frac+16(FP) + RET diff --git a/src/math/nextafter.go b/src/math/nextafter.go new file mode 100644 index 0000000..9088e4d --- /dev/null +++ b/src/math/nextafter.go @@ -0,0 +1,49 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Nextafter32 returns the next representable float32 value after x towards y. +// +// Special cases are: +// Nextafter32(x, x) = x +// Nextafter32(NaN, y) = NaN +// Nextafter32(x, NaN) = NaN +func Nextafter32(x, y float32) (r float32) { + if IsNaN(float64(x)) || IsNaN(float64(y)) { + return float32(NaN()) + } + if x == y { + return x + } + if x == 0 { + // Bit pattern 1, carrying the sign of y. + return float32(Copysign(float64(Float32frombits(1)), float64(y))) + } + bits := Float32bits(x) + if (y > x) == (x > 0) { + // Stepping away from zero: the bit pattern grows by one. + bits++ + } else { + bits-- + } + return Float32frombits(bits) +} + +// Nextafter returns the next representable float64 value after x towards y. +// +// Special cases are: +// Nextafter(x, x) = x +// Nextafter(NaN, y) = NaN +// Nextafter(x, NaN) = NaN +func Nextafter(x, y float64) (r float64) { + if IsNaN(x) || IsNaN(y) { + return NaN() + } + if x == y { + return x + } + if x == 0 { + // Bit pattern 1, carrying the sign of y. + return Copysign(Float64frombits(1), y) + } + bits := Float64bits(x) + if (y > x) == (x > 0) { + // Stepping away from zero: the bit pattern grows by one. + bits++ + } else { + bits-- + } + return Float64frombits(bits) +} diff --git a/src/math/pow.go b/src/math/pow.go new file mode 100644 index 0000000..e45a044 --- /dev/null +++ b/src/math/pow.go @@ -0,0 +1,156 @@ +// Copyright 2009 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// isOddInt reports whether x is an odd integer. +func isOddInt(x float64) bool { + if Abs(x) >= (1 << 53) { + // Every float64 with magnitude >= 1<<53 is an even integer, so the + // answer is known without converting. Skipping the conversion also + // matters for correctness: int64(xi) below is implementation-dependent + // once xi no longer fits in int64, and may then come out odd. + return false + } + xi, xf := Modf(x) + return xf == 0 && int64(xi)&1 == 1 +} + +// Special cases taken from FreeBSD's /usr/src/lib/msun/src/e_pow.c +// updated by IEEE Std. 754-2008 "Section 9.2.1 Special values". + +// Pow returns x**y, the base-x exponential of y. +// +// Special cases are (in order): +// Pow(x, ±0) = 1 for any x +// Pow(1, y) = 1 for any y +// Pow(x, 1) = x for any x +// Pow(NaN, y) = NaN +// Pow(x, NaN) = NaN +// Pow(±0, y) = ±Inf for y an odd integer < 0 +// Pow(±0, -Inf) = +Inf +// Pow(±0, +Inf) = +0 +// Pow(±0, y) = +Inf for finite y < 0 and not an odd integer +// Pow(±0, y) = ±0 for y an odd integer > 0 +// Pow(±0, y) = +0 for finite y > 0 and not an odd integer +// Pow(-1, ±Inf) = 1 +// Pow(x, +Inf) = +Inf for |x| > 1 +// Pow(x, -Inf) = +0 for |x| > 1 +// Pow(x, +Inf) = +0 for |x| < 1 +// Pow(x, -Inf) = +Inf for |x| < 1 +// Pow(+Inf, y) = +Inf for y > 0 +// Pow(+Inf, y) = +0 for y < 0 +// Pow(-Inf, y) = Pow(-0, -y) +// Pow(x, y) = NaN for finite x < 0 and finite non-integer y +func Pow(x, y float64) float64 { + if haveArchPow { + return archPow(x, y) + } + return pow(x, y) +} + +// pow is the portable implementation of Pow: the switch below answers the +// special cases listed on Pow, and everything else falls through to the +// general computation. +func pow(x, y float64) float64 { + switch { + case y == 0 || x == 1: + return 1 + case y == 1: + return x + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0: + switch { + case y < 0: + if isOddInt(y) { + return Copysign(Inf(1), x) + } + return Inf(1) + case y > 0: + if isOddInt(y) { + return x + } + return 0 + } + case IsInf(y, 0): + switch { + case x == -1: + return 1 + case (Abs(x) < 1) == IsInf(y, 1): + return 0 + default: + return Inf(1) + } + case IsInf(x, 0): + if IsInf(x, -1) { + return Pow(1/x, -y) // Pow(-0, -y) + } + switch { + case y < 0: + return 0 + case y > 0: + return Inf(1) + } + case y == 0.5: + return Sqrt(x) + case y == -0.5: + return 1 / Sqrt(x) + } + + yi, yf := Modf(Abs(y)) + if yf != 0 && x < 0 { + 
return NaN() + } + if yi >= 1<<63 { + // yi is a large even int that will lead to overflow (or underflow to 0) + // for all x except -1 (x == 1 was handled earlier) + switch { + case x == -1: + return 1 + case (Abs(x) < 1) == (y > 0): + return 0 + default: + return Inf(1) + } + } + + // ans = a1 * 2**ae (= 1 for now). + a1 := 1.0 + ae := 0 + + // ans *= x**yf + if yf != 0 { + if yf > 0.5 { + yf-- + yi++ + } + a1 = Exp(yf * Log(x)) + } + + // ans *= x**yi + // by multiplying in successive squarings + // of x according to bits of yi. + // accumulate powers of two into exp. + x1, xe := Frexp(x) + for i := int64(yi); i != 0; i >>= 1 { + if xe < -1<<12 || 1<<12 < xe { + // catch xe before it overflows the left shift below + // Since i !=0 it has at least one bit still set, so ae will accumulate xe + // on at least one more iteration, ae += xe is a lower bound on ae + // the lower bound on ae exceeds the size of a float64 exp + // so the final call to Ldexp will produce under/overflow (0/Inf) + ae += xe + break + } + if i&1 == 1 { + a1 *= x1 + ae += xe + } + x1 *= x1 + xe <<= 1 + if x1 < .5 { + x1 += x1 + xe-- + } + } + + // ans = a1*2**ae + // if y < 0 { ans = 1 / ans } + // but in the opposite order + if y < 0 { + a1 = 1 / a1 + ae = -ae + } + return Ldexp(a1, ae) +} diff --git a/src/math/pow10.go b/src/math/pow10.go new file mode 100644 index 0000000..1234e20 --- /dev/null +++ b/src/math/pow10.go @@ -0,0 +1,46 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// pow10tab stores the pre-computed values 10**i for i < 32. +var pow10tab = [...]float64{ + 1e00, 1e01, 1e02, 1e03, 1e04, 1e05, 1e06, 1e07, 1e08, 1e09, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, +} + +// pow10postab32 stores the pre-computed value for 10**(i*32) at index i. 
+var pow10postab32 = [...]float64{ + 1e00, 1e32, 1e64, 1e96, 1e128, 1e160, 1e192, 1e224, 1e256, 1e288, +} + +// pow10negtab32 stores the pre-computed value for 10**(-i*32) at index i. +var pow10negtab32 = [...]float64{ + 1e-00, 1e-32, 1e-64, 1e-96, 1e-128, 1e-160, 1e-192, 1e-224, 1e-256, 1e-288, 1e-320, +} + +// Pow10 returns 10**n, the base-10 exponential of n. +// +// Special cases are: +// Pow10(n) = 0 for n < -323 +// Pow10(n) = +Inf for n > 308 +func Pow10(n int) float64 { + switch { + case n > 308: + return Inf(1) + case n >= 0: + // 10**n = 10**(32*hi) * 10**lo + hi, lo := uint(n)/32, uint(n)%32 + return pow10postab32[hi] * pow10tab[lo] + case n >= -323: + // 10**n = 10**(-32*hi) / 10**lo + hi, lo := uint(-n)/32, uint(-n)%32 + return pow10negtab32[hi] / pow10tab[lo] + } + // n < -323 + return 0 +} diff --git a/src/math/pow_s390x.s b/src/math/pow_s390x.s new file mode 100644 index 0000000..9a0fff3 --- /dev/null +++ b/src/math/pow_s390x.s @@ -0,0 +1,634 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 +#define PosOne 0x3FF0000000000000 +#define NegOne 0xBFF0000000000000 +#define NegZero 0x8000000000000000 + +// Minimax polynomial approximation +DATA ·powrodataL51<> + 0(SB)/8, $-1.0 +DATA ·powrodataL51<> + 8(SB)/8, $1.0 +DATA ·powrodataL51<> + 16(SB)/8, $0.24022650695910110361E+00 +DATA ·powrodataL51<> + 24(SB)/8, $0.69314718055994686185E+00 +DATA ·powrodataL51<> + 32(SB)/8, $0.96181291057109484809E-02 +DATA ·powrodataL51<> + 40(SB)/8, $0.15403814778342868389E-03 +DATA ·powrodataL51<> + 48(SB)/8, $0.55504108652095235601E-01 +DATA ·powrodataL51<> + 56(SB)/8, $0.13333818813168698658E-02 +DATA ·powrodataL51<> + 64(SB)/8, $0.68205322933914439200E-12 +DATA ·powrodataL51<> + 72(SB)/8, $-.18466496523378731640E-01 +DATA ·powrodataL51<> + 80(SB)/8, $0.19697596291603973706E-02 +DATA ·powrodataL51<> + 88(SB)/8, $0.23083120654155209200E+00 +DATA ·powrodataL51<> + 96(SB)/8, $0.55324356012093416771E-06 +DATA ·powrodataL51<> + 104(SB)/8, $-.40340677224649339048E-05 +DATA ·powrodataL51<> + 112(SB)/8, $0.30255507904062541562E-04 +DATA ·powrodataL51<> + 120(SB)/8, $-.77453979912413008787E-07 +DATA ·powrodataL51<> + 128(SB)/8, $-.23637115549923464737E-03 +DATA ·powrodataL51<> + 136(SB)/8, $0.11016119077267717198E-07 +DATA ·powrodataL51<> + 144(SB)/8, $0.22608272174486123035E-09 +DATA ·powrodataL51<> + 152(SB)/8, $-.15895808101370190382E-08 +DATA ·powrodataL51<> + 160(SB)/8, $0x4540190000000000 +GLOBL ·powrodataL51<> + 0(SB), RODATA, $168 + +// Constants +DATA ·pow_x001a<> + 0(SB)/8, $0x1a000000000000 +GLOBL ·pow_x001a<> + 0(SB), RODATA, $8 +DATA ·pow_xinf<> + 0(SB)/8, $0x7ff0000000000000 //+Inf +GLOBL ·pow_xinf<> + 0(SB), RODATA, $8 +DATA ·pow_xnan<> + 0(SB)/8, $0x7ff8000000000000 //NaN +GLOBL ·pow_xnan<> + 0(SB), RODATA, $8 +DATA ·pow_x434<> + 0(SB)/8, $0x4340000000000000 +GLOBL ·pow_x434<> + 0(SB), RODATA, $8 +DATA ·pow_x433<> + 0(SB)/8, 
$0x4330000000000000 +GLOBL ·pow_x433<> + 0(SB), RODATA, $8 +DATA ·pow_x43f<> + 0(SB)/8, $0x43f0000000000000 +GLOBL ·pow_x43f<> + 0(SB), RODATA, $8 +DATA ·pow_xadd<> + 0(SB)/8, $0xc2f0000100003fef +GLOBL ·pow_xadd<> + 0(SB), RODATA, $8 +DATA ·pow_xa<> + 0(SB)/8, $0x4019000000000000 +GLOBL ·pow_xa<> + 0(SB), RODATA, $8 + +// Scale correction tables +DATA powiadd<> + 0(SB)/8, $0xf000000000000000 +DATA powiadd<> + 8(SB)/8, $0x1000000000000000 +GLOBL powiadd<> + 0(SB), RODATA, $16 +DATA powxscale<> + 0(SB)/8, $0x4ff0000000000000 +DATA powxscale<> + 8(SB)/8, $0x2ff0000000000000 +GLOBL powxscale<> + 0(SB), RODATA, $16 + +// Fractional powers of 2 table +DATA ·powtexp<> + 0(SB)/8, $0.442737824274138381E-01 +DATA ·powtexp<> + 8(SB)/8, $0.263602189790660309E-01 +DATA ·powtexp<> + 16(SB)/8, $0.122565642281703586E-01 +DATA ·powtexp<> + 24(SB)/8, $0.143757052860721398E-02 +DATA ·powtexp<> + 32(SB)/8, $-.651375034121276075E-02 +DATA ·powtexp<> + 40(SB)/8, $-.119317678849450159E-01 +DATA ·powtexp<> + 48(SB)/8, $-.150868749549871069E-01 +DATA ·powtexp<> + 56(SB)/8, $-.161992609578469234E-01 +DATA ·powtexp<> + 64(SB)/8, $-.154492360403337917E-01 +DATA ·powtexp<> + 72(SB)/8, $-.129850717389178721E-01 +DATA ·powtexp<> + 80(SB)/8, $-.892902649276657891E-02 +DATA ·powtexp<> + 88(SB)/8, $-.338202636596794887E-02 +DATA ·powtexp<> + 96(SB)/8, $0.357266307045684762E-02 +DATA ·powtexp<> + 104(SB)/8, $0.118665304327406698E-01 +DATA ·powtexp<> + 112(SB)/8, $0.214434994118118914E-01 +DATA ·powtexp<> + 120(SB)/8, $0.322580645161290314E-01 +GLOBL ·powtexp<> + 0(SB), RODATA, $128 + +// Log multiplier tables +DATA ·powtl<> + 0(SB)/8, $0xbdf9723a80db6a05 +DATA ·powtl<> + 8(SB)/8, $0x3e0cfe4a0babe862 +DATA ·powtl<> + 16(SB)/8, $0xbe163b42dd33dada +DATA ·powtl<> + 24(SB)/8, $0xbe0cdf9de2a8429c +DATA ·powtl<> + 32(SB)/8, $0xbde9723a80db6a05 +DATA ·powtl<> + 40(SB)/8, $0xbdb37fcae081745e +DATA ·powtl<> + 48(SB)/8, $0xbdd8b2f901ac662c +DATA ·powtl<> + 56(SB)/8, $0xbde867dc68c36cc9 +DATA ·powtl<> + 
64(SB)/8, $0xbdd23e36b47256b7 +DATA ·powtl<> + 72(SB)/8, $0xbde4c9b89fcc7933 +DATA ·powtl<> + 80(SB)/8, $0xbdd16905cad7cf66 +DATA ·powtl<> + 88(SB)/8, $0x3ddb417414aa5529 +DATA ·powtl<> + 96(SB)/8, $0xbdce046f2889983c +DATA ·powtl<> + 104(SB)/8, $0x3dc2c3865d072897 +DATA ·powtl<> + 112(SB)/8, $0x8000000000000000 +DATA ·powtl<> + 120(SB)/8, $0x3dc1ca48817f8afe +DATA ·powtl<> + 128(SB)/8, $0xbdd703518a88bfb7 +DATA ·powtl<> + 136(SB)/8, $0x3dc64afcc46942ce +DATA ·powtl<> + 144(SB)/8, $0xbd9d79191389891a +DATA ·powtl<> + 152(SB)/8, $0x3ddd563044da4fa0 +DATA ·powtl<> + 160(SB)/8, $0x3e0f42b5e5f8f4b6 +DATA ·powtl<> + 168(SB)/8, $0x3e0dfa2c2cbf6ead +DATA ·powtl<> + 176(SB)/8, $0x3e14e25e91661293 +DATA ·powtl<> + 184(SB)/8, $0x3e0aac461509e20c +GLOBL ·powtl<> + 0(SB), RODATA, $192 + +DATA ·powtm<> + 0(SB)/8, $0x3da69e13 +DATA ·powtm<> + 8(SB)/8, $0x100003d66fcb6 +DATA ·powtm<> + 16(SB)/8, $0x200003d1538df +DATA ·powtm<> + 24(SB)/8, $0x300003cab729e +DATA ·powtm<> + 32(SB)/8, $0x400003c1a784c +DATA ·powtm<> + 40(SB)/8, $0x500003ac9b074 +DATA ·powtm<> + 48(SB)/8, $0x60000bb498d22 +DATA ·powtm<> + 56(SB)/8, $0x68000bb8b29a2 +DATA ·powtm<> + 64(SB)/8, $0x70000bb9a32d4 +DATA ·powtm<> + 72(SB)/8, $0x74000bb9946bb +DATA ·powtm<> + 80(SB)/8, $0x78000bb92e34b +DATA ·powtm<> + 88(SB)/8, $0x80000bb6c57dc +DATA ·powtm<> + 96(SB)/8, $0x84000bb4020f7 +DATA ·powtm<> + 104(SB)/8, $0x8c000ba93832d +DATA ·powtm<> + 112(SB)/8, $0x9000080000000 +DATA ·powtm<> + 120(SB)/8, $0x940003aa66c4c +DATA ·powtm<> + 128(SB)/8, $0x980003b2fb12a +DATA ·powtm<> + 136(SB)/8, $0xa00003bc1def6 +DATA ·powtm<> + 144(SB)/8, $0xa80003c1eb0eb +DATA ·powtm<> + 152(SB)/8, $0xb00003c64dcec +DATA ·powtm<> + 160(SB)/8, $0xc00003cc49e4e +DATA ·powtm<> + 168(SB)/8, $0xd00003d12f1de +DATA ·powtm<> + 176(SB)/8, $0xe00003d4a9c6f +DATA ·powtm<> + 184(SB)/8, $0xf00003d846c66 +GLOBL ·powtm<> + 0(SB), RODATA, $192 + +// Table of indeces into multiplier tables +// Adjusted from asm to remove offset and convert +DATA ·powtabi<> + 
0(SB)/8, $0x1010101 +DATA ·powtabi<> + 8(SB)/8, $0x101020202020203 +DATA ·powtabi<> + 16(SB)/8, $0x303030404040405 +DATA ·powtabi<> + 24(SB)/8, $0x505050606060708 +DATA ·powtabi<> + 32(SB)/8, $0x90a0b0c0d0e0f10 +DATA ·powtabi<> + 40(SB)/8, $0x1011111212121313 +DATA ·powtabi<> + 48(SB)/8, $0x1314141414151515 +DATA ·powtabi<> + 56(SB)/8, $0x1516161617171717 +GLOBL ·powtabi<> + 0(SB), RODATA, $64 + +// Pow returns x**y, the base-x exponential of y. +// +// Special cases are (in order): +// Pow(x, ±0) = 1 for any x +// Pow(1, y) = 1 for any y +// Pow(x, 1) = x for any x +// Pow(NaN, y) = NaN +// Pow(x, NaN) = NaN +// Pow(±0, y) = ±Inf for y an odd integer < 0 +// Pow(±0, -Inf) = +Inf +// Pow(±0, +Inf) = +0 +// Pow(±0, y) = +Inf for finite y < 0 and not an odd integer +// Pow(±0, y) = ±0 for y an odd integer > 0 +// Pow(±0, y) = +0 for finite y > 0 and not an odd integer +// Pow(-1, ±Inf) = 1 +// Pow(x, +Inf) = +Inf for |x| > 1 +// Pow(x, -Inf) = +0 for |x| > 1 +// Pow(x, +Inf) = +0 for |x| < 1 +// Pow(x, -Inf) = +Inf for |x| < 1 +// Pow(+Inf, y) = +Inf for y > 0 +// Pow(+Inf, y) = +0 for y < 0 +// Pow(-Inf, y) = Pow(-0, -y) +// Pow(x, y) = NaN for finite x < 0 and finite non-integer y + +TEXT ·powAsm(SB), NOSPLIT, $0-24 + // special case + MOVD x+0(FP), R1 + MOVD y+8(FP), R2 + + // special case Pow(1, y) = 1 for any y + MOVD $PosOne, R3 + CMPUBEQ R1, R3, xIsOne + + // special case Pow(x, 1) = x for any x + MOVD $PosOne, R4 + CMPUBEQ R2, R4, yIsOne + + // special case Pow(x, NaN) = NaN for any x + MOVD $~(1<<63), R5 + AND R2, R5 // y = |y| + MOVD $PosInf, R4 + CMPUBLT R4, R5, yIsNan + + MOVD $NegInf, R3 + CMPUBEQ R1, R3, xIsNegInf + + MOVD $NegOne, R3 + CMPUBEQ R1, R3, xIsNegOne + + MOVD $PosInf, R3 + CMPUBEQ R1, R3, xIsPosInf + + MOVD $NegZero, R3 + CMPUBEQ R1, R3, xIsNegZero + + MOVD $PosInf, R4 + CMPUBEQ R2, R4, yIsPosInf + + MOVD $0x0, R3 + CMPUBEQ R1, R3, xIsPosZero + CMPBLT R1, R3, xLtZero + BR Normal +xIsPosInf: + // special case Pow(+Inf, y) = +Inf for y > 0 + 
MOVD $0x0, R4 + CMPBGT R2, R4, posInfGeZero + BR Normal +xIsNegInf: + //Pow(-Inf, y) = Pow(-0, -y) + FMOVD y+8(FP), F2 + FNEG F2, F2 // y = -y + BR negZeroNegY // call Pow(-0, -y) +xIsNegOne: + // special case Pow(-1, ±Inf) = 1 + MOVD $PosInf, R4 + CMPUBEQ R2, R4, negOnePosInf + MOVD $NegInf, R4 + CMPUBEQ R2, R4, negOneNegInf + BR Normal +xIsPosZero: + // special case Pow(+0, -Inf) = +Inf + MOVD $NegInf, R4 + CMPUBEQ R2, R4, zeroNegInf + + // special case Pow(+0, y < 0) = +Inf + FMOVD y+8(FP), F2 + FMOVD $(0.0), F4 + FCMPU F2, F4 + BLT posZeroLtZero //y < 0.0 + BR Normal +xIsNegZero: + // special case Pow(-0, -Inf) = +Inf + MOVD $NegInf, R4 + CMPUBEQ R2, R4, zeroNegInf + FMOVD y+8(FP), F2 +negZeroNegY: + // special case Pow(x, ±0) = 1 for any x + FMOVD $(0.0), F4 + FCMPU F4, F2 + BLT negZeroGtZero // y > 0.0 + BEQ yIsZero // y = 0.0 + + FMOVD $(-0.0), F4 + FCMPU F4, F2 + BLT negZeroGtZero // y > -0.0 + BEQ yIsZero // y = -0.0 + + // special case Pow(-0, y) = -Inf for y an odd integer < 0 + // special case Pow(-0, y) = +Inf for finite y < 0 and not an odd integer + FIDBR $5, F2, F4 //F2 translate to integer F4 + FCMPU F2, F4 + BNE zeroNotOdd // y is not an (odd) integer and y < 0 + FMOVD $(2.0), F4 + FDIV F4, F2 // F2 = F2 / 2.0 + FIDBR $5, F2, F4 //F2 translate to integer F4 + FCMPU F2, F4 + BNE negZeroOddInt // y is an odd integer and y < 0 + BR zeroNotOdd // y is not an (odd) integer and y < 0 + +negZeroGtZero: + // special case Pow(-0, y) = -0 for y an odd integer > 0 + // special case Pow(±0, y) = +0 for finite y > 0 and not an odd integer + FIDBR $5, F2, F4 //F2 translate to integer F4 + FCMPU F2, F4 + BNE zeroNotOddGtZero // y is not an (odd) integer and y > 0 + FMOVD $(2.0), F4 + FDIV F4, F2 // F2 = F2 / 2.0 + FIDBR $5, F2, F4 //F2 translate to integer F4 + FCMPU F2, F4 + BNE negZeroOddIntGtZero // y is an odd integer and y > 0 + BR zeroNotOddGtZero // y is not an (odd) integer + +xLtZero: + // special case Pow(x, y) = NaN for finite x < 0 and finite 
non-integer y + FMOVD y+8(FP), F2 + FIDBR $5, F2, F4 + FCMPU F2, F4 + BNE ltZeroInt + BR Normal +yIsPosInf: + // special case Pow(x, +Inf) = +Inf for |x| > 1 + FMOVD x+0(FP), F1 + FMOVD $(1.0), F3 + FCMPU F1, F3 + BGT gtOnePosInf + FMOVD $(-1.0), F3 + FCMPU F1, F3 + BLT ltNegOnePosInf +Normal: + FMOVD x+0(FP), F0 + FMOVD y+8(FP), F2 + MOVD $·powrodataL51<>+0(SB), R9 + LGDR F0, R3 + WORD $0xC0298009 //iilf %r2,2148095317 + BYTE $0x55 + BYTE $0x55 + RISBGNZ $32, $63, $32, R3, R1 + SUBW R1, R2 + RISBGNZ $58, $63, $50, R2, R3 + BYTE $0x18 //lr %r5,%r1 + BYTE $0x51 + MOVD $·powtabi<>+0(SB), R12 + WORD $0xE303C000 //llgc %r0,0(%r3,%r12) + BYTE $0x00 + BYTE $0x90 + SUBW $0x1A0000, R5 + SLD $3, R0, R3 + MOVD $·powtm<>+0(SB), R4 + MOVH $0x0, R8 + ANDW $0x7FF00000, R2 + ORW R5, R1 + WORD $0x5A234000 //a %r2,0(%r3,%r4) + MOVD $0x3FF0000000000000, R5 + RISBGZ $40, $63, $56, R2, R3 + RISBGN $0, $31, $32, R2, R8 + ORW $0x45000000, R3 + MOVW R1, R6 + CMPBLT R6, $0, L42 + FMOVD F0, F4 +L2: + VLVGF $0, R3, V1 + MOVD $·pow_xa<>+0(SB), R2 + WORD $0xED3090A0 //lde %f3,.L52-.L51(%r9) + BYTE $0x00 + BYTE $0x24 + FMOVD 0(R2), F6 + FSUBS F1, F3 + LDGR R8, F1 + WFMSDB V4, V1, V6, V4 + FMOVD 152(R9), F6 + WFMDB V4, V4, V7 + FMOVD 144(R9), F1 + FMOVD 136(R9), F5 + WFMADB V4, V1, V6, V1 + VLEG $0, 128(R9), V16 + FMOVD 120(R9), F6 + WFMADB V4, V5, V6, V5 + FMOVD 112(R9), F6 + WFMADB V1, V7, V5, V1 + WFMADB V4, V6, V16, V16 + SLD $3, R0, R2 + FMOVD 104(R9), F5 + WORD $0xED824004 //ldeb %f8,4(%r2,%r4) + BYTE $0x00 + BYTE $0x04 + LDEBR F3, F3 + FMOVD 96(R9), F6 + WFMADB V4, V6, V5, V6 + FADD F8, F3 + WFMADB V7, V6, V16, V6 + FMUL F7, F7 + FMOVD 88(R9), F5 + FMADD F7, F1, F6 + WFMADB V4, V5, V3, V16 + FMOVD 80(R9), F1 + WFSDB V16, V3, V3 + MOVD $·powtl<>+0(SB), R3 + WFMADB V4, V6, V1, V6 + FMADD F5, F4, F3 + FMOVD 72(R9), F1 + WFMADB V4, V6, V1, V6 + WORD $0xED323000 //adb %f3,0(%r2,%r3) + BYTE $0x00 + BYTE $0x1A + FMOVD 64(R9), F1 + WFMADB V4, V6, V1, V6 + MOVD $·pow_xadd<>+0(SB), R2 + WFMADB V4, 
V6, V3, V4 + FMOVD 0(R2), F5 + WFADB V4, V16, V3 + VLEG $0, 56(R9), V20 + WFMSDB V2, V3, V5, V3 + VLEG $0, 48(R9), V18 + WFADB V3, V5, V6 + LGDR F3, R2 + WFMSDB V2, V16, V6, V16 + FMOVD 40(R9), F1 + WFMADB V2, V4, V16, V4 + FMOVD 32(R9), F7 + WFMDB V4, V4, V3 + WFMADB V4, V1, V20, V1 + WFMADB V4, V7, V18, V7 + VLEG $0, 24(R9), V16 + WFMADB V1, V3, V7, V1 + FMOVD 16(R9), F5 + WFMADB V4, V5, V16, V5 + RISBGZ $57, $60, $3, R2, R4 + WFMADB V3, V1, V5, V1 + MOVD $·powtexp<>+0(SB), R3 + WORD $0x68343000 //ld %f3,0(%r4,%r3) + FMADD F3, F4, F4 + RISBGN $0, $15, $48, R2, R5 + WFMADB V4, V1, V3, V4 + LGDR F6, R2 + LDGR R5, F1 + SRAD $48, R2, R2 + FMADD F1, F4, F1 + RLL $16, R2, R2 + ANDW $0x7FFF0000, R2 + WORD $0xC22B3F71 //alfi %r2,1064370176 + BYTE $0x00 + BYTE $0x00 + ORW R2, R1, R3 + MOVW R3, R6 + CMPBLT R6, $0, L43 +L1: + FMOVD F1, ret+16(FP) + RET +L43: + LTDBR F0, F0 + BLTU L44 + FMOVD F0, F3 +L7: + MOVD $·pow_xinf<>+0(SB), R3 + FMOVD 0(R3), F5 + WFCEDBS V3, V5, V7 + BVS L8 + WFMDB V3, V2, V6 +L8: + WFCEDBS V2, V2, V3 + BVS L9 + LTDBR F2, F2 + BEQ L26 + MOVW R1, R6 + CMPBLT R6, $0, L45 +L11: + WORD $0xC0190003 //iilf %r1,262143 + BYTE $0xFF + BYTE $0xFF + MOVW R2, R7 + MOVW R1, R6 + CMPBLE R7, R6, L34 + RISBGNZ $32, $63, $32, R5, R1 + LGDR F6, R2 + MOVD $powiadd<>+0(SB), R3 + RISBGZ $60, $60, $4, R2, R2 + WORD $0x5A123000 //a %r1,0(%r2,%r3) + RISBGN $0, $31, $32, R1, R5 + LDGR R5, F1 + FMADD F1, F4, F1 + MOVD $powxscale<>+0(SB), R1 + WORD $0xED121000 //mdb %f1,0(%r2,%r1) + BYTE $0x00 + BYTE $0x1C + BR L1 +L42: + LTDBR F0, F0 + BLTU L46 + FMOVD F0, F4 +L3: + MOVD $·pow_x001a<>+0(SB), R2 + WORD $0xED402000 //cdb %f4,0(%r2) + BYTE $0x00 + BYTE $0x19 + BGE L2 + BVS L2 + MOVD $·pow_x43f<>+0(SB), R2 + WORD $0xED402000 //mdb %f4,0(%r2) + BYTE $0x00 + BYTE $0x1C + WORD $0xC0298009 //iilf %r2,2148095317 + BYTE $0x55 + BYTE $0x55 + LGDR F4, R3 + RISBGNZ $32, $63, $32, R3, R3 + SUBW R3, R2, R3 + RISBGZ $33, $43, $0, R3, R2 + RISBGNZ $58, $63, $50, R3, R3 + WORD $0xE303C000 
//llgc %r0,0(%r3,%r12) + BYTE $0x00 + BYTE $0x90 + SLD $3, R0, R3 + WORD $0x5A234000 //a %r2,0(%r3,%r4) + BYTE $0x18 //lr %r3,%r2 + BYTE $0x32 + RISBGN $0, $31, $32, R3, R8 + ADDW $0x4000000, R3 + BLEU L5 + RISBGZ $40, $63, $56, R3, R3 + ORW $0x45000000, R3 + BR L2 +L9: + WFCEDBS V0, V0, V4 + BVS L35 + FMOVD F2, F1 + BR L1 +L46: + WORD $0xB3130040 //lcdbr %f4,%f0 + BR L3 +L44: + WORD $0xB3130030 //lcdbr %f3,%f0 + BR L7 +L35: + FMOVD F0, F1 + BR L1 +L26: + FMOVD 8(R9), F1 + BR L1 +L34: + FMOVD 8(R9), F4 +L19: + LTDBR F6, F6 + BLEU L47 +L18: + WFMDB V4, V5, V1 + BR L1 +L5: + RISBGZ $33, $50, $63, R3, R3 + WORD $0xC23B4000 //alfi %r3,1073741824 + BYTE $0x00 + BYTE $0x00 + RLL $24, R3, R3 + ORW $0x45000000, R3 + BR L2 +L45: + WFCEDBS V0, V0, V4 + BVS L35 + LTDBR F0, F0 + BLEU L48 + FMOVD 8(R9), F4 +L12: + MOVW R2, R6 + CMPBLT R6, $0, L19 + FMUL F4, F1 + BR L1 +L47: + BLT L40 + WFCEDBS V0, V0, V2 + BVS L49 +L16: + MOVD ·pow_xnan<>+0(SB), R1 + LDGR R1, F0 + WFMDB V4, V0, V1 + BR L1 +L48: + LGDR F0, R3 + RISBGNZ $32, $63, $32, R3, R1 + MOVW R1, R6 + CMPBEQ R6, $0, L29 + LTDBR F2, F2 + BLTU L50 + FMOVD F2, F4 +L14: + MOVD $·pow_x433<>+0(SB), R1 + FMOVD 0(R1), F7 + WFCHDBS V4, V7, V3 + BEQ L15 + WFADB V7, V4, V3 + FSUB F7, F3 + WFCEDBS V4, V3, V3 + BEQ L15 + LTDBR F0, F0 + FMOVD 8(R9), F4 + BNE L16 +L13: + LTDBR F2, F2 + BLT L18 +L40: + FMOVD $0, F0 + WFMDB V4, V0, V1 + BR L1 +L49: + WFMDB V0, V4, V1 + BR L1 +L29: + FMOVD 8(R9), F4 + BR L13 +L15: + MOVD $·pow_x434<>+0(SB), R1 + FMOVD 0(R1), F7 + WFCHDBS V4, V7, V3 + BEQ L32 + WFADB V7, V4, V3 + FSUB F7, F3 + WFCEDBS V4, V3, V4 + BEQ L32 + FMOVD 0(R9), F4 +L17: + LTDBR F0, F0 + BNE L12 + BR L13 +L32: + FMOVD 8(R9), F4 + BR L17 +L50: + WORD $0xB3130042 //lcdbr %f4,%f2 + BR L14 +xIsOne: // Pow(1, y) = 1 for any y +yIsOne: // Pow(x, 1) = x for any x +posInfGeZero: // Pow(+Inf, y) = +Inf for y > 0 + MOVD R1, ret+16(FP) + RET +yIsNan: // Pow(NaN, y) = NaN +ltZeroInt: // Pow(x, y) = NaN for finite x < 0 and finite non-integer y + 
MOVD $NaN, R2 + MOVD R2, ret+16(FP) + RET +negOnePosInf: // Pow(-1, ±Inf) = 1 +negOneNegInf: + MOVD $PosOne, R3 + MOVD R3, ret+16(FP) + RET +negZeroOddInt: + MOVD $NegInf, R3 + MOVD R3, ret+16(FP) + RET +zeroNotOdd: // Pow(±0, y) = +Inf for finite y < 0 and not an odd integer +posZeroLtZero: // special case Pow(+0, y < 0) = +Inf +zeroNegInf: // Pow(±0, -Inf) = +Inf + MOVD $PosInf, R3 + MOVD R3, ret+16(FP) + RET +gtOnePosInf: //Pow(x, +Inf) = +Inf for |x| > 1 +ltNegOnePosInf: + MOVD R2, ret+16(FP) + RET +yIsZero: //Pow(x, ±0) = 1 for any x + MOVD $PosOne, R4 + MOVD R4, ret+16(FP) + RET +negZeroOddIntGtZero: // Pow(-0, y) = -0 for y an odd integer > 0 + MOVD $NegZero, R3 + MOVD R3, ret+16(FP) + RET +zeroNotOddGtZero: // Pow(±0, y) = +0 for finite y > 0 and not an odd integer + MOVD $0, ret+16(FP) + RET diff --git a/src/math/rand/example_test.go b/src/math/rand/example_test.go new file mode 100644 index 0000000..f691e39 --- /dev/null +++ b/src/math/rand/example_test.go @@ -0,0 +1,157 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand_test + +import ( + "fmt" + "math/rand" + "os" + "strings" + "text/tabwriter" +) + +// These tests serve as an example but also make sure we don't change +// the output of the random number generator when given a fixed seed. + +func Example() { + // Seeding with the same value results in the same random sequence each run. + // For different numbers, seed with a different value, such as + // time.Now().UnixNano(), which yields a constantly-changing number. 
+ rand.Seed(42) + answers := []string{ + "It is certain", + "It is decidedly so", + "Without a doubt", + "Yes definitely", + "You may rely on it", + "As I see it yes", + "Most likely", + "Outlook good", + "Yes", + "Signs point to yes", + "Reply hazy try again", + "Ask again later", + "Better not tell you now", + "Cannot predict now", + "Concentrate and ask again", + "Don't count on it", + "My reply is no", + "My sources say no", + "Outlook not so good", + "Very doubtful", + } + fmt.Println("Magic 8-Ball says:", answers[rand.Intn(len(answers))]) + // Output: Magic 8-Ball says: As I see it yes +} + +// This example shows the use of each of the methods on a *Rand. +// The use of the global functions is the same, without the receiver. +func Example_rand() { + // Create and seed the generator. + // Typically a non-fixed seed should be used, such as time.Now().UnixNano(). + // Using a fixed seed will produce the same output on every run. + r := rand.New(rand.NewSource(99)) + + // The tabwriter here helps us generate aligned output. + w := tabwriter.NewWriter(os.Stdout, 1, 1, 1, ' ', 0) + defer w.Flush() + show := func(name string, v1, v2, v3 any) { + fmt.Fprintf(w, "%s\t%v\t%v\t%v\n", name, v1, v2, v3) + } + + // Float32 and Float64 values are in [0, 1). + show("Float32", r.Float32(), r.Float32(), r.Float32()) + show("Float64", r.Float64(), r.Float64(), r.Float64()) + + // ExpFloat64 values have an average of 1 but decay exponentially. + show("ExpFloat64", r.ExpFloat64(), r.ExpFloat64(), r.ExpFloat64()) + + // NormFloat64 values have an average of 0 and a standard deviation of 1. + show("NormFloat64", r.NormFloat64(), r.NormFloat64(), r.NormFloat64()) + + // Int31, Int63, and Uint32 generate values of the given width. + // The Int method (not shown) is like either Int31 or Int63 + // depending on the size of 'int'. 
+ show("Int31", r.Int31(), r.Int31(), r.Int31()) + show("Int63", r.Int63(), r.Int63(), r.Int63()) + show("Uint32", r.Uint32(), r.Uint32(), r.Uint32()) + + // Intn, Int31n, and Int63n limit their output to be < n. + // They do so more carefully than using r.Int()%n. + show("Intn(10)", r.Intn(10), r.Intn(10), r.Intn(10)) + show("Int31n(10)", r.Int31n(10), r.Int31n(10), r.Int31n(10)) + show("Int63n(10)", r.Int63n(10), r.Int63n(10), r.Int63n(10)) + + // Perm generates a random permutation of the numbers [0, n). + show("Perm", r.Perm(5), r.Perm(5), r.Perm(5)) + // Output: + // Float32 0.2635776 0.6358173 0.6718283 + // Float64 0.628605430454327 0.4504798828572669 0.9562755949377957 + // ExpFloat64 0.3362240648200941 1.4256072328483647 0.24354758816173044 + // NormFloat64 0.17233959114940064 1.577014951434847 0.04259129641113857 + // Int31 1501292890 1486668269 182840835 + // Int63 3546343826724305832 5724354148158589552 5239846799706671610 + // Uint32 2760229429 296659907 1922395059 + // Intn(10) 1 2 5 + // Int31n(10) 4 7 8 + // Int63n(10) 7 6 3 + // Perm [1 4 2 3 0] [4 2 1 3 0] [1 2 4 0 3] +} + +func ExamplePerm() { + for _, value := range rand.Perm(3) { + fmt.Println(value) + } + + // Unordered output: 1 + // 2 + // 0 +} + +func ExampleShuffle() { + words := strings.Fields("ink runs from the corners of my mouth") + rand.Shuffle(len(words), func(i, j int) { + words[i], words[j] = words[j], words[i] + }) + fmt.Println(words) + + // Output: + // [mouth my the of runs corners from ink] +} + +func ExampleShuffle_slicesInUnison() { + numbers := []byte("12345") + letters := []byte("ABCDE") + // Shuffle numbers, swapping corresponding entries in letters at the same time. 
+ rand.Shuffle(len(numbers), func(i, j int) { + numbers[i], numbers[j] = numbers[j], numbers[i] + letters[i], letters[j] = letters[j], letters[i] + }) + for i := range numbers { + fmt.Printf("%c: %c\n", letters[i], numbers[i]) + } + + // Output: + // C: 3 + // D: 4 + // A: 1 + // E: 5 + // B: 2 +} + +func ExampleIntn() { + // Seeding with the same value results in the same random sequence each run. + // For different numbers, seed with a different value, such as + // time.Now().UnixNano(), which yields a constantly-changing number. + rand.Seed(86) + fmt.Println(rand.Intn(100)) + fmt.Println(rand.Intn(100)) + fmt.Println(rand.Intn(100)) + + // Output: + // 42 + // 76 + // 30 +} diff --git a/src/math/rand/exp.go b/src/math/rand/exp.go new file mode 100644 index 0000000..5a8d946 --- /dev/null +++ b/src/math/rand/exp.go @@ -0,0 +1,222 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +import ( + "math" +) + +/* + * Exponential distribution + * + * See "The Ziggurat Method for Generating Random Variables" + * (Marsaglia & Tsang, 2000) + * https://www.jstatsoft.org/v05/i08/paper [pdf] + */ + +const ( + re = 7.69711747013104972 +) + +// ExpFloat64 returns an exponentially distributed float64 in the range +// (0, +math.MaxFloat64] with an exponential distribution whose rate parameter +// (lambda) is 1 and whose mean is 1/lambda (1). 
+// To produce a distribution with a different rate parameter, +// callers can adjust the output using: +// +// sample = ExpFloat64() / desiredRateParameter +// +func (r *Rand) ExpFloat64() float64 { + for { + j := r.Uint32() + i := j & 0xFF + x := float64(j) * float64(we[i]) + if j < ke[i] { + return x + } + if i == 0 { + return re - math.Log(r.Float64()) + } + if fe[i]+float32(r.Float64())*(fe[i-1]-fe[i]) < float32(math.Exp(-x)) { + return x + } + } +} + +var ke = [256]uint32{ + 0xe290a139, 0x0, 0x9beadebc, 0xc377ac71, 0xd4ddb990, + 0xde893fb8, 0xe4a8e87c, 0xe8dff16a, 0xebf2deab, 0xee49a6e8, + 0xf0204efd, 0xf19bdb8e, 0xf2d458bb, 0xf3da104b, 0xf4b86d78, + 0xf577ad8a, 0xf61de83d, 0xf6afb784, 0xf730a573, 0xf7a37651, + 0xf80a5bb6, 0xf867189d, 0xf8bb1b4f, 0xf9079062, 0xf94d70ca, + 0xf98d8c7d, 0xf9c8928a, 0xf9ff175b, 0xfa319996, 0xfa6085f8, + 0xfa8c3a62, 0xfab5084e, 0xfadb36c8, 0xfaff0410, 0xfb20a6ea, + 0xfb404fb4, 0xfb5e2951, 0xfb7a59e9, 0xfb95038c, 0xfbae44ba, + 0xfbc638d8, 0xfbdcf892, 0xfbf29a30, 0xfc0731df, 0xfc1ad1ed, + 0xfc2d8b02, 0xfc3f6c4d, 0xfc5083ac, 0xfc60ddd1, 0xfc708662, + 0xfc7f8810, 0xfc8decb4, 0xfc9bbd62, 0xfca9027c, 0xfcb5c3c3, + 0xfcc20864, 0xfccdd70a, 0xfcd935e3, 0xfce42ab0, 0xfceebace, + 0xfcf8eb3b, 0xfd02c0a0, 0xfd0c3f59, 0xfd156b7b, 0xfd1e48d6, + 0xfd26daff, 0xfd2f2552, 0xfd372af7, 0xfd3eeee5, 0xfd4673e7, + 0xfd4dbc9e, 0xfd54cb85, 0xfd5ba2f2, 0xfd62451b, 0xfd68b415, + 0xfd6ef1da, 0xfd750047, 0xfd7ae120, 0xfd809612, 0xfd8620b4, + 0xfd8b8285, 0xfd90bcf5, 0xfd95d15e, 0xfd9ac10b, 0xfd9f8d36, + 0xfda43708, 0xfda8bf9e, 0xfdad2806, 0xfdb17141, 0xfdb59c46, + 0xfdb9a9fd, 0xfdbd9b46, 0xfdc170f6, 0xfdc52bd8, 0xfdc8ccac, + 0xfdcc542d, 0xfdcfc30b, 0xfdd319ef, 0xfdd6597a, 0xfdd98245, + 0xfddc94e5, 0xfddf91e6, 0xfde279ce, 0xfde54d1f, 0xfde80c52, + 0xfdeab7de, 0xfded5034, 0xfdefd5be, 0xfdf248e3, 0xfdf4aa06, + 0xfdf6f984, 0xfdf937b6, 0xfdfb64f4, 0xfdfd818d, 0xfdff8dd0, + 0xfe018a08, 0xfe03767a, 0xfe05536c, 0xfe07211c, 0xfe08dfc9, + 0xfe0a8fab, 0xfe0c30fb, 
0xfe0dc3ec, 0xfe0f48b1, 0xfe10bf76, + 0xfe122869, 0xfe1383b4, 0xfe14d17c, 0xfe1611e7, 0xfe174516, + 0xfe186b2a, 0xfe19843e, 0xfe1a9070, 0xfe1b8fd6, 0xfe1c8289, + 0xfe1d689b, 0xfe1e4220, 0xfe1f0f26, 0xfe1fcfbc, 0xfe2083ed, + 0xfe212bc3, 0xfe21c745, 0xfe225678, 0xfe22d95f, 0xfe234ffb, + 0xfe23ba4a, 0xfe241849, 0xfe2469f2, 0xfe24af3c, 0xfe24e81e, + 0xfe25148b, 0xfe253474, 0xfe2547c7, 0xfe254e70, 0xfe25485a, + 0xfe25356a, 0xfe251586, 0xfe24e88f, 0xfe24ae64, 0xfe2466e1, + 0xfe2411df, 0xfe23af34, 0xfe233eb4, 0xfe22c02c, 0xfe22336b, + 0xfe219838, 0xfe20ee58, 0xfe20358c, 0xfe1f6d92, 0xfe1e9621, + 0xfe1daef0, 0xfe1cb7ac, 0xfe1bb002, 0xfe1a9798, 0xfe196e0d, + 0xfe1832fd, 0xfe16e5fe, 0xfe15869d, 0xfe141464, 0xfe128ed3, + 0xfe10f565, 0xfe0f478c, 0xfe0d84b1, 0xfe0bac36, 0xfe09bd73, + 0xfe07b7b5, 0xfe059a40, 0xfe03644c, 0xfe011504, 0xfdfeab88, + 0xfdfc26e9, 0xfdf98629, 0xfdf6c83b, 0xfdf3ec01, 0xfdf0f04a, + 0xfdedd3d1, 0xfdea953d, 0xfde7331e, 0xfde3abe9, 0xfddffdfb, + 0xfddc2791, 0xfdd826cd, 0xfdd3f9a8, 0xfdcf9dfc, 0xfdcb1176, + 0xfdc65198, 0xfdc15bb3, 0xfdbc2ce2, 0xfdb6c206, 0xfdb117be, + 0xfdab2a63, 0xfda4f5fd, 0xfd9e7640, 0xfd97a67a, 0xfd908192, + 0xfd8901f2, 0xfd812182, 0xfd78d98e, 0xfd7022bb, 0xfd66f4ed, + 0xfd5d4732, 0xfd530f9c, 0xfd48432b, 0xfd3cd59a, 0xfd30b936, + 0xfd23dea4, 0xfd16349e, 0xfd07a7a3, 0xfcf8219b, 0xfce7895b, + 0xfcd5c220, 0xfcc2aadb, 0xfcae1d5e, 0xfc97ed4e, 0xfc7fe6d4, + 0xfc65ccf3, 0xfc495762, 0xfc2a2fc8, 0xfc07ee19, 0xfbe213c1, + 0xfbb8051a, 0xfb890078, 0xfb5411a5, 0xfb180005, 0xfad33482, + 0xfa839276, 0xfa263b32, 0xf9b72d1c, 0xf930a1a2, 0xf889f023, + 0xf7b577d2, 0xf69c650c, 0xf51530f0, 0xf2cb0e3c, 0xeeefb15d, + 0xe6da6ecf, +} +var we = [256]float32{ + 2.0249555e-09, 1.486674e-11, 2.4409617e-11, 3.1968806e-11, + 3.844677e-11, 4.4228204e-11, 4.9516443e-11, 5.443359e-11, + 5.905944e-11, 6.344942e-11, 6.7643814e-11, 7.1672945e-11, + 7.556032e-11, 7.932458e-11, 8.298079e-11, 8.654132e-11, + 9.0016515e-11, 9.3415074e-11, 9.674443e-11, 1.0001099e-10, + 
1.03220314e-10, 1.06377254e-10, 1.09486115e-10, 1.1255068e-10, + 1.1557435e-10, 1.1856015e-10, 1.2151083e-10, 1.2442886e-10, + 1.2731648e-10, 1.3017575e-10, 1.3300853e-10, 1.3581657e-10, + 1.3860142e-10, 1.4136457e-10, 1.4410738e-10, 1.4683108e-10, + 1.4953687e-10, 1.5222583e-10, 1.54899e-10, 1.5755733e-10, + 1.6020171e-10, 1.6283301e-10, 1.6545203e-10, 1.6805951e-10, + 1.7065617e-10, 1.732427e-10, 1.7581973e-10, 1.7838787e-10, + 1.8094774e-10, 1.8349985e-10, 1.8604476e-10, 1.8858298e-10, + 1.9111498e-10, 1.9364126e-10, 1.9616223e-10, 1.9867835e-10, + 2.0119004e-10, 2.0369768e-10, 2.0620168e-10, 2.087024e-10, + 2.1120022e-10, 2.136955e-10, 2.1618855e-10, 2.1867974e-10, + 2.2116936e-10, 2.2365775e-10, 2.261452e-10, 2.2863202e-10, + 2.311185e-10, 2.3360494e-10, 2.360916e-10, 2.3857874e-10, + 2.4106667e-10, 2.4355562e-10, 2.4604588e-10, 2.485377e-10, + 2.5103128e-10, 2.5352695e-10, 2.560249e-10, 2.585254e-10, + 2.6102867e-10, 2.6353494e-10, 2.6604446e-10, 2.6855745e-10, + 2.7107416e-10, 2.7359479e-10, 2.761196e-10, 2.7864877e-10, + 2.8118255e-10, 2.8372119e-10, 2.8626485e-10, 2.888138e-10, + 2.9136826e-10, 2.939284e-10, 2.9649452e-10, 2.9906677e-10, + 3.016454e-10, 3.0423064e-10, 3.0682268e-10, 3.0942177e-10, + 3.1202813e-10, 3.1464195e-10, 3.1726352e-10, 3.19893e-10, + 3.2253064e-10, 3.251767e-10, 3.2783135e-10, 3.3049485e-10, + 3.3316744e-10, 3.3584938e-10, 3.3854083e-10, 3.4124212e-10, + 3.4395342e-10, 3.46675e-10, 3.4940711e-10, 3.5215003e-10, + 3.5490397e-10, 3.5766917e-10, 3.6044595e-10, 3.6323455e-10, + 3.660352e-10, 3.6884823e-10, 3.7167386e-10, 3.745124e-10, + 3.773641e-10, 3.802293e-10, 3.8310827e-10, 3.860013e-10, + 3.8890866e-10, 3.918307e-10, 3.9476775e-10, 3.9772008e-10, + 4.0068804e-10, 4.0367196e-10, 4.0667217e-10, 4.09689e-10, + 4.1272286e-10, 4.1577405e-10, 4.1884296e-10, 4.2192994e-10, + 4.250354e-10, 4.281597e-10, 4.313033e-10, 4.3446652e-10, + 4.3764986e-10, 4.408537e-10, 4.4407847e-10, 4.4732465e-10, + 4.5059267e-10, 4.5388301e-10, 4.571962e-10, 
4.6053267e-10, + 4.6389292e-10, 4.6727755e-10, 4.70687e-10, 4.741219e-10, + 4.7758275e-10, 4.810702e-10, 4.845848e-10, 4.8812715e-10, + 4.9169796e-10, 4.9529775e-10, 4.989273e-10, 5.0258725e-10, + 5.0627835e-10, 5.100013e-10, 5.1375687e-10, 5.1754584e-10, + 5.21369e-10, 5.2522725e-10, 5.2912136e-10, 5.330522e-10, + 5.370208e-10, 5.4102806e-10, 5.45075e-10, 5.491625e-10, + 5.532918e-10, 5.5746385e-10, 5.616799e-10, 5.6594107e-10, + 5.7024857e-10, 5.746037e-10, 5.7900773e-10, 5.834621e-10, + 5.8796823e-10, 5.925276e-10, 5.971417e-10, 6.018122e-10, + 6.065408e-10, 6.113292e-10, 6.1617933e-10, 6.2109295e-10, + 6.260722e-10, 6.3111916e-10, 6.3623595e-10, 6.4142497e-10, + 6.4668854e-10, 6.5202926e-10, 6.5744976e-10, 6.6295286e-10, + 6.6854156e-10, 6.742188e-10, 6.79988e-10, 6.858526e-10, + 6.9181616e-10, 6.978826e-10, 7.04056e-10, 7.103407e-10, + 7.167412e-10, 7.2326256e-10, 7.2990985e-10, 7.366886e-10, + 7.4360473e-10, 7.5066453e-10, 7.5787476e-10, 7.6524265e-10, + 7.7277595e-10, 7.80483e-10, 7.883728e-10, 7.9645507e-10, + 8.047402e-10, 8.1323964e-10, 8.219657e-10, 8.309319e-10, + 8.401528e-10, 8.496445e-10, 8.594247e-10, 8.6951274e-10, + 8.799301e-10, 8.9070046e-10, 9.018503e-10, 9.134092e-10, + 9.254101e-10, 9.378904e-10, 9.508923e-10, 9.644638e-10, + 9.786603e-10, 9.935448e-10, 1.0091913e-09, 1.025686e-09, + 1.0431306e-09, 1.0616465e-09, 1.08138e-09, 1.1025096e-09, + 1.1252564e-09, 1.1498986e-09, 1.1767932e-09, 1.206409e-09, + 1.2393786e-09, 1.276585e-09, 1.3193139e-09, 1.3695435e-09, + 1.4305498e-09, 1.508365e-09, 1.6160854e-09, 1.7921248e-09, +} +var fe = [256]float32{ + 1, 0.9381437, 0.90046996, 0.87170434, 0.8477855, 0.8269933, + 0.8084217, 0.7915276, 0.77595687, 0.7614634, 0.7478686, + 0.7350381, 0.72286767, 0.71127474, 0.70019263, 0.6895665, + 0.67935055, 0.6695063, 0.66000086, 0.65080583, 0.6418967, + 0.63325197, 0.6248527, 0.6166822, 0.60872537, 0.60096896, + 0.5934009, 0.58601034, 0.5787874, 0.57172304, 0.5648092, + 0.5580383, 0.5514034, 0.5448982, 
0.5385169, 0.53225386, + 0.5261042, 0.52006316, 0.5141264, 0.50828975, 0.5025495, + 0.496902, 0.49134386, 0.485872, 0.48048335, 0.4751752, + 0.46994483, 0.46478975, 0.45970762, 0.45469615, 0.44975325, + 0.44487688, 0.44006512, 0.43531612, 0.43062815, 0.42599955, + 0.42142874, 0.4169142, 0.41245446, 0.40804818, 0.403694, + 0.3993907, 0.39513698, 0.39093173, 0.38677382, 0.38266218, + 0.37859577, 0.37457356, 0.37059465, 0.3666581, 0.362763, + 0.35890847, 0.35509375, 0.351318, 0.3475805, 0.34388044, + 0.34021714, 0.3365899, 0.33299807, 0.32944095, 0.32591796, + 0.3224285, 0.3189719, 0.31554767, 0.31215525, 0.30879408, + 0.3054636, 0.3021634, 0.29889292, 0.2956517, 0.29243928, + 0.28925523, 0.28609908, 0.28297043, 0.27986884, 0.27679393, + 0.2737453, 0.2707226, 0.2677254, 0.26475343, 0.26180625, + 0.25888354, 0.25598502, 0.2531103, 0.25025907, 0.24743107, + 0.24462597, 0.24184346, 0.23908329, 0.23634516, 0.23362878, + 0.23093392, 0.2282603, 0.22560766, 0.22297576, 0.22036438, + 0.21777324, 0.21520215, 0.21265087, 0.21011916, 0.20760682, + 0.20511365, 0.20263945, 0.20018397, 0.19774707, 0.19532852, + 0.19292815, 0.19054577, 0.1881812, 0.18583426, 0.18350479, + 0.1811926, 0.17889754, 0.17661946, 0.17435817, 0.17211354, + 0.1698854, 0.16767362, 0.16547804, 0.16329853, 0.16113494, + 0.15898713, 0.15685499, 0.15473837, 0.15263714, 0.15055119, + 0.14848037, 0.14642459, 0.14438373, 0.14235765, 0.14034624, + 0.13834943, 0.13636707, 0.13439907, 0.13244532, 0.13050574, + 0.1285802, 0.12666863, 0.12477092, 0.12288698, 0.12101672, + 0.119160056, 0.1173169, 0.115487166, 0.11367077, 0.11186763, + 0.11007768, 0.10830083, 0.10653701, 0.10478614, 0.10304816, + 0.101323, 0.09961058, 0.09791085, 0.09622374, 0.09454919, + 0.09288713, 0.091237515, 0.08960028, 0.087975375, 0.08636274, + 0.08476233, 0.083174095, 0.081597984, 0.08003395, 0.07848195, + 0.076941945, 0.07541389, 0.07389775, 0.072393484, 0.07090106, + 0.069420435, 0.06795159, 0.066494495, 0.06504912, 0.063615434, + 0.062193416, 
0.060783047, 0.059384305, 0.057997175, + 0.05662164, 0.05525769, 0.053905312, 0.052564494, 0.051235236, + 0.049917534, 0.048611384, 0.047316793, 0.046033762, 0.0447623, + 0.043502413, 0.042254124, 0.041017443, 0.039792392, + 0.038578995, 0.037377283, 0.036187284, 0.035009038, + 0.033842582, 0.032687962, 0.031545233, 0.030414443, 0.02929566, + 0.02818895, 0.027094385, 0.026012046, 0.024942026, 0.023884421, + 0.022839336, 0.021806888, 0.020787204, 0.019780423, 0.0187867, + 0.0178062, 0.016839107, 0.015885621, 0.014945968, 0.014020392, + 0.013109165, 0.012212592, 0.011331013, 0.01046481, 0.009614414, + 0.008780315, 0.007963077, 0.0071633533, 0.006381906, + 0.0056196423, 0.0048776558, 0.004157295, 0.0034602648, + 0.0027887989, 0.0021459677, 0.0015362998, 0.0009672693, + 0.00045413437, +} diff --git a/src/math/rand/export_test.go b/src/math/rand/export_test.go new file mode 100644 index 0000000..560010b --- /dev/null +++ b/src/math/rand/export_test.go @@ -0,0 +1,17 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +func Int31nForTest(r *Rand, n int32) int32 { + return r.int31n(n) +} + +func GetNormalDistributionParameters() (float64, [128]uint32, [128]float32, [128]float32) { + return rn, kn, wn, fn +} + +func GetExponentialDistributionParameters() (float64, [256]uint32, [256]float32, [256]float32) { + return re, ke, we, fe +} diff --git a/src/math/rand/gen_cooked.go b/src/math/rand/gen_cooked.go new file mode 100644 index 0000000..782bb66 --- /dev/null +++ b/src/math/rand/gen_cooked.go @@ -0,0 +1,89 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// This program computes the value of rngCooked in rng.go, +// which is used for seeding all instances of rand.Source. 
+// a 64bit and a 63bit version of the array is printed to +// the standard output. + +package main + +import "fmt" + +const ( + length = 607 + tap = 273 + mask = (1 << 63) - 1 + a = 48271 + m = (1 << 31) - 1 + q = 44488 + r = 3399 +) + +var ( + rngVec [length]int64 + rngTap, rngFeed int +) + +func seedrand(x int32) int32 { + hi := x / q + lo := x % q + x = a*lo - r*hi + if x < 0 { + x += m + } + return x +} + +func srand(seed int32) { + rngTap = 0 + rngFeed = length - tap + seed %= m + if seed < 0 { + seed += m + } else if seed == 0 { + seed = 89482311 + } + x := seed + for i := -20; i < length; i++ { + x = seedrand(x) + if i >= 0 { + var u int64 + u = int64(x) << 20 + x = seedrand(x) + u ^= int64(x) << 10 + x = seedrand(x) + u ^= int64(x) + rngVec[i] = u + } + } +} + +func vrand() int64 { + rngTap-- + if rngTap < 0 { + rngTap += length + } + rngFeed-- + if rngFeed < 0 { + rngFeed += length + } + x := (rngVec[rngFeed] + rngVec[rngTap]) + rngVec[rngFeed] = x + return x +} + +func main() { + srand(1) + for i := uint64(0); i < 7.8e12; i++ { + vrand() + } + fmt.Printf("rngVec after 7.8e12 calls to vrand:\n%#v\n", rngVec) + for i := range rngVec { + rngVec[i] &= mask + } + fmt.Printf("lower 63bit of rngVec after 7.8e12 calls to vrand:\n%#v\n", rngVec) +} diff --git a/src/math/rand/normal.go b/src/math/rand/normal.go new file mode 100644 index 0000000..2c5a7aa --- /dev/null +++ b/src/math/rand/normal.go @@ -0,0 +1,157 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package rand + +import ( + "math" +) + +/* + * Normal distribution + * + * See "The Ziggurat Method for Generating Random Variables" + * (Marsaglia & Tsang, 2000) + * http://www.jstatsoft.org/v05/i08/paper [pdf] + */ + +const ( + rn = 3.442619855899 +) + +func absInt32(i int32) uint32 { + if i < 0 { + return uint32(-i) + } + return uint32(i) +} + +// NormFloat64 returns a normally distributed float64 in +// the range -math.MaxFloat64 through +math.MaxFloat64 inclusive, +// with standard normal distribution (mean = 0, stddev = 1). +// To produce a different normal distribution, callers can +// adjust the output using: +// +// sample = NormFloat64() * desiredStdDev + desiredMean +// +func (r *Rand) NormFloat64() float64 { + for { + j := int32(r.Uint32()) // Possibly negative + i := j & 0x7F + x := float64(j) * float64(wn[i]) + if absInt32(j) < kn[i] { + // This case should be hit better than 99% of the time. + return x + } + + if i == 0 { + // This extra work is only required for the base strip. 
+ for { + x = -math.Log(r.Float64()) * (1.0 / rn) + y := -math.Log(r.Float64()) + if y+y >= x*x { + break + } + } + if j > 0 { + return rn + x + } + return -rn - x + } + if fn[i]+float32(r.Float64())*(fn[i-1]-fn[i]) < float32(math.Exp(-.5*x*x)) { + return x + } + } +} + +var kn = [128]uint32{ + 0x76ad2212, 0x0, 0x600f1b53, 0x6ce447a6, 0x725b46a2, + 0x7560051d, 0x774921eb, 0x789a25bd, 0x799045c3, 0x7a4bce5d, + 0x7adf629f, 0x7b5682a6, 0x7bb8a8c6, 0x7c0ae722, 0x7c50cce7, + 0x7c8cec5b, 0x7cc12cd6, 0x7ceefed2, 0x7d177e0b, 0x7d3b8883, + 0x7d5bce6c, 0x7d78dd64, 0x7d932886, 0x7dab0e57, 0x7dc0dd30, + 0x7dd4d688, 0x7de73185, 0x7df81cea, 0x7e07c0a3, 0x7e163efa, + 0x7e23b587, 0x7e303dfd, 0x7e3beec2, 0x7e46db77, 0x7e51155d, + 0x7e5aabb3, 0x7e63abf7, 0x7e6c222c, 0x7e741906, 0x7e7b9a18, + 0x7e82adfa, 0x7e895c63, 0x7e8fac4b, 0x7e95a3fb, 0x7e9b4924, + 0x7ea0a0ef, 0x7ea5b00d, 0x7eaa7ac3, 0x7eaf04f3, 0x7eb3522a, + 0x7eb765a5, 0x7ebb4259, 0x7ebeeafd, 0x7ec2620a, 0x7ec5a9c4, + 0x7ec8c441, 0x7ecbb365, 0x7ece78ed, 0x7ed11671, 0x7ed38d62, + 0x7ed5df12, 0x7ed80cb4, 0x7eda175c, 0x7edc0005, 0x7eddc78e, + 0x7edf6ebf, 0x7ee0f647, 0x7ee25ebe, 0x7ee3a8a9, 0x7ee4d473, + 0x7ee5e276, 0x7ee6d2f5, 0x7ee7a620, 0x7ee85c10, 0x7ee8f4cd, + 0x7ee97047, 0x7ee9ce59, 0x7eea0eca, 0x7eea3147, 0x7eea3568, + 0x7eea1aab, 0x7ee9e071, 0x7ee98602, 0x7ee90a88, 0x7ee86d08, + 0x7ee7ac6a, 0x7ee6c769, 0x7ee5bc9c, 0x7ee48a67, 0x7ee32efc, + 0x7ee1a857, 0x7edff42f, 0x7ede0ffa, 0x7edbf8d9, 0x7ed9ab94, + 0x7ed7248d, 0x7ed45fae, 0x7ed1585c, 0x7ece095f, 0x7eca6ccb, + 0x7ec67be2, 0x7ec22eee, 0x7ebd7d1a, 0x7eb85c35, 0x7eb2c075, + 0x7eac9c20, 0x7ea5df27, 0x7e9e769f, 0x7e964c16, 0x7e8d44ba, + 0x7e834033, 0x7e781728, 0x7e6b9933, 0x7e5d8a1a, 0x7e4d9ded, + 0x7e3b737a, 0x7e268c2f, 0x7e0e3ff5, 0x7df1aa5d, 0x7dcf8c72, + 0x7da61a1e, 0x7d72a0fb, 0x7d30e097, 0x7cd9b4ab, 0x7c600f1a, + 0x7ba90bdc, 0x7a722176, 0x77d664e5, +} +var wn = [128]float32{ + 1.7290405e-09, 1.2680929e-10, 1.6897518e-10, 1.9862688e-10, + 2.2232431e-10, 2.4244937e-10, 
2.601613e-10, 2.7611988e-10, + 2.9073963e-10, 3.042997e-10, 3.1699796e-10, 3.289802e-10, + 3.4035738e-10, 3.5121603e-10, 3.616251e-10, 3.7164058e-10, + 3.8130857e-10, 3.9066758e-10, 3.9975012e-10, 4.08584e-10, + 4.1719309e-10, 4.2559822e-10, 4.338176e-10, 4.418672e-10, + 4.497613e-10, 4.5751258e-10, 4.651324e-10, 4.7263105e-10, + 4.8001775e-10, 4.87301e-10, 4.944885e-10, 5.015873e-10, + 5.0860405e-10, 5.155446e-10, 5.2241467e-10, 5.2921934e-10, + 5.359635e-10, 5.426517e-10, 5.4928817e-10, 5.5587696e-10, + 5.624219e-10, 5.6892646e-10, 5.753941e-10, 5.818282e-10, + 5.882317e-10, 5.946077e-10, 6.00959e-10, 6.072884e-10, + 6.135985e-10, 6.19892e-10, 6.2617134e-10, 6.3243905e-10, + 6.386974e-10, 6.449488e-10, 6.511956e-10, 6.5744005e-10, + 6.6368433e-10, 6.699307e-10, 6.7618144e-10, 6.824387e-10, + 6.8870465e-10, 6.949815e-10, 7.012715e-10, 7.075768e-10, + 7.1389966e-10, 7.202424e-10, 7.266073e-10, 7.329966e-10, + 7.394128e-10, 7.4585826e-10, 7.5233547e-10, 7.58847e-10, + 7.653954e-10, 7.719835e-10, 7.7861395e-10, 7.852897e-10, + 7.920138e-10, 7.987892e-10, 8.0561924e-10, 8.125073e-10, + 8.194569e-10, 8.2647167e-10, 8.3355556e-10, 8.407127e-10, + 8.479473e-10, 8.55264e-10, 8.6266755e-10, 8.7016316e-10, + 8.777562e-10, 8.8545243e-10, 8.932582e-10, 9.0117996e-10, + 9.09225e-10, 9.174008e-10, 9.2571584e-10, 9.341788e-10, + 9.427997e-10, 9.515889e-10, 9.605579e-10, 9.697193e-10, + 9.790869e-10, 9.88676e-10, 9.985036e-10, 1.0085882e-09, + 1.0189509e-09, 1.0296151e-09, 1.0406069e-09, 1.0519566e-09, + 1.063698e-09, 1.0758702e-09, 1.0885183e-09, 1.1016947e-09, + 1.1154611e-09, 1.1298902e-09, 1.1450696e-09, 1.1611052e-09, + 1.1781276e-09, 1.1962995e-09, 1.2158287e-09, 1.2369856e-09, + 1.2601323e-09, 1.2857697e-09, 1.3146202e-09, 1.347784e-09, + 1.3870636e-09, 1.4357403e-09, 1.5008659e-09, 1.6030948e-09, +} +var fn = [128]float32{ + 1, 0.9635997, 0.9362827, 0.9130436, 0.89228165, 0.87324303, + 0.8555006, 0.8387836, 0.8229072, 0.8077383, 0.793177, + 0.7791461, 0.7655842, 
0.7524416, 0.73967725, 0.7272569, + 0.7151515, 0.7033361, 0.69178915, 0.68049186, 0.6694277, + 0.658582, 0.6479418, 0.63749546, 0.6272325, 0.6171434, + 0.6072195, 0.5974532, 0.58783704, 0.5783647, 0.56903, + 0.5598274, 0.5507518, 0.54179835, 0.5329627, 0.52424055, + 0.5156282, 0.50712204, 0.49871865, 0.49041483, 0.48220766, + 0.4740943, 0.46607214, 0.4581387, 0.45029163, 0.44252872, + 0.43484783, 0.427247, 0.41972435, 0.41227803, 0.40490642, + 0.39760786, 0.3903808, 0.3832238, 0.37613547, 0.36911446, + 0.3621595, 0.35526937, 0.34844297, 0.34167916, 0.33497685, + 0.3283351, 0.3217529, 0.3152294, 0.30876362, 0.30235484, + 0.29600215, 0.28970486, 0.2834622, 0.2772735, 0.27113807, + 0.2650553, 0.25902456, 0.2530453, 0.24711695, 0.241239, + 0.23541094, 0.22963232, 0.2239027, 0.21822165, 0.21258877, + 0.20700371, 0.20146611, 0.19597565, 0.19053204, 0.18513499, + 0.17978427, 0.17447963, 0.1692209, 0.16400786, 0.15884037, + 0.15371831, 0.14864157, 0.14361008, 0.13862377, 0.13368265, + 0.12878671, 0.12393598, 0.119130544, 0.11437051, 0.10965602, + 0.104987256, 0.10036444, 0.095787846, 0.0912578, 0.08677467, + 0.0823389, 0.077950984, 0.073611505, 0.06932112, 0.06508058, + 0.06089077, 0.056752663, 0.0526674, 0.048636295, 0.044660863, + 0.040742867, 0.03688439, 0.033087887, 0.029356318, + 0.025693292, 0.022103304, 0.018592102, 0.015167298, + 0.011839478, 0.008624485, 0.005548995, 0.0026696292, +} diff --git a/src/math/rand/race_test.go b/src/math/rand/race_test.go new file mode 100644 index 0000000..e7d1036 --- /dev/null +++ b/src/math/rand/race_test.go @@ -0,0 +1,49 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand_test + +import ( + . "math/rand" + "sync" + "testing" +) + +// TestConcurrent exercises the rand API concurrently, triggering situations +// where the race detector is likely to detect issues. 
+func TestConcurrent(t *testing.T) { + const ( + numRoutines = 10 + numCycles = 10 + ) + var wg sync.WaitGroup + defer wg.Wait() + wg.Add(numRoutines) + for i := 0; i < numRoutines; i++ { + go func(i int) { + defer wg.Done() + buf := make([]byte, 997) + for j := 0; j < numCycles; j++ { + var seed int64 + seed += int64(ExpFloat64()) + seed += int64(Float32()) + seed += int64(Float64()) + seed += int64(Intn(Int())) + seed += int64(Int31n(Int31())) + seed += int64(Int63n(Int63())) + seed += int64(NormFloat64()) + seed += int64(Uint32()) + seed += int64(Uint64()) + for _, p := range Perm(10) { + seed += int64(p) + } + Read(buf) + for _, b := range buf { + seed += int64(b) + } + Seed(int64(i*j) * seed) + } + }(i) + } +} diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go new file mode 100644 index 0000000..13f20ca --- /dev/null +++ b/src/math/rand/rand.go @@ -0,0 +1,421 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package rand implements pseudo-random number generators unsuitable for +// security-sensitive work. +// +// Random numbers are generated by a Source. Top-level functions, such as +// Float64 and Int, use a default shared Source that produces a deterministic +// sequence of values each time a program is run. Use the Seed function to +// initialize the default Source if different behavior is required for each run. +// The default Source is safe for concurrent use by multiple goroutines, but +// Sources created by NewSource are not. +// +// This package's outputs might be easily predictable regardless of how it's +// seeded. For random numbers suitable for security-sensitive work, see the +// crypto/rand package. +package rand + +import "sync" + +// A Source represents a source of uniformly-distributed +// pseudo-random int64 values in the range [0, 1<<63). 
+type Source interface { + Int63() int64 + Seed(seed int64) +} + +// A Source64 is a Source that can also generate +// uniformly-distributed pseudo-random uint64 values in +// the range [0, 1<<64) directly. +// If a Rand r's underlying Source s implements Source64, +// then r.Uint64 returns the result of one call to s.Uint64 +// instead of making two calls to s.Int63. +type Source64 interface { + Source + Uint64() uint64 +} + +// NewSource returns a new pseudo-random Source seeded with the given value. +// Unlike the default Source used by top-level functions, this source is not +// safe for concurrent use by multiple goroutines. +func NewSource(seed int64) Source { + var rng rngSource + rng.Seed(seed) + return &rng +} + +// A Rand is a source of random numbers. +type Rand struct { + src Source + s64 Source64 // non-nil if src is source64 + + // readVal contains remainder of 63-bit integer used for bytes + // generation during most recent Read call. + // It is saved so next Read call can start where the previous + // one finished. + readVal int64 + // readPos indicates the number of low-order bytes of readVal + // that are still valid. + readPos int8 +} + +// New returns a new Rand that uses random values from src +// to generate other random values. +func New(src Source) *Rand { + s64, _ := src.(Source64) + return &Rand{src: src, s64: s64} +} + +// Seed uses the provided seed value to initialize the generator to a deterministic state. +// Seed should not be called concurrently with any other Rand method. +func (r *Rand) Seed(seed int64) { + if lk, ok := r.src.(*lockedSource); ok { + lk.seedPos(seed, &r.readPos) + return + } + + r.src.Seed(seed) + r.readPos = 0 +} + +// Int63 returns a non-negative pseudo-random 63-bit integer as an int64. +func (r *Rand) Int63() int64 { return r.src.Int63() } + +// Uint32 returns a pseudo-random 32-bit value as a uint32. 
+func (r *Rand) Uint32() uint32 { return uint32(r.Int63() >> 31) } + +// Uint64 returns a pseudo-random 64-bit value as a uint64. +func (r *Rand) Uint64() uint64 { + if r.s64 != nil { + return r.s64.Uint64() + } + return uint64(r.Int63())>>31 | uint64(r.Int63())<<32 +} + +// Int31 returns a non-negative pseudo-random 31-bit integer as an int32. +func (r *Rand) Int31() int32 { return int32(r.Int63() >> 32) } + +// Int returns a non-negative pseudo-random int. +func (r *Rand) Int() int { + u := uint(r.Int63()) + return int(u << 1 >> 1) // clear sign bit if int == int32 +} + +// Int63n returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n). +// It panics if n <= 0. +func (r *Rand) Int63n(n int64) int64 { + if n <= 0 { + panic("invalid argument to Int63n") + } + if n&(n-1) == 0 { // n is power of two, can mask + return r.Int63() & (n - 1) + } + max := int64((1 << 63) - 1 - (1<<63)%uint64(n)) + v := r.Int63() + for v > max { + v = r.Int63() + } + return v % n +} + +// Int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n). +// It panics if n <= 0. +func (r *Rand) Int31n(n int32) int32 { + if n <= 0 { + panic("invalid argument to Int31n") + } + if n&(n-1) == 0 { // n is power of two, can mask + return r.Int31() & (n - 1) + } + max := int32((1 << 31) - 1 - (1<<31)%uint32(n)) + v := r.Int31() + for v > max { + v = r.Int31() + } + return v % n +} + +// int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n). +// n must be > 0, but int31n does not check this; the caller must ensure it. +// int31n exists because Int31n is inefficient, but Go 1 compatibility +// requires that the stream of values produced by math/rand remain unchanged. +// int31n can thus only be used internally, by newly introduced APIs. 
+// +// For implementation details, see: +// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction +// https://lemire.me/blog/2016/06/30/fast-random-shuffling +func (r *Rand) int31n(n int32) int32 { + v := r.Uint32() + prod := uint64(v) * uint64(n) + low := uint32(prod) + if low < uint32(n) { + thresh := uint32(-n) % uint32(n) + for low < thresh { + v = r.Uint32() + prod = uint64(v) * uint64(n) + low = uint32(prod) + } + } + return int32(prod >> 32) +} + +// Intn returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n). +// It panics if n <= 0. +func (r *Rand) Intn(n int) int { + if n <= 0 { + panic("invalid argument to Intn") + } + if n <= 1<<31-1 { + return int(r.Int31n(int32(n))) + } + return int(r.Int63n(int64(n))) +} + +// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0). +func (r *Rand) Float64() float64 { + // A clearer, simpler implementation would be: + // return float64(r.Int63n(1<<53)) / (1<<53) + // However, Go 1 shipped with + // return float64(r.Int63()) / (1 << 63) + // and we want to preserve that value stream. + // + // There is one bug in the value stream: r.Int63() may be so close + // to 1<<63 that the division rounds up to 1.0, and we've guaranteed + // that the result is always less than 1.0. + // + // We tried to fix this by mapping 1.0 back to 0.0, but since float64 + // values near 0 are much denser than near 1, mapping 1 to 0 caused + // a theoretically significant overshoot in the probability of returning 0. + // Instead of that, if we round up to 1, just try again. + // Getting 1 only happens 1/2⁵³ of the time, so most clients + // will not observe it anyway. +again: + f := float64(r.Int63()) / (1 << 63) + if f == 1 { + goto again // resample; this branch is taken O(never) + } + return f +} + +// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0). 
+func (r *Rand) Float32() float32 { + // Same rationale as in Float64: we want to preserve the Go 1 value + // stream except we want to fix it not to return 1.0 + // This only happens 1/2²⁴ of the time (plus the 1/2⁵³ of the time in Float64). +again: + f := float32(r.Float64()) + if f == 1 { + goto again // resample; this branch is taken O(very rarely) + } + return f +} + +// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers +// in the half-open interval [0,n). +func (r *Rand) Perm(n int) []int { + m := make([]int, n) + // In the following loop, the iteration when i=0 always swaps m[0] with m[0]. + // A change to remove this useless iteration is to assign 1 to i in the init + // statement. But Perm also effects r. Making this change will affect + // the final state of r. So this change can't be made for compatibility + // reasons for Go 1. + for i := 0; i < n; i++ { + j := r.Intn(i + 1) + m[i] = m[j] + m[j] = i + } + return m +} + +// Shuffle pseudo-randomizes the order of elements. +// n is the number of elements. Shuffle panics if n < 0. +// swap swaps the elements with indexes i and j. +func (r *Rand) Shuffle(n int, swap func(i, j int)) { + if n < 0 { + panic("invalid argument to Shuffle") + } + + // Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle + // Shuffle really ought not be called with n that doesn't fit in 32 bits. + // Not only will it take a very long time, but with 2³¹! possible permutations, + // there's no way that any PRNG can have a big enough internal state to + // generate even a minuscule percentage of the possible permutations. + // Nevertheless, the right API signature accepts an int n, so handle it as best we can. + i := n - 1 + for ; i > 1<<31-1-1; i-- { + j := int(r.Int63n(int64(i + 1))) + swap(i, j) + } + for ; i > 0; i-- { + j := int(r.int31n(int32(i + 1))) + swap(i, j) + } +} + +// Read generates len(p) random bytes and writes them into p. 
It +// always returns len(p) and a nil error. +// Read should not be called concurrently with any other Rand method. +func (r *Rand) Read(p []byte) (n int, err error) { + if lk, ok := r.src.(*lockedSource); ok { + return lk.read(p, &r.readVal, &r.readPos) + } + return read(p, r.src, &r.readVal, &r.readPos) +} + +func read(p []byte, src Source, readVal *int64, readPos *int8) (n int, err error) { + pos := *readPos + val := *readVal + rng, _ := src.(*rngSource) + for n = 0; n < len(p); n++ { + if pos == 0 { + if rng != nil { + val = rng.Int63() + } else { + val = src.Int63() + } + pos = 7 + } + p[n] = byte(val) + val >>= 8 + pos-- + } + *readPos = pos + *readVal = val + return +} + +/* + * Top-level convenience functions + */ + +var globalRand = New(&lockedSource{src: NewSource(1).(*rngSource)}) + +// Type assert that globalRand's source is a lockedSource whose src is a *rngSource. +var _ *rngSource = globalRand.src.(*lockedSource).src + +// Seed uses the provided seed value to initialize the default Source to a +// deterministic state. If Seed is not called, the generator behaves as +// if seeded by Seed(1). Seed values that have the same remainder when +// divided by 2³¹-1 generate the same pseudo-random sequence. +// Seed, unlike the Rand.Seed method, is safe for concurrent use. +func Seed(seed int64) { globalRand.Seed(seed) } + +// Int63 returns a non-negative pseudo-random 63-bit integer as an int64 +// from the default Source. +func Int63() int64 { return globalRand.Int63() } + +// Uint32 returns a pseudo-random 32-bit value as a uint32 +// from the default Source. +func Uint32() uint32 { return globalRand.Uint32() } + +// Uint64 returns a pseudo-random 64-bit value as a uint64 +// from the default Source. +func Uint64() uint64 { return globalRand.Uint64() } + +// Int31 returns a non-negative pseudo-random 31-bit integer as an int32 +// from the default Source. 
+func Int31() int32 { return globalRand.Int31() } + +// Int returns a non-negative pseudo-random int from the default Source. +func Int() int { return globalRand.Int() } + +// Int63n returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n) +// from the default Source. +// It panics if n <= 0. +func Int63n(n int64) int64 { return globalRand.Int63n(n) } + +// Int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n) +// from the default Source. +// It panics if n <= 0. +func Int31n(n int32) int32 { return globalRand.Int31n(n) } + +// Intn returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n) +// from the default Source. +// It panics if n <= 0. +func Intn(n int) int { return globalRand.Intn(n) } + +// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0) +// from the default Source. +func Float64() float64 { return globalRand.Float64() } + +// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0) +// from the default Source. +func Float32() float32 { return globalRand.Float32() } + +// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers +// in the half-open interval [0,n) from the default Source. +func Perm(n int) []int { return globalRand.Perm(n) } + +// Shuffle pseudo-randomizes the order of elements using the default Source. +// n is the number of elements. Shuffle panics if n < 0. +// swap swaps the elements with indexes i and j. +func Shuffle(n int, swap func(i, j int)) { globalRand.Shuffle(n, swap) } + +// Read generates len(p) random bytes from the default Source and +// writes them into p. It always returns len(p) and a nil error. +// Read, unlike the Rand.Read method, is safe for concurrent use. 
+func Read(p []byte) (n int, err error) { return globalRand.Read(p) } + +// NormFloat64 returns a normally distributed float64 in the range +// [-math.MaxFloat64, +math.MaxFloat64] with +// standard normal distribution (mean = 0, stddev = 1) +// from the default Source. +// To produce a different normal distribution, callers can +// adjust the output using: +// +// sample = NormFloat64() * desiredStdDev + desiredMean +// +func NormFloat64() float64 { return globalRand.NormFloat64() } + +// ExpFloat64 returns an exponentially distributed float64 in the range +// (0, +math.MaxFloat64] with an exponential distribution whose rate parameter +// (lambda) is 1 and whose mean is 1/lambda (1) from the default Source. +// To produce a distribution with a different rate parameter, +// callers can adjust the output using: +// +// sample = ExpFloat64() / desiredRateParameter +// +func ExpFloat64() float64 { return globalRand.ExpFloat64() } + +type lockedSource struct { + lk sync.Mutex + src *rngSource +} + +func (r *lockedSource) Int63() (n int64) { + r.lk.Lock() + n = r.src.Int63() + r.lk.Unlock() + return +} + +func (r *lockedSource) Uint64() (n uint64) { + r.lk.Lock() + n = r.src.Uint64() + r.lk.Unlock() + return +} + +func (r *lockedSource) Seed(seed int64) { + r.lk.Lock() + r.src.Seed(seed) + r.lk.Unlock() +} + +// seedPos implements Seed for a lockedSource without a race condition. +func (r *lockedSource) seedPos(seed int64, readPos *int8) { + r.lk.Lock() + r.src.Seed(seed) + *readPos = 0 + r.lk.Unlock() +} + +// read implements Read for a lockedSource without a race condition. +func (r *lockedSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) { + r.lk.Lock() + n, err = read(p, r.src, readVal, readPos) + r.lk.Unlock() + return +} diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go new file mode 100644 index 0000000..462de8b --- /dev/null +++ b/src/math/rand/rand_test.go @@ -0,0 +1,685 @@ +// Copyright 2009 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand_test + +import ( + "bytes" + "errors" + "fmt" + "internal/testenv" + "io" + "math" + . "math/rand" + "os" + "runtime" + "testing" + "testing/iotest" +) + +const ( + numTestSamples = 10000 +) + +var rn, kn, wn, fn = GetNormalDistributionParameters() +var re, ke, we, fe = GetExponentialDistributionParameters() + +type statsResults struct { + mean float64 + stddev float64 + closeEnough float64 + maxError float64 +} + +func max(a, b float64) float64 { + if a > b { + return a + } + return b +} + +func nearEqual(a, b, closeEnough, maxError float64) bool { + absDiff := math.Abs(a - b) + if absDiff < closeEnough { // Necessary when one value is zero and one value is close to zero. + return true + } + return absDiff/max(math.Abs(a), math.Abs(b)) < maxError +} + +var testSeeds = []int64{1, 1754801282, 1698661970, 1550503961} + +// checkSimilarDistribution returns success if the mean and stddev of the +// two statsResults are similar. 
+func (this *statsResults) checkSimilarDistribution(expected *statsResults) error { + if !nearEqual(this.mean, expected.mean, expected.closeEnough, expected.maxError) { + s := fmt.Sprintf("mean %v != %v (allowed error %v, %v)", this.mean, expected.mean, expected.closeEnough, expected.maxError) + fmt.Println(s) + return errors.New(s) + } + if !nearEqual(this.stddev, expected.stddev, expected.closeEnough, expected.maxError) { + s := fmt.Sprintf("stddev %v != %v (allowed error %v, %v)", this.stddev, expected.stddev, expected.closeEnough, expected.maxError) + fmt.Println(s) + return errors.New(s) + } + return nil +} + +func getStatsResults(samples []float64) *statsResults { + res := new(statsResults) + var sum, squaresum float64 + for _, s := range samples { + sum += s + squaresum += s * s + } + res.mean = sum / float64(len(samples)) + res.stddev = math.Sqrt(squaresum/float64(len(samples)) - res.mean*res.mean) + return res +} + +func checkSampleDistribution(t *testing.T, samples []float64, expected *statsResults) { + t.Helper() + actual := getStatsResults(samples) + err := actual.checkSimilarDistribution(expected) + if err != nil { + t.Errorf(err.Error()) + } +} + +func checkSampleSliceDistributions(t *testing.T, samples []float64, nslices int, expected *statsResults) { + t.Helper() + chunk := len(samples) / nslices + for i := 0; i < nslices; i++ { + low := i * chunk + var high int + if i == nslices-1 { + high = len(samples) - 1 + } else { + high = (i + 1) * chunk + } + checkSampleDistribution(t, samples[low:high], expected) + } +} + +// +// Normal distribution tests +// + +func generateNormalSamples(nsamples int, mean, stddev float64, seed int64) []float64 { + r := New(NewSource(seed)) + samples := make([]float64, nsamples) + for i := range samples { + samples[i] = r.NormFloat64()*stddev + mean + } + return samples +} + +func testNormalDistribution(t *testing.T, nsamples int, mean, stddev float64, seed int64) { + //fmt.Printf("testing nsamples=%v mean=%v stddev=%v 
seed=%v\n", nsamples, mean, stddev, seed); + + samples := generateNormalSamples(nsamples, mean, stddev, seed) + errorScale := max(1.0, stddev) // Error scales with stddev + expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.08 * errorScale} + + // Make sure that the entire set matches the expected distribution. + checkSampleDistribution(t, samples, expected) + + // Make sure that each half of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 2, expected) + + // Make sure that each 7th of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 7, expected) +} + +// Actual tests + +func TestStandardNormalValues(t *testing.T) { + for _, seed := range testSeeds { + testNormalDistribution(t, numTestSamples, 0, 1, seed) + } +} + +func TestNonStandardNormalValues(t *testing.T) { + sdmax := 1000.0 + mmax := 1000.0 + if testing.Short() { + sdmax = 5 + mmax = 5 + } + for sd := 0.5; sd < sdmax; sd *= 2 { + for m := 0.5; m < mmax; m *= 2 { + for _, seed := range testSeeds { + testNormalDistribution(t, numTestSamples, m, sd, seed) + if testing.Short() { + break + } + } + } + } +} + +// +// Exponential distribution tests +// + +func generateExponentialSamples(nsamples int, rate float64, seed int64) []float64 { + r := New(NewSource(seed)) + samples := make([]float64, nsamples) + for i := range samples { + samples[i] = r.ExpFloat64() / rate + } + return samples +} + +func testExponentialDistribution(t *testing.T, nsamples int, rate float64, seed int64) { + //fmt.Printf("testing nsamples=%v rate=%v seed=%v\n", nsamples, rate, seed); + + mean := 1 / rate + stddev := mean + + samples := generateExponentialSamples(nsamples, rate, seed) + errorScale := max(1.0, 1/rate) // Error scales with the inverse of the rate + expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.20 * errorScale} + + // Make sure that the entire set matches the expected distribution. 
+ checkSampleDistribution(t, samples, expected) + + // Make sure that each half of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 2, expected) + + // Make sure that each 7th of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 7, expected) +} + +// Actual tests + +func TestStandardExponentialValues(t *testing.T) { + for _, seed := range testSeeds { + testExponentialDistribution(t, numTestSamples, 1, seed) + } +} + +func TestNonStandardExponentialValues(t *testing.T) { + for rate := 0.05; rate < 10; rate *= 2 { + for _, seed := range testSeeds { + testExponentialDistribution(t, numTestSamples, rate, seed) + if testing.Short() { + break + } + } + } +} + +// +// Table generation tests +// + +func initNorm() (testKn []uint32, testWn, testFn []float32) { + const m1 = 1 << 31 + var ( + dn float64 = rn + tn = dn + vn float64 = 9.91256303526217e-3 + ) + + testKn = make([]uint32, 128) + testWn = make([]float32, 128) + testFn = make([]float32, 128) + + q := vn / math.Exp(-0.5*dn*dn) + testKn[0] = uint32((dn / q) * m1) + testKn[1] = 0 + testWn[0] = float32(q / m1) + testWn[127] = float32(dn / m1) + testFn[0] = 1.0 + testFn[127] = float32(math.Exp(-0.5 * dn * dn)) + for i := 126; i >= 1; i-- { + dn = math.Sqrt(-2.0 * math.Log(vn/dn+math.Exp(-0.5*dn*dn))) + testKn[i+1] = uint32((dn / tn) * m1) + tn = dn + testFn[i] = float32(math.Exp(-0.5 * dn * dn)) + testWn[i] = float32(dn / m1) + } + return +} + +func initExp() (testKe []uint32, testWe, testFe []float32) { + const m2 = 1 << 32 + var ( + de float64 = re + te = de + ve float64 = 3.9496598225815571993e-3 + ) + + testKe = make([]uint32, 256) + testWe = make([]float32, 256) + testFe = make([]float32, 256) + + q := ve / math.Exp(-de) + testKe[0] = uint32((de / q) * m2) + testKe[1] = 0 + testWe[0] = float32(q / m2) + testWe[255] = float32(de / m2) + testFe[0] = 1.0 + testFe[255] = float32(math.Exp(-de)) + for i := 254; i >= 1; i-- { + de = -math.Log(ve/de 
+ math.Exp(-de)) + testKe[i+1] = uint32((de / te) * m2) + te = de + testFe[i] = float32(math.Exp(-de)) + testWe[i] = float32(de / m2) + } + return +} + +// compareUint32Slices returns the first index where the two slices +// disagree, or <0 if the lengths are the same and all elements +// are identical. +func compareUint32Slices(s1, s2 []uint32) int { + if len(s1) != len(s2) { + if len(s1) > len(s2) { + return len(s2) + 1 + } + return len(s1) + 1 + } + for i := range s1 { + if s1[i] != s2[i] { + return i + } + } + return -1 +} + +// compareFloat32Slices returns the first index where the two slices +// disagree, or <0 if the lengths are the same and all elements +// are identical. +func compareFloat32Slices(s1, s2 []float32) int { + if len(s1) != len(s2) { + if len(s1) > len(s2) { + return len(s2) + 1 + } + return len(s1) + 1 + } + for i := range s1 { + if !nearEqual(float64(s1[i]), float64(s2[i]), 0, 1e-7) { + return i + } + } + return -1 +} + +func TestNormTables(t *testing.T) { + testKn, testWn, testFn := initNorm() + if i := compareUint32Slices(kn[0:], testKn); i >= 0 { + t.Errorf("kn disagrees at index %v; %v != %v", i, kn[i], testKn[i]) + } + if i := compareFloat32Slices(wn[0:], testWn); i >= 0 { + t.Errorf("wn disagrees at index %v; %v != %v", i, wn[i], testWn[i]) + } + if i := compareFloat32Slices(fn[0:], testFn); i >= 0 { + t.Errorf("fn disagrees at index %v; %v != %v", i, fn[i], testFn[i]) + } +} + +func TestExpTables(t *testing.T) { + testKe, testWe, testFe := initExp() + if i := compareUint32Slices(ke[0:], testKe); i >= 0 { + t.Errorf("ke disagrees at index %v; %v != %v", i, ke[i], testKe[i]) + } + if i := compareFloat32Slices(we[0:], testWe); i >= 0 { + t.Errorf("we disagrees at index %v; %v != %v", i, we[i], testWe[i]) + } + if i := compareFloat32Slices(fe[0:], testFe); i >= 0 { + t.Errorf("fe disagrees at index %v; %v != %v", i, fe[i], testFe[i]) + } +} + +func hasSlowFloatingPoint() bool { + switch runtime.GOARCH { + case "arm": + return 
os.Getenv("GOARM") == "5" + case "mips", "mipsle", "mips64", "mips64le": + // Be conservative and assume that all mips boards + // have emulated floating point. + // TODO: detect what it actually has. + return true + } + return false +} + +func TestFloat32(t *testing.T) { + // For issue 6721, the problem came after 7533753 calls, so check 10e6. + num := int(10e6) + // But do the full amount only on builders (not locally). + // But ARM5 floating point emulation is slow (Issue 10749), so + // do less for that builder: + if testing.Short() && (testenv.Builder() == "" || hasSlowFloatingPoint()) { + num /= 100 // 1.72 seconds instead of 172 seconds + } + + r := New(NewSource(1)) + for ct := 0; ct < num; ct++ { + f := r.Float32() + if f >= 1 { + t.Fatal("Float32() should be in range [0,1). ct:", ct, "f:", f) + } + } +} + +func testReadUniformity(t *testing.T, n int, seed int64) { + r := New(NewSource(seed)) + buf := make([]byte, n) + nRead, err := r.Read(buf) + if err != nil { + t.Errorf("Read err %v", err) + } + if nRead != n { + t.Errorf("Read returned unexpected n; %d != %d", nRead, n) + } + + // Expect a uniform distribution of byte values, which lie in [0, 255]. + var ( + mean = 255.0 / 2 + stddev = 256.0 / math.Sqrt(12.0) + errorScale = stddev / math.Sqrt(float64(n)) + ) + + expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.08 * errorScale} + + // Cast bytes as floats to use the common distribution-validity checks. + samples := make([]float64, n) + for i, val := range buf { + samples[i] = float64(val) + } + // Make sure that the entire set matches the expected distribution. 
+ checkSampleDistribution(t, samples, expected) +} + +func TestReadUniformity(t *testing.T) { + testBufferSizes := []int{ + 2, 4, 7, 64, 1024, 1 << 16, 1 << 20, + } + for _, seed := range testSeeds { + for _, n := range testBufferSizes { + testReadUniformity(t, n, seed) + } + } +} + +func TestReadEmpty(t *testing.T) { + r := New(NewSource(1)) + buf := make([]byte, 0) + n, err := r.Read(buf) + if err != nil { + t.Errorf("Read err into empty buffer; %v", err) + } + if n != 0 { + t.Errorf("Read into empty buffer returned unexpected n of %d", n) + } +} + +func TestReadByOneByte(t *testing.T) { + r := New(NewSource(1)) + b1 := make([]byte, 100) + _, err := io.ReadFull(iotest.OneByteReader(r), b1) + if err != nil { + t.Errorf("read by one byte: %v", err) + } + r = New(NewSource(1)) + b2 := make([]byte, 100) + _, err = r.Read(b2) + if err != nil { + t.Errorf("read: %v", err) + } + if !bytes.Equal(b1, b2) { + t.Errorf("read by one byte vs single read:\n%x\n%x", b1, b2) + } +} + +func TestReadSeedReset(t *testing.T) { + r := New(NewSource(42)) + b1 := make([]byte, 128) + _, err := r.Read(b1) + if err != nil { + t.Errorf("read: %v", err) + } + r.Seed(42) + b2 := make([]byte, 128) + _, err = r.Read(b2) + if err != nil { + t.Errorf("read: %v", err) + } + if !bytes.Equal(b1, b2) { + t.Errorf("mismatch after re-seed:\n%x\n%x", b1, b2) + } +} + +func TestShuffleSmall(t *testing.T) { + // Check that Shuffle allows n=0 and n=1, but that swap is never called for them. + r := New(NewSource(1)) + for n := 0; n <= 1; n++ { + r.Shuffle(n, func(i, j int) { t.Fatalf("swap called, n=%d i=%d j=%d", n, i, j) }) + } +} + +// encodePerm converts from a permuted slice of length n, such as Perm generates, to an int in [0, n!). +// See https://en.wikipedia.org/wiki/Lehmer_code. +// encodePerm modifies the input slice. +func encodePerm(s []int) int { + // Convert to Lehmer code. 
+ for i, x := range s { + r := s[i+1:] + for j, y := range r { + if y > x { + r[j]-- + } + } + } + // Convert to int in [0, n!). + m := 0 + fact := 1 + for i := len(s) - 1; i >= 0; i-- { + m += s[i] * fact + fact *= len(s) - i + } + return m +} + +// TestUniformFactorial tests several ways of generating a uniform value in [0, n!). +func TestUniformFactorial(t *testing.T) { + r := New(NewSource(testSeeds[0])) + top := 6 + if testing.Short() { + top = 3 + } + for n := 3; n <= top; n++ { + t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) { + // Calculate n!. + nfact := 1 + for i := 2; i <= n; i++ { + nfact *= i + } + + // Test a few different ways to generate a uniform distribution. + p := make([]int, n) // re-usable slice for Shuffle generator + tests := [...]struct { + name string + fn func() int + }{ + {name: "Int31n", fn: func() int { return int(r.Int31n(int32(nfact))) }}, + {name: "int31n", fn: func() int { return int(Int31nForTest(r, int32(nfact))) }}, + {name: "Perm", fn: func() int { return encodePerm(r.Perm(n)) }}, + {name: "Shuffle", fn: func() int { + // Generate permutation using Shuffle. + for i := range p { + p[i] = i + } + r.Shuffle(n, func(i, j int) { p[i], p[j] = p[j], p[i] }) + return encodePerm(p) + }}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Gather chi-squared values and check that they follow + // the expected normal distribution given n!-1 degrees of freedom. + // See https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test and + // https://www.johndcook.com/Beautiful_Testing_ch10.pdf. + nsamples := 10 * nfact + if nsamples < 200 { + nsamples = 200 + } + samples := make([]float64, nsamples) + for i := range samples { + // Generate some uniformly distributed values and count their occurrences. + const iters = 1000 + counts := make([]int, nfact) + for i := 0; i < iters; i++ { + counts[test.fn()]++ + } + // Calculate chi-squared and add to samples. 
+ want := iters / float64(nfact) + var χ2 float64 + for _, have := range counts { + err := float64(have) - want + χ2 += err * err + } + χ2 /= want + samples[i] = χ2 + } + + // Check that our samples approximate the appropriate normal distribution. + dof := float64(nfact - 1) + expected := &statsResults{mean: dof, stddev: math.Sqrt(2 * dof)} + errorScale := max(1.0, expected.stddev) + expected.closeEnough = 0.10 * errorScale + expected.maxError = 0.08 // TODO: What is the right value here? See issue 21211. + checkSampleDistribution(t, samples, expected) + }) + } + }) + } +} + +// Benchmarks + +func BenchmarkInt63Threadsafe(b *testing.B) { + for n := b.N; n > 0; n-- { + Int63() + } +} + +func BenchmarkInt63ThreadsafeParallel(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Int63() + } + }) +} + +func BenchmarkInt63Unthreadsafe(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Int63() + } +} + +func BenchmarkIntn1000(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Intn(1000) + } +} + +func BenchmarkInt63n1000(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Int63n(1000) + } +} + +func BenchmarkInt31n1000(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Int31n(1000) + } +} + +func BenchmarkFloat32(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Float32() + } +} + +func BenchmarkFloat64(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Float64() + } +} + +func BenchmarkPerm3(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Perm(3) + } +} + +func BenchmarkPerm30(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Perm(30) + } +} + +func BenchmarkPerm30ViaShuffle(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + p := make([]int, 30) + for i := range p { + p[i] = i + } + r.Shuffle(30, func(i, j int) { p[i], p[j] = p[j], p[i] }) + } 
+} + +// BenchmarkShuffleOverhead uses a minimal swap function +// to measure just the shuffling overhead. +func BenchmarkShuffleOverhead(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Shuffle(52, func(i, j int) { + if i < 0 || i >= 52 || j < 0 || j >= 52 { + b.Fatalf("bad swap(%d, %d)", i, j) + } + }) + } +} + +func BenchmarkRead3(b *testing.B) { + r := New(NewSource(1)) + buf := make([]byte, 3) + b.ResetTimer() + for n := b.N; n > 0; n-- { + r.Read(buf) + } +} + +func BenchmarkRead64(b *testing.B) { + r := New(NewSource(1)) + buf := make([]byte, 64) + b.ResetTimer() + for n := b.N; n > 0; n-- { + r.Read(buf) + } +} + +func BenchmarkRead1000(b *testing.B) { + r := New(NewSource(1)) + buf := make([]byte, 1000) + b.ResetTimer() + for n := b.N; n > 0; n-- { + r.Read(buf) + } +} diff --git a/src/math/rand/regress_test.go b/src/math/rand/regress_test.go new file mode 100644 index 0000000..813098e --- /dev/null +++ b/src/math/rand/regress_test.go @@ -0,0 +1,404 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test that random number sequences generated by a specific seed +// do not change from version to version. +// +// Do NOT make changes to the golden outputs. If bugs need to be fixed +// in the underlying code, find ways to fix them that do not affect the +// outputs. + +package rand_test + +import ( + "flag" + "fmt" + . 
"math/rand" + "reflect" + "testing" +) + +var printgolden = flag.Bool("printgolden", false, "print golden results for regression test") + +func TestRegress(t *testing.T) { + var int32s = []int32{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1} + var int64s = []int64{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1, 1000000000000000000, 1 << 60, 1<<63 - 2, 1<<63 - 1} + var permSizes = []int{0, 1, 5, 8, 9, 10, 16} + var readBufferSizes = []int{1, 7, 8, 9, 10} + r := New(NewSource(0)) + + rv := reflect.ValueOf(r) + n := rv.NumMethod() + p := 0 + if *printgolden { + fmt.Printf("var regressGolden = []interface{}{\n") + } + for i := 0; i < n; i++ { + m := rv.Type().Method(i) + mv := rv.Method(i) + mt := mv.Type() + if mt.NumOut() == 0 { + continue + } + r.Seed(0) + for repeat := 0; repeat < 20; repeat++ { + var args []reflect.Value + var argstr string + if mt.NumIn() == 1 { + var x any + switch mt.In(0).Kind() { + default: + t.Fatalf("unexpected argument type for r.%s", m.Name) + + case reflect.Int: + if m.Name == "Perm" { + x = permSizes[repeat%len(permSizes)] + break + } + big := int64s[repeat%len(int64s)] + if int64(int(big)) != big { + r.Int63n(big) // what would happen on 64-bit machine, to keep stream in sync + if *printgolden { + fmt.Printf("\tskipped, // must run printgolden on 64-bit machine\n") + } + p++ + continue + } + x = int(big) + + case reflect.Int32: + x = int32s[repeat%len(int32s)] + + case reflect.Int64: + x = int64s[repeat%len(int64s)] + + case reflect.Slice: + if m.Name == "Read" { + n := readBufferSizes[repeat%len(readBufferSizes)] + x = make([]byte, n) + } + } + argstr = fmt.Sprint(x) + args = append(args, reflect.ValueOf(x)) + } + + var out any + out = mv.Call(args)[0].Interface() + if m.Name == "Int" || m.Name == "Intn" { + out = int64(out.(int)) + } + if m.Name == "Read" { + out = args[0].Interface().([]byte) + } + if *printgolden { + var val string + big := int64(1 << 60) + if int64(int(big)) != 
big && (m.Name == "Int" || m.Name == "Intn") { + // 32-bit machine cannot print 64-bit results + val = "truncated" + } else if reflect.TypeOf(out).Kind() == reflect.Slice { + val = fmt.Sprintf("%#v", out) + } else { + val = fmt.Sprintf("%T(%v)", out, out) + } + fmt.Printf("\t%s, // %s(%s)\n", val, m.Name, argstr) + } else { + want := regressGolden[p] + if m.Name == "Int" { + want = int64(int(uint(want.(int64)) << 1 >> 1)) + } + if !reflect.DeepEqual(out, want) { + t.Errorf("r.%s(%s) = %v, want %v", m.Name, argstr, out, want) + } + } + p++ + } + } + if *printgolden { + fmt.Printf("}\n") + } +} + +var regressGolden = []any{ + float64(4.668112973579268), // ExpFloat64() + float64(0.1601593871172866), // ExpFloat64() + float64(3.0465834105636), // ExpFloat64() + float64(0.06385839451671879), // ExpFloat64() + float64(1.8578917487258961), // ExpFloat64() + float64(0.784676123472182), // ExpFloat64() + float64(0.11225477361256932), // ExpFloat64() + float64(0.20173283329802255), // ExpFloat64() + float64(0.3468619496201105), // ExpFloat64() + float64(0.35601103454384536), // ExpFloat64() + float64(0.888376329507869), // ExpFloat64() + float64(1.4081362450365698), // ExpFloat64() + float64(1.0077753823151994), // ExpFloat64() + float64(0.23594100766227588), // ExpFloat64() + float64(2.777245612300007), // ExpFloat64() + float64(0.5202997830662377), // ExpFloat64() + float64(1.2842705247770294), // ExpFloat64() + float64(0.030307408362776206), // ExpFloat64() + float64(2.204156824853721), // ExpFloat64() + float64(2.09891923895058), // ExpFloat64() + float32(0.94519615), // Float32() + float32(0.24496509), // Float32() + float32(0.65595627), // Float32() + float32(0.05434384), // Float32() + float32(0.3675872), // Float32() + float32(0.28948045), // Float32() + float32(0.1924386), // Float32() + float32(0.65533215), // Float32() + float32(0.8971697), // Float32() + float32(0.16735445), // Float32() + float32(0.28858566), // Float32() + float32(0.9026048), // Float32() + 
float32(0.84978026), // Float32() + float32(0.2730468), // Float32() + float32(0.6090802), // Float32() + float32(0.253656), // Float32() + float32(0.7746542), // Float32() + float32(0.017480763), // Float32() + float32(0.78707397), // Float32() + float32(0.7993937), // Float32() + float64(0.9451961492941164), // Float64() + float64(0.24496508529377975), // Float64() + float64(0.6559562651954052), // Float64() + float64(0.05434383959970039), // Float64() + float64(0.36758720663245853), // Float64() + float64(0.2894804331565928), // Float64() + float64(0.19243860967493215), // Float64() + float64(0.6553321508148324), // Float64() + float64(0.897169713149801), // Float64() + float64(0.16735444255905835), // Float64() + float64(0.2885856518054551), // Float64() + float64(0.9026048462705047), // Float64() + float64(0.8497802817628735), // Float64() + float64(0.2730468047134829), // Float64() + float64(0.6090801919903561), // Float64() + float64(0.25365600644283687), // Float64() + float64(0.7746542391859803), // Float64() + float64(0.017480762156647272), // Float64() + float64(0.7870739563039942), // Float64() + float64(0.7993936979594545), // Float64() + int64(8717895732742165505), // Int() + int64(2259404117704393152), // Int() + int64(6050128673802995827), // Int() + int64(501233450539197794), // Int() + int64(3390393562759376202), // Int() + int64(2669985732393126063), // Int() + int64(1774932891286980153), // Int() + int64(6044372234677422456), // Int() + int64(8274930044578894929), // Int() + int64(1543572285742637646), // Int() + int64(2661732831099943416), // Int() + int64(8325060299420976708), // Int() + int64(7837839688282259259), // Int() + int64(2518412263346885298), // Int() + int64(5617773211005988520), // Int() + int64(2339563716805116249), // Int() + int64(7144924247938981575), // Int() + int64(161231572858529631), // Int() + int64(7259475919510918339), // Int() + int64(7373105480197164748), // Int() + int32(2029793274), // Int31() + int32(526058514), 
// Int31() + int32(1408655353), // Int31() + int32(116702506), // Int31() + int32(789387515), // Int31() + int32(621654496), // Int31() + int32(413258767), // Int31() + int32(1407315077), // Int31() + int32(1926657288), // Int31() + int32(359390928), // Int31() + int32(619732968), // Int31() + int32(1938329147), // Int31() + int32(1824889259), // Int31() + int32(586363548), // Int31() + int32(1307989752), // Int31() + int32(544722126), // Int31() + int32(1663557311), // Int31() + int32(37539650), // Int31() + int32(1690228450), // Int31() + int32(1716684894), // Int31() + int32(0), // Int31n(1) + int32(4), // Int31n(10) + int32(25), // Int31n(32) + int32(310570), // Int31n(1048576) + int32(857611), // Int31n(1048577) + int32(621654496), // Int31n(1000000000) + int32(413258767), // Int31n(1073741824) + int32(1407315077), // Int31n(2147483646) + int32(1926657288), // Int31n(2147483647) + int32(0), // Int31n(1) + int32(8), // Int31n(10) + int32(27), // Int31n(32) + int32(367019), // Int31n(1048576) + int32(209005), // Int31n(1048577) + int32(307989752), // Int31n(1000000000) + int32(544722126), // Int31n(1073741824) + int32(1663557311), // Int31n(2147483646) + int32(37539650), // Int31n(2147483647) + int32(0), // Int31n(1) + int32(4), // Int31n(10) + int64(8717895732742165505), // Int63() + int64(2259404117704393152), // Int63() + int64(6050128673802995827), // Int63() + int64(501233450539197794), // Int63() + int64(3390393562759376202), // Int63() + int64(2669985732393126063), // Int63() + int64(1774932891286980153), // Int63() + int64(6044372234677422456), // Int63() + int64(8274930044578894929), // Int63() + int64(1543572285742637646), // Int63() + int64(2661732831099943416), // Int63() + int64(8325060299420976708), // Int63() + int64(7837839688282259259), // Int63() + int64(2518412263346885298), // Int63() + int64(5617773211005988520), // Int63() + int64(2339563716805116249), // Int63() + int64(7144924247938981575), // Int63() + int64(161231572858529631), // 
Int63() + int64(7259475919510918339), // Int63() + int64(7373105480197164748), // Int63() + int64(0), // Int63n(1) + int64(2), // Int63n(10) + int64(19), // Int63n(32) + int64(959842), // Int63n(1048576) + int64(688912), // Int63n(1048577) + int64(393126063), // Int63n(1000000000) + int64(89212473), // Int63n(1073741824) + int64(834026388), // Int63n(2147483646) + int64(1577188963), // Int63n(2147483647) + int64(543572285742637646), // Int63n(1000000000000000000) + int64(355889821886249464), // Int63n(1152921504606846976) + int64(8325060299420976708), // Int63n(9223372036854775806) + int64(7837839688282259259), // Int63n(9223372036854775807) + int64(0), // Int63n(1) + int64(0), // Int63n(10) + int64(25), // Int63n(32) + int64(679623), // Int63n(1048576) + int64(882178), // Int63n(1048577) + int64(510918339), // Int63n(1000000000) + int64(782454476), // Int63n(1073741824) + int64(0), // Intn(1) + int64(4), // Intn(10) + int64(25), // Intn(32) + int64(310570), // Intn(1048576) + int64(857611), // Intn(1048577) + int64(621654496), // Intn(1000000000) + int64(413258767), // Intn(1073741824) + int64(1407315077), // Intn(2147483646) + int64(1926657288), // Intn(2147483647) + int64(543572285742637646), // Intn(1000000000000000000) + int64(355889821886249464), // Intn(1152921504606846976) + int64(8325060299420976708), // Intn(9223372036854775806) + int64(7837839688282259259), // Intn(9223372036854775807) + int64(0), // Intn(1) + int64(2), // Intn(10) + int64(14), // Intn(32) + int64(515775), // Intn(1048576) + int64(839455), // Intn(1048577) + int64(690228450), // Intn(1000000000) + int64(642943070), // Intn(1073741824) + float64(-0.28158587086436215), // NormFloat64() + float64(0.570933095808067), // NormFloat64() + float64(-1.6920196326157044), // NormFloat64() + float64(0.1996229111693099), // NormFloat64() + float64(1.9195199291234621), // NormFloat64() + float64(0.8954838794918353), // NormFloat64() + float64(0.41457072128813166), // NormFloat64() + 
float64(-0.48700161491544713), // NormFloat64() + float64(-0.1684059662402393), // NormFloat64() + float64(0.37056410998929545), // NormFloat64() + float64(1.0156889027029008), // NormFloat64() + float64(-0.5174422210625114), // NormFloat64() + float64(-0.5565834214413804), // NormFloat64() + float64(0.778320596648391), // NormFloat64() + float64(-1.8970718197702225), // NormFloat64() + float64(0.5229525761688676), // NormFloat64() + float64(-1.5515595563231523), // NormFloat64() + float64(0.0182029289376123), // NormFloat64() + float64(-0.6820951356608795), // NormFloat64() + float64(-0.5987943422687668), // NormFloat64() + []int{}, // Perm(0) + []int{0}, // Perm(1) + []int{0, 4, 1, 3, 2}, // Perm(5) + []int{3, 1, 0, 4, 7, 5, 2, 6}, // Perm(8) + []int{5, 0, 3, 6, 7, 4, 2, 1, 8}, // Perm(9) + []int{4, 5, 0, 2, 6, 9, 3, 1, 8, 7}, // Perm(10) + []int{14, 2, 0, 8, 3, 5, 13, 12, 1, 4, 6, 7, 11, 9, 15, 10}, // Perm(16) + []int{}, // Perm(0) + []int{0}, // Perm(1) + []int{3, 0, 1, 2, 4}, // Perm(5) + []int{5, 1, 2, 0, 4, 7, 3, 6}, // Perm(8) + []int{4, 0, 6, 8, 1, 5, 2, 7, 3}, // Perm(9) + []int{8, 6, 1, 7, 5, 4, 3, 2, 9, 0}, // Perm(10) + []int{0, 3, 13, 2, 15, 4, 10, 1, 8, 14, 7, 6, 12, 9, 5, 11}, // Perm(16) + []int{}, // Perm(0) + []int{0}, // Perm(1) + []int{0, 4, 2, 1, 3}, // Perm(5) + []int{2, 1, 7, 0, 6, 3, 4, 5}, // Perm(8) + []int{8, 7, 5, 3, 4, 6, 0, 1, 2}, // Perm(9) + []int{1, 0, 2, 5, 7, 6, 9, 8, 3, 4}, // Perm(10) + []byte{0x1}, // Read([0]) + []byte{0x94, 0xfd, 0xc2, 0xfa, 0x2f, 0xfc, 0xc0}, // Read([0 0 0 0 0 0 0]) + []byte{0x41, 0xd3, 0xff, 0x12, 0x4, 0x5b, 0x73, 0xc8}, // Read([0 0 0 0 0 0 0 0]) + []byte{0x6e, 0x4f, 0xf9, 0x5f, 0xf6, 0x62, 0xa5, 0xee, 0xe8}, // Read([0 0 0 0 0 0 0 0 0]) + []byte{0x2a, 0xbd, 0xf4, 0x4a, 0x2d, 0xb, 0x75, 0xfb, 0x18, 0xd}, // Read([0 0 0 0 0 0 0 0 0 0]) + []byte{0xaf}, // Read([0]) + []byte{0x48, 0xa7, 0x9e, 0xe0, 0xb1, 0xd, 0x39}, // Read([0 0 0 0 0 0 0]) + []byte{0x46, 0x51, 0x85, 0xf, 0xd4, 0xa1, 0x78, 0x89}, // 
Read([0 0 0 0 0 0 0 0]) + []byte{0x2e, 0xe2, 0x85, 0xec, 0xe1, 0x51, 0x14, 0x55, 0x78}, // Read([0 0 0 0 0 0 0 0 0]) + []byte{0x8, 0x75, 0xd6, 0x4e, 0xe2, 0xd3, 0xd0, 0xd0, 0xde, 0x6b}, // Read([0 0 0 0 0 0 0 0 0 0]) + []byte{0xf8}, // Read([0]) + []byte{0xf9, 0xb4, 0x4c, 0xe8, 0x5f, 0xf0, 0x44}, // Read([0 0 0 0 0 0 0]) + []byte{0xc6, 0xb1, 0xf8, 0x3b, 0x8e, 0x88, 0x3b, 0xbf}, // Read([0 0 0 0 0 0 0 0]) + []byte{0x85, 0x7a, 0xab, 0x99, 0xc5, 0xb2, 0x52, 0xc7, 0x42}, // Read([0 0 0 0 0 0 0 0 0]) + []byte{0x9c, 0x32, 0xf3, 0xa8, 0xae, 0xb7, 0x9e, 0xf8, 0x56, 0xf6}, // Read([0 0 0 0 0 0 0 0 0 0]) + []byte{0x59}, // Read([0]) + []byte{0xc1, 0x8f, 0xd, 0xce, 0xcc, 0x77, 0xc7}, // Read([0 0 0 0 0 0 0]) + []byte{0x5e, 0x7a, 0x81, 0xbf, 0xde, 0x27, 0x5f, 0x67}, // Read([0 0 0 0 0 0 0 0]) + []byte{0xcf, 0xe2, 0x42, 0xcf, 0x3c, 0xc3, 0x54, 0xf3, 0xed}, // Read([0 0 0 0 0 0 0 0 0]) + []byte{0xe2, 0xd6, 0xbe, 0xcc, 0x4e, 0xa3, 0xae, 0x5e, 0x88, 0x52}, // Read([0 0 0 0 0 0 0 0 0 0]) + uint32(4059586549), // Uint32() + uint32(1052117029), // Uint32() + uint32(2817310706), // Uint32() + uint32(233405013), // Uint32() + uint32(1578775030), // Uint32() + uint32(1243308993), // Uint32() + uint32(826517535), // Uint32() + uint32(2814630155), // Uint32() + uint32(3853314576), // Uint32() + uint32(718781857), // Uint32() + uint32(1239465936), // Uint32() + uint32(3876658295), // Uint32() + uint32(3649778518), // Uint32() + uint32(1172727096), // Uint32() + uint32(2615979505), // Uint32() + uint32(1089444252), // Uint32() + uint32(3327114623), // Uint32() + uint32(75079301), // Uint32() + uint32(3380456901), // Uint32() + uint32(3433369789), // Uint32() + uint64(8717895732742165505), // Uint64() + uint64(2259404117704393152), // Uint64() + uint64(6050128673802995827), // Uint64() + uint64(9724605487393973602), // Uint64() + uint64(12613765599614152010), // Uint64() + uint64(11893357769247901871), // Uint64() + uint64(1774932891286980153), // Uint64() + uint64(15267744271532198264), // 
Uint64() + uint64(17498302081433670737), // Uint64() + uint64(1543572285742637646), // Uint64() + uint64(11885104867954719224), // Uint64() + uint64(17548432336275752516), // Uint64() + uint64(7837839688282259259), // Uint64() + uint64(2518412263346885298), // Uint64() + uint64(5617773211005988520), // Uint64() + uint64(11562935753659892057), // Uint64() + uint64(16368296284793757383), // Uint64() + uint64(161231572858529631), // Uint64() + uint64(16482847956365694147), // Uint64() + uint64(16596477517051940556), // Uint64() +} diff --git a/src/math/rand/rng.go b/src/math/rand/rng.go new file mode 100644 index 0000000..f305df1 --- /dev/null +++ b/src/math/rand/rng.go @@ -0,0 +1,252 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +/* + * Uniform distribution + * + * algorithm by + * DP Mitchell and JA Reeds + */ + +const ( + rngLen = 607 + rngTap = 273 + rngMax = 1 << 63 + rngMask = rngMax - 1 + int32max = (1 << 31) - 1 +) + +var ( + // rngCooked used for seeding. See gen_cooked.go for details. 
+ rngCooked [rngLen]int64 = [...]int64{ + -4181792142133755926, -4576982950128230565, 1395769623340756751, 5333664234075297259, + -6347679516498800754, 9033628115061424579, 7143218595135194537, 4812947590706362721, + 7937252194349799378, 5307299880338848416, 8209348851763925077, -7107630437535961764, + 4593015457530856296, 8140875735541888011, -5903942795589686782, -603556388664454774, + -7496297993371156308, 113108499721038619, 4569519971459345583, -4160538177779461077, + -6835753265595711384, -6507240692498089696, 6559392774825876886, 7650093201692370310, + 7684323884043752161, -8965504200858744418, -2629915517445760644, 271327514973697897, + -6433985589514657524, 1065192797246149621, 3344507881999356393, -4763574095074709175, + 7465081662728599889, 1014950805555097187, -4773931307508785033, -5742262670416273165, + 2418672789110888383, 5796562887576294778, 4484266064449540171, 3738982361971787048, + -4699774852342421385, 10530508058128498, -589538253572429690, -6598062107225984180, + 8660405965245884302, 10162832508971942, -2682657355892958417, 7031802312784620857, + 6240911277345944669, 831864355460801054, -1218937899312622917, 2116287251661052151, + 2202309800992166967, 9161020366945053561, 4069299552407763864, 4936383537992622449, + 457351505131524928, -8881176990926596454, -6375600354038175299, -7155351920868399290, + 4368649989588021065, 887231587095185257, -3659780529968199312, -2407146836602825512, + 5616972787034086048, -751562733459939242, 1686575021641186857, -5177887698780513806, + -4979215821652996885, -1375154703071198421, 5632136521049761902, -8390088894796940536, + -193645528485698615, -5979788902190688516, -4907000935050298721, -285522056888777828, + -2776431630044341707, 1679342092332374735, 6050638460742422078, -2229851317345194226, + -1582494184340482199, 5881353426285907985, 812786550756860885, 4541845584483343330, + -6497901820577766722, 4980675660146853729, -4012602956251539747, -329088717864244987, + -2896929232104691526, 
1495812843684243920, -2153620458055647789, 7370257291860230865, + -2466442761497833547, 4706794511633873654, -1398851569026877145, 8549875090542453214, + -9189721207376179652, -7894453601103453165, 7297902601803624459, 1011190183918857495, + -6985347000036920864, 5147159997473910359, -8326859945294252826, 2659470849286379941, + 6097729358393448602, -7491646050550022124, -5117116194870963097, -896216826133240300, + -745860416168701406, 5803876044675762232, -787954255994554146, -3234519180203704564, + -4507534739750823898, -1657200065590290694, 505808562678895611, -4153273856159712438, + -8381261370078904295, 572156825025677802, 1791881013492340891, 3393267094866038768, + -5444650186382539299, 2352769483186201278, -7930912453007408350, -325464993179687389, + -3441562999710612272, -6489413242825283295, 5092019688680754699, -227247482082248967, + 4234737173186232084, 5027558287275472836, 4635198586344772304, -536033143587636457, + 5907508150730407386, -8438615781380831356, 972392927514829904, -3801314342046600696, + -4064951393885491917, -174840358296132583, 2407211146698877100, -1640089820333676239, + 3940796514530962282, -5882197405809569433, 3095313889586102949, -1818050141166537098, + 5832080132947175283, 7890064875145919662, 8184139210799583195, -8073512175445549678, + -7758774793014564506, -4581724029666783935, 3516491885471466898, -8267083515063118116, + 6657089965014657519, 5220884358887979358, 1796677326474620641, 5340761970648932916, + 1147977171614181568, 5066037465548252321, 2574765911837859848, 1085848279845204775, + -5873264506986385449, 6116438694366558490, 2107701075971293812, -7420077970933506541, + 2469478054175558874, -1855128755834809824, -5431463669011098282, -9038325065738319171, + -6966276280341336160, 7217693971077460129, -8314322083775271549, 7196649268545224266, + -3585711691453906209, -5267827091426810625, 8057528650917418961, -5084103596553648165, + -2601445448341207749, -7850010900052094367, 6527366231383600011, 3507654575162700890, + 
9202058512774729859, 1954818376891585542, -2582991129724600103, 8299563319178235687, + -5321504681635821435, 7046310742295574065, -2376176645520785576, -7650733936335907755, + 8850422670118399721, 3631909142291992901, 5158881091950831288, -6340413719511654215, + 4763258931815816403, 6280052734341785344, -4979582628649810958, 2043464728020827976, + -2678071570832690343, 4562580375758598164, 5495451168795427352, -7485059175264624713, + 553004618757816492, 6895160632757959823, -989748114590090637, 7139506338801360852, + -672480814466784139, 5535668688139305547, 2430933853350256242, -3821430778991574732, + -1063731997747047009, -3065878205254005442, 7632066283658143750, 6308328381617103346, + 3681878764086140361, 3289686137190109749, 6587997200611086848, 244714774258135476, + -5143583659437639708, 8090302575944624335, 2945117363431356361, -8359047641006034763, + 3009039260312620700, -793344576772241777, 401084700045993341, -1968749590416080887, + 4707864159563588614, -3583123505891281857, -3240864324164777915, -5908273794572565703, + -3719524458082857382, -5281400669679581926, 8118566580304798074, 3839261274019871296, + 7062410411742090847, -8481991033874568140, 6027994129690250817, -6725542042704711878, + -2971981702428546974, -7854441788951256975, 8809096399316380241, 6492004350391900708, + 2462145737463489636, -8818543617934476634, -5070345602623085213, -8961586321599299868, + -3758656652254704451, -8630661632476012791, 6764129236657751224, -709716318315418359, + -3403028373052861600, -8838073512170985897, -3999237033416576341, -2920240395515973663, + -2073249475545404416, 368107899140673753, -6108185202296464250, -6307735683270494757, + 4782583894627718279, 6718292300699989587, 8387085186914375220, 3387513132024756289, + 4654329375432538231, -292704475491394206, -3848998599978456535, 7623042350483453954, + 7725442901813263321, 9186225467561587250, -5132344747257272453, -6865740430362196008, + 2530936820058611833, 1636551876240043639, -3658707362519810009, 
1452244145334316253, + -7161729655835084979, -7943791770359481772, 9108481583171221009, -3200093350120725999, + 5007630032676973346, 2153168792952589781, 6720334534964750538, -3181825545719981703, + 3433922409283786309, 2285479922797300912, 3110614940896576130, -2856812446131932915, + -3804580617188639299, 7163298419643543757, 4891138053923696990, 580618510277907015, + 1684034065251686769, 4429514767357295841, -8893025458299325803, -8103734041042601133, + 7177515271653460134, 4589042248470800257, -1530083407795771245, 143607045258444228, + 246994305896273627, -8356954712051676521, 6473547110565816071, 3092379936208876896, + 2058427839513754051, -4089587328327907870, 8785882556301281247, -3074039370013608197, + -637529855400303673, 6137678347805511274, -7152924852417805802, 5708223427705576541, + -3223714144396531304, 4358391411789012426, 325123008708389849, 6837621693887290924, + 4843721905315627004, -3212720814705499393, -3825019837890901156, 4602025990114250980, + 1044646352569048800, 9106614159853161675, -8394115921626182539, -4304087667751778808, + 2681532557646850893, 3681559472488511871, -3915372517896561773, -2889241648411946534, + -6564663803938238204, -8060058171802589521, 581945337509520675, 3648778920718647903, + -4799698790548231394, -7602572252857820065, 220828013409515943, -1072987336855386047, + 4287360518296753003, -4633371852008891965, 5513660857261085186, -2258542936462001533, + -8744380348503999773, 8746140185685648781, 228500091334420247, 1356187007457302238, + 3019253992034194581, 3152601605678500003, -8793219284148773595, 5559581553696971176, + 4916432985369275664, -8559797105120221417, -5802598197927043732, 2868348622579915573, + -7224052902810357288, -5894682518218493085, 2587672709781371173, -7706116723325376475, + 3092343956317362483, -5561119517847711700, 972445599196498113, -1558506600978816441, + 1708913533482282562, -2305554874185907314, -6005743014309462908, -6653329009633068701, + -483583197311151195, 2488075924621352812, 
-4529369641467339140, -4663743555056261452, + 2997203966153298104, 1282559373026354493, 240113143146674385, 8665713329246516443, + 628141331766346752, -4651421219668005332, -7750560848702540400, 7596648026010355826, + -3132152619100351065, 7834161864828164065, 7103445518877254909, 4390861237357459201, + -4780718172614204074, -319889632007444440, 622261699494173647, -3186110786557562560, + -8718967088789066690, -1948156510637662747, -8212195255998774408, -7028621931231314745, + 2623071828615234808, -4066058308780939700, -5484966924888173764, -6683604512778046238, + -6756087640505506466, 5256026990536851868, 7841086888628396109, 6640857538655893162, + -8021284697816458310, -7109857044414059830, -1689021141511844405, -4298087301956291063, + -4077748265377282003, -998231156719803476, 2719520354384050532, 9132346697815513771, + 4332154495710163773, -2085582442760428892, 6994721091344268833, -2556143461985726874, + -8567931991128098309, 59934747298466858, -3098398008776739403, -265597256199410390, + 2332206071942466437, -7522315324568406181, 3154897383618636503, -7585605855467168281, + -6762850759087199275, 197309393502684135, -8579694182469508493, 2543179307861934850, + 4350769010207485119, -4468719947444108136, -7207776534213261296, -1224312577878317200, + 4287946071480840813, 8362686366770308971, 6486469209321732151, -5605644191012979782, + -1669018511020473564, 4450022655153542367, -7618176296641240059, -3896357471549267421, + -4596796223304447488, -6531150016257070659, -8982326463137525940, -4125325062227681798, + -1306489741394045544, -8338554946557245229, 5329160409530630596, 7790979528857726136, + 4955070238059373407, -4304834761432101506, -6215295852904371179, 3007769226071157901, + -6753025801236972788, 8928702772696731736, 7856187920214445904, -4748497451462800923, + 7900176660600710914, -7082800908938549136, -6797926979589575837, -6737316883512927978, + 4186670094382025798, 1883939007446035042, -414705992779907823, 3734134241178479257, + 4065968871360089196, 
6953124200385847784, -7917685222115876751, -7585632937840318161, + -5567246375906782599, -5256612402221608788, 3106378204088556331, -2894472214076325998, + 4565385105440252958, 1979884289539493806, -6891578849933910383, 3783206694208922581, + 8464961209802336085, 2843963751609577687, 3030678195484896323, -4429654462759003204, + 4459239494808162889, 402587895800087237, 8057891408711167515, 4541888170938985079, + 1042662272908816815, -3666068979732206850, 2647678726283249984, 2144477441549833761, + -3417019821499388721, -2105601033380872185, 5916597177708541638, -8760774321402454447, + 8833658097025758785, 5970273481425315300, 563813119381731307, -6455022486202078793, + 1598828206250873866, -4016978389451217698, -2988328551145513985, -6071154634840136312, + 8469693267274066490, 125672920241807416, -3912292412830714870, -2559617104544284221, + -486523741806024092, -4735332261862713930, 5923302823487327109, -9082480245771672572, + -1808429243461201518, 7990420780896957397, 4317817392807076702, 3625184369705367340, + -6482649271566653105, -3480272027152017464, -3225473396345736649, -368878695502291645, + -3981164001421868007, -8522033136963788610, 7609280429197514109, 3020985755112334161, + -2572049329799262942, 2635195723621160615, 5144520864246028816, -8188285521126945980, + 1567242097116389047, 8172389260191636581, -2885551685425483535, -7060359469858316883, + -6480181133964513127, -7317004403633452381, 6011544915663598137, 5932255307352610768, + 2241128460406315459, -8327867140638080220, 3094483003111372717, 4583857460292963101, + 9079887171656594975, -384082854924064405, -3460631649611717935, 4225072055348026230, + -7385151438465742745, 3801620336801580414, -399845416774701952, -7446754431269675473, + 7899055018877642622, 5421679761463003041, 5521102963086275121, -4975092593295409910, + 8735487530905098534, -7462844945281082830, -2080886987197029914, -1000715163927557685, + -4253840471931071485, -5828896094657903328, 6424174453260338141, 359248545074932887, + 
-5949720754023045210, -2426265837057637212, 3030918217665093212, -9077771202237461772, + -3186796180789149575, 740416251634527158, -2142944401404840226, 6951781370868335478, + 399922722363687927, -8928469722407522623, -1378421100515597285, -8343051178220066766, + -3030716356046100229, -8811767350470065420, 9026808440365124461, 6440783557497587732, + 4615674634722404292, 539897290441580544, 2096238225866883852, 8751955639408182687, + -7316147128802486205, 7381039757301768559, 6157238513393239656, -1473377804940618233, + 8629571604380892756, 5280433031239081479, 7101611890139813254, 2479018537985767835, + 7169176924412769570, -1281305539061572506, -7865612307799218120, 2278447439451174845, + 3625338785743880657, 6477479539006708521, 8976185375579272206, -3712000482142939688, + 1326024180520890843, 7537449876596048829, 5464680203499696154, 3189671183162196045, + 6346751753565857109, -8982212049534145501, -6127578587196093755, -245039190118465649, + -6320577374581628592, 7208698530190629697, 7276901792339343736, -7490986807540332668, + 4133292154170828382, 2918308698224194548, -7703910638917631350, -3929437324238184044, + -4300543082831323144, -6344160503358350167, 5896236396443472108, -758328221503023383, + -1894351639983151068, -307900319840287220, -6278469401177312761, -2171292963361310674, + 8382142935188824023, 9103922860780351547, 4152330101494654406, + } +)

// rngSource holds the generator state: a feedback register of rngLen
// words and the two indices (tap and feed) that select words from it.
type rngSource struct {
	tap  int           // index into vec
	feed int           // index into vec
	vec  [rngLen]int64 // current feedback register
}

// seedrand advances the seeding sequence by one step:
//
//	x[n+1] = 48271 * x[n] mod (2**31 - 1)
//
// Q and R are the quotient and remainder of 2**31 - 1 divided by A
// (A*Q + R == 2**31 - 1). The step is computed from x/Q and x%Q, and a
// negative intermediate is brought back into range by adding int32max.
func seedrand(x int32) int32 {
	const (
		A = 48271
		Q = 44488
		R = 3399
	)

	quot, rem := x/Q, x%Q
	next := A*rem - R*quot
	if next < 0 {
		next += int32max
	}
	return next
}

// Seed uses the provided seed value to initialize the generator to a deterministic state.
+func (rng *rngSource) Seed(seed int64) { + rng.tap = 0 + rng.feed = rngLen - rngTap + + seed = seed % int32max + if seed < 0 { + seed += int32max + } + if seed == 0 { + seed = 89482311 + } + + x := int32(seed) + for i := -20; i < rngLen; i++ { + x = seedrand(x) + if i >= 0 { + var u int64 + u = int64(x) << 40 + x = seedrand(x) + u ^= int64(x) << 20 + x = seedrand(x) + u ^= int64(x) + u ^= rngCooked[i] + rng.vec[i] = u + } + } +} + +// Int63 returns a non-negative pseudo-random 63-bit integer as an int64. +func (rng *rngSource) Int63() int64 { + return int64(rng.Uint64() & rngMask) +} + +// Uint64 returns a non-negative pseudo-random 64-bit integer as an uint64. +func (rng *rngSource) Uint64() uint64 { + rng.tap-- + if rng.tap < 0 { + rng.tap += rngLen + } + + rng.feed-- + if rng.feed < 0 { + rng.feed += rngLen + } + + x := rng.vec[rng.feed] + rng.vec[rng.tap] + rng.vec[rng.feed] = x + return uint64(x) +} diff --git a/src/math/rand/zipf.go b/src/math/rand/zipf.go new file mode 100644 index 0000000..f04c814 --- /dev/null +++ b/src/math/rand/zipf.go @@ -0,0 +1,77 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// W.Hormann, G.Derflinger: +// "Rejection-Inversion to Generate Variates +// from Monotone Discrete Distributions" +// http://eeyore.wu-wien.ac.at/papers/96-04-04.wh-der.ps.gz + +package rand + +import "math" + +// A Zipf generates Zipf distributed variates. +type Zipf struct { + r *Rand + imax float64 + v float64 + q float64 + s float64 + oneminusQ float64 + oneminusQinv float64 + hxm float64 + hx0minusHxm float64 +} + +func (z *Zipf) h(x float64) float64 { + return math.Exp(z.oneminusQ*math.Log(z.v+x)) * z.oneminusQinv +} + +func (z *Zipf) hinv(x float64) float64 { + return math.Exp(z.oneminusQinv*math.Log(z.oneminusQ*x)) - z.v +} + +// NewZipf returns a Zipf variate generator. 
+// The generator generates values k ∈ [0, imax] +// such that P(k) is proportional to (v + k) ** (-s). +// Requirements: s > 1 and v >= 1. +func NewZipf(r *Rand, s float64, v float64, imax uint64) *Zipf { + z := new(Zipf) + if s <= 1.0 || v < 1 { + return nil + } + z.r = r + z.imax = float64(imax) + z.v = v + z.q = s + z.oneminusQ = 1.0 - z.q + z.oneminusQinv = 1.0 / z.oneminusQ + z.hxm = z.h(z.imax + 0.5) + z.hx0minusHxm = z.h(0.5) - math.Exp(math.Log(z.v)*(-z.q)) - z.hxm + z.s = 1 - z.hinv(z.h(1.5)-math.Exp(-z.q*math.Log(z.v+1.0))) + return z +} + +// Uint64 returns a value drawn from the Zipf distribution described +// by the Zipf object. +func (z *Zipf) Uint64() uint64 { + if z == nil { + panic("rand: nil Zipf") + } + k := 0.0 + + for { + r := z.r.Float64() // r on [0,1] + ur := z.hxm + r*z.hx0minusHxm + x := z.hinv(ur) + k = math.Floor(x + 0.5) + if k-x <= z.s { + break + } + if ur >= z.h(k+0.5)-math.Exp(-math.Log(k+z.v)*z.q) { + break + } + } + return uint64(k) +} diff --git a/src/math/remainder.go b/src/math/remainder.go new file mode 100644 index 0000000..bf8bfd5 --- /dev/null +++ b/src/math/remainder.go @@ -0,0 +1,94 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code and the comment below are from +// FreeBSD's /usr/src/lib/msun/src/e_remainder.c and came +// with this notice. The go code is a simplified version of +// the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. 
+// ==================================================== +// +// __ieee754_remainder(x,y) +// Return : +// returns x REM y = x - [x/y]*y as if in infinite +// precision arithmetic, where [x/y] is the (infinite bit) +// integer nearest x/y (in half way cases, choose the even one). +// Method : +// Based on Mod() returning x - [x/y]chopped * y exactly. + +// Remainder returns the IEEE 754 floating-point remainder of x/y. +// +// Special cases are: +// Remainder(±Inf, y) = NaN +// Remainder(NaN, y) = NaN +// Remainder(x, 0) = NaN +// Remainder(x, ±Inf) = x +// Remainder(x, NaN) = NaN +func Remainder(x, y float64) float64 { + if haveArchRemainder { + return archRemainder(x, y) + } + return remainder(x, y) +} + +func remainder(x, y float64) float64 { + const ( + Tiny = 4.45014771701440276618e-308 // 0x0020000000000000 + HalfMax = MaxFloat64 / 2 + ) + // special cases + switch { + case IsNaN(x) || IsNaN(y) || IsInf(x, 0) || y == 0: + return NaN() + case IsInf(y, 0): + return x + } + sign := false + if x < 0 { + x = -x + sign = true + } + if y < 0 { + y = -y + } + if x == y { + if sign { + zero := 0.0 + return -zero + } + return 0 + } + if y <= HalfMax { + x = Mod(x, y+y) // now x < 2y + } + if y < Tiny { + if x+x > y { + x -= y + if x+x >= y { + x -= y + } + } + } else { + yHalf := 0.5 * y + if x > yHalf { + x -= y + if x >= yHalf { + x -= y + } + } + } + if sign { + x = -x + } + return x +} diff --git a/src/math/signbit.go b/src/math/signbit.go new file mode 100644 index 0000000..f6e61d6 --- /dev/null +++ b/src/math/signbit.go @@ -0,0 +1,10 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Signbit reports whether x is negative or negative zero. 
+func Signbit(x float64) bool { + return Float64bits(x)&(1<<63) != 0 +} diff --git a/src/math/sin.go b/src/math/sin.go new file mode 100644 index 0000000..d95bb54 --- /dev/null +++ b/src/math/sin.go @@ -0,0 +1,242 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point sine and cosine. +*/ + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/sin.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The go code is a simplified version of the original C. +// +// sin.c +// +// Circular sine +// +// SYNOPSIS: +// +// double x, y, sin(); +// y = sin( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the sine is approximated by +// x + x**3 P(x**2). +// Between pi/4 and pi/2 the cosine is represented as +// 1 - x**2 Q(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC 0, 10 150000 3.0e-17 7.8e-18 +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// +// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss +// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may +// be meaningless for x > 2**49 = 5.6e14. +// +// cos.c +// +// Circular cosine +// +// SYNOPSIS: +// +// double x, y, cos(); +// y = cos( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the cosine is approximated by +// 1 - x**2 Q(x**2). 
// Between pi/4 and pi/2 the sine is represented as
// x + x**3 P(x**2).
//
// ACCURACY:
//
// Relative error:
// arithmetic domain # trials peak rms
// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17
// DEC 0,+1.07e9 17000 3.0e-17 7.2e-18
//
// Cephes Math Library Release 2.8: June, 2000
// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
//
// The readme file at http://netlib.sandia.gov/cephes/ says:
// Some software in this archive may be from the book _Methods and
// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
// International, 1989) or from the Cephes Mathematical Library, a
// commercial product. In either event, it is copyrighted by the author.
// What you see here may be used freely but it comes with no support or
// guarantee.
//
// The two known misprints in the book are repaired here in the
// source listings for the gamma function and the incomplete beta
// integral.
//
// Stephen L. Moshier
// moshier@na-net.ornl.gov

// Polynomial coefficient tables shared by sin and cos. Both are stored
// highest-order term first, which is the order in which the nested
// (Horner) evaluations in sin and cos consume them.
var (
	// sin coefficients: P in z + z*zz*P(zz), where zz = z*z.
	_sin = [...]float64{
		1.58962301576546568060e-10, // 0x3de5d8fd1fd19ccd
		-2.50507477628578072866e-8, // 0xbe5ae5e5a9291f5d
		2.75573136213857245213e-6,  // 0x3ec71de3567d48a1
		-1.98412698295895385996e-4, // 0xbf2a01a019bfdf03
		8.33333333332211858878e-3,  // 0x3f8111111110f7d0
		-1.66666666666666307295e-1, // 0xbfc5555555555548
	}

	// cos coefficients: Q in 1 - 0.5*zz + zz*zz*Q(zz), where zz = z*z.
	_cos = [...]float64{
		-1.13585365213876817300e-11, // 0xbda8fa49a0861a9b
		2.08757008419747316778e-9,   // 0x3e21ee9d7b4e3f05
		-2.75573141792967388112e-7,  // 0xbe927e4f7eac4bc6
		2.48015872888517045348e-5,   // 0x3efa01a019c844f5
		-1.38888888888730564116e-3,  // 0xbf56c16c16c14f91
		4.16666666666665929218e-2,   // 0x3fa555555555554b
	}
)

// Cos returns the cosine of the radian argument x.
+// +// Special cases are: +// Cos(±Inf) = NaN +// Cos(NaN) = NaN +func Cos(x float64) float64 { + if haveArchCos { + return archCos(x) + } + return cos(x) +} + +func cos(x float64) float64 { + const ( + PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668e-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170, + ) + // special cases + switch { + case IsNaN(x) || IsInf(x, 0): + return NaN() + } + + // make argument positive + sign := false + x = Abs(x) + + var j uint64 + var y, z float64 + if x >= reduceThreshold { + j, z = trigReduce(x) + } else { + j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle + y = float64(j) // integer part of x/(Pi/4), as float + + // map zeros to origin + if j&1 == 1 { + j++ + y++ + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + } + + if j > 3 { + j -= 4 + sign = !sign + } + if j > 1 { + sign = !sign + } + + zz := z * z + if j == 1 || j == 2 { + y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + } else { + y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + } + if sign { + y = -y + } + return y +} + +// Sin returns the sine of the radian argument x. 
+// +// Special cases are: +// Sin(±0) = ±0 +// Sin(±Inf) = NaN +// Sin(NaN) = NaN +func Sin(x float64) float64 { + if haveArchSin { + return archSin(x) + } + return sin(x) +} + +func sin(x float64) float64 { + const ( + PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668e-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170, + ) + // special cases + switch { + case x == 0 || IsNaN(x): + return x // return ±0 || NaN() + case IsInf(x, 0): + return NaN() + } + + // make argument positive but save the sign + sign := false + if x < 0 { + x = -x + sign = true + } + + var j uint64 + var y, z float64 + if x >= reduceThreshold { + j, z = trigReduce(x) + } else { + j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle + y = float64(j) // integer part of x/(Pi/4), as float + + // map zeros to origin + if j&1 == 1 { + j++ + y++ + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + } + // reflect in x axis + if j > 3 { + sign = !sign + j -= 4 + } + zz := z * z + if j == 1 || j == 2 { + y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + } else { + y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + } + if sign { + y = -y + } + return y +} diff --git a/src/math/sin_s390x.s b/src/math/sin_s390x.s new file mode 100644 index 0000000..7eb2206 --- /dev/null +++ b/src/math/sin_s390x.s @@ -0,0 +1,356 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 

#include "textflag.h"

// Various constants
// sincosxnan is a quiet-NaN bit pattern; it is what the L14/L33 exits return.
DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000
GLOBL sincosxnan<>+0(SB), RODATA, $8
// sincosxlim is compared against the (sign-stripped) argument before the
// reduced-argument polynomials run.
// NOTE(review): the bit pattern decodes to roughly Pi*2**50 - confirm.
DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19
GLOBL sincosxlim<>+0(SB), RODATA, $8
DATA sincosxadd<>+0(SB)/8, $0xc338000000000000
GLOBL sincosxadd<>+0(SB), RODATA, $8
// sincosxpi2h/m/l: presumably Pi/2 split into high, middle and low parts
// (the high part is 1.5707963...) - TODO confirm the m/l split.
DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31
GLOBL sincosxpi2l<>+0(SB), RODATA, $8
DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16
GLOBL sincosxpi2m<>+0(SB), RODATA, $8
DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01
GLOBL sincosxpi2h<>+0(SB), RODATA, $8
// sincosrpi2 is 0.6366..., i.e. numerically 2/Pi.
DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00
GLOBL sincosrpi2<>+0(SB), RODATA, $8

// Minimax polynomial approximations
// sincosc0..7 are loaded by the cosine-polynomial paths and sincoss0..7
// by the sine-polynomial paths of both routines below.
DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01
GLOBL sincosc0<>+0(SB), RODATA, $8
DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00
GLOBL sincosc1<>+0(SB), RODATA, $8
DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01
GLOBL sincosc2<>+0(SB), RODATA, $8
DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02
GLOBL sincosc3<>+0(SB), RODATA, $8
DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04
GLOBL sincosc4<>+0(SB), RODATA, $8
DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06
GLOBL sincosc5<>+0(SB), RODATA, $8
DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08
GLOBL sincosc6<>+0(SB), RODATA, $8
DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10
GLOBL sincosc7<>+0(SB), RODATA, $8
DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01
GLOBL sincoss0<>+0(SB), RODATA, $8
DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00
GLOBL sincoss1<>+0(SB), RODATA, $8
DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02
GLOBL sincoss2<>+0(SB), RODATA, $8
DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03
GLOBL sincoss3<>+0(SB), RODATA, $8
DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05
GLOBL sincoss4<>+0(SB), RODATA, $8
DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07
GLOBL sincoss5<>+0(SB), RODATA, $8
DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09
GLOBL sincoss6<>+0(SB), RODATA, $8
DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12
GLOBL sincoss7<>+0(SB), RODATA, $8

// Sin returns the sine of the radian argument x.
//
// Special cases are:
//      Sin(±0) = ±0
//      Sin(±Inf) = NaN
//      Sin(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// The frame is $0-16: one 8-byte argument x at 0(FP) and one 8-byte
// result at 8(FP).

TEXT ·sinAsm(SB),NOSPLIT,$0-16
	FMOVD	x+0(FP), F0
	//special case Sin(±0) = ±0
	FMOVD	$(0.0), F1
	FCMPU	F0, F1
	BEQ	sinIsZero
	LTDBR	F0, F0
	BLTU	L17
	FMOVD	F0, F5
L2:
	// Here F0 is x and F5 is x, or -x when the branch to L17 was taken.
	MOVD	$sincoss7<>+0(SB), R1
	FMOVD	0(R1), F4
	MOVD	$sincoss6<>+0(SB), R1
	FMOVD	0(R1), F1
	MOVD	$sincoss5<>+0(SB), R1
	VLEG	$0, 0(R1), V18
	MOVD	$sincoss4<>+0(SB), R1
	FMOVD	0(R1), F6
	MOVD	$sincoss2<>+0(SB), R1
	VLEG	$0, 0(R1), V16
	MOVD	$sincoss3<>+0(SB), R1
	FMOVD	0(R1), F7
	MOVD	$sincoss1<>+0(SB), R1
	FMOVD	0(R1), F3
	MOVD	$sincoss0<>+0(SB), R1
	FMOVD	0(R1), F2
	// F2 is sincoss0 (1.0) here; presumably L18 is taken when F5 is
	// below it, skipping the range reduction - TODO confirm CC usage.
	WFCHDBS	V2, V5, V2
	BEQ	L18
	MOVD	$sincosrpi2<>+0(SB), R1
	FMOVD	0(R1), F3
	MOVD	$sincosxadd<>+0(SB), R1
	FMOVD	0(R1), F2
	WFMSDB	V0, V3, V2, V3
	FMOVD	0(R1), F6
	FADD	F3, F6
	MOVD	$sincosxpi2h<>+0(SB), R1
	FMOVD	0(R1), F2
	FMSUB	F2, F6, F0
	MOVD	$sincosxpi2m<>+0(SB), R1
	FMOVD	0(R1), F4
	FMADD	F4, F6, F0
	MOVD	$sincosxpi2l<>+0(SB), R1
	WFMDB	V0, V0, V1
	FMOVD	0(R1), F7
	WFMDB	V1, V1, V2
	LGDR	F3, R1
	MOVD	$sincosxlim<>+0(SB), R2
	// Bit 0 of R1 selects between the two polynomial paths; bit 1
	// (tested further down) selects whether the result is negated at L15.
	TMLL	R1, $1
	BEQ	L6
	FMOVD	0(R2), F0
	WFCHDBS	V0, V5, V0
	BNE	L14
	MOVD	$sincosc7<>+0(SB), R2
	FMOVD	0(R2), F0
	MOVD	$sincosc6<>+0(SB), R2
	FMOVD	0(R2), F4
	MOVD	$sincosc5<>+0(SB), R2
	WFMADB	V1, V0, V4, V0
	FMOVD	0(R2), F6
	MOVD	$sincosc4<>+0(SB), R2
	WFMADB	V1, V0, V6, V0
	FMOVD	0(R2), F4
	MOVD	$sincosc2<>+0(SB), R2
	FMOVD	0(R2), F6
	WFMADB	V2, V4, V6, V4
	MOVD	$sincosc3<>+0(SB), R2
	FMOVD	0(R2), F3
	MOVD	$sincosc1<>+0(SB), R2
	WFMADB	V2, V0, V3, V0
	FMOVD	0(R2), F6
	WFMADB	V1, V4, V6, V4
	TMLL	R1, $2
	WFMADB	V2, V0, V4, V0
	MOVD	$sincosc0<>+0(SB), R1
	FMOVD	0(R1), F2
	WFMADB	V1, V0, V2, V0
	BNE	L15
	FMOVD	F0, ret+8(FP)
	RET

L6:
	FMOVD	0(R2), F4
	WFCHDBS	V4, V5, V4
	BNE	L14
	MOVD	$sincoss7<>+0(SB), R2
	FMOVD	0(R2), F4
	MOVD	$sincoss6<>+0(SB), R2
	FMOVD	0(R2), F3
	MOVD	$sincoss5<>+0(SB), R2
	WFMADB	V1, V4, V3, V4
	WFMADB	V6, V7, V0, V6
	FMOVD	0(R2), F0
	MOVD	$sincoss4<>+0(SB), R2
	FMADD	F4, F1, F0
	FMOVD	0(R2), F3
	MOVD	$sincoss2<>+0(SB), R2
	FMOVD	0(R2), F4
	MOVD	$sincoss3<>+0(SB), R2
	WFMADB	V2, V3, V4, V3
	FMOVD	0(R2), F4
	MOVD	$sincoss1<>+0(SB), R2
	WFMADB	V2, V0, V4, V0
	FMOVD	0(R2), F4
	WFMADB	V1, V3, V4, V3
	FNEG	F6, F4
	WFMADB	V2, V0, V3, V2
	WFMDB	V4, V1, V0
	TMLL	R1, $2
	WFMSDB	V0, V2, V6, V0
	BNE	L15
	FMOVD	F0, ret+8(FP)
	RET

L14:
	// The sincosxlim comparison failed: return the NaN constant.
	MOVD	$sincosxnan<>+0(SB), R1
	FMOVD	0(R1), F0
	FMOVD	F0, ret+8(FP)
	RET

L18:
	// Polynomial evaluated directly on x, without range reduction.
	WFMDB	V0, V0, V2
	WFMADB	V2, V4, V1, V4
	WFMDB	V2, V2, V1
	WFMADB	V2, V4, V18, V4
	WFMADB	V1, V6, V16, V6
	WFMADB	V1, V4, V7, V4
	WFMADB	V2, V6, V3, V6
	FMUL	F0, F2
	WFMADB	V1, V4, V6, V4
	FMADD	F4, F2, F0
	FMOVD	F0, ret+8(FP)
	RET

L17:
	// LTDBR reported less-than (or unordered): keep -x in F5.
	FNEG	F0, F5
	BR	L2
L15:
	// Return the negated result.
	FNEG	F0, F0
	FMOVD	F0, ret+8(FP)
	RET


sinIsZero:
	// x compared equal to 0.0: return x itself so the sign of zero survives.
	FMOVD	F0, ret+8(FP)
	RET

// Cos returns the cosine of the radian argument.
//
// Special cases are:
//      Cos(±Inf) = NaN
//      Cos(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// The frame is $0-16: one 8-byte argument x at 0(FP) and one 8-byte
// result at 8(FP).

TEXT ·cosAsm(SB),NOSPLIT,$0-16
	FMOVD	x+0(FP), F0
	LTDBR	F0, F0
	BLTU	L35
	FMOVD	F0, F1
L21:
	// Here F0 is x and F1 is x, or -x when the branch to L35 was taken.
	MOVD	$sincosc7<>+0(SB), R1
	FMOVD	0(R1), F4
	MOVD	$sincosc6<>+0(SB), R1
	VLEG	$0, 0(R1), V20
	MOVD	$sincosc5<>+0(SB), R1
	VLEG	$0, 0(R1), V18
	MOVD	$sincosc4<>+0(SB), R1
	FMOVD	0(R1), F6
	MOVD	$sincosc2<>+0(SB), R1
	VLEG	$0, 0(R1), V16
	MOVD	$sincosc3<>+0(SB), R1
	FMOVD	0(R1), F7
	MOVD	$sincosc1<>+0(SB), R1
	FMOVD	0(R1), F5
	MOVD	$sincosrpi2<>+0(SB), R1
	FMOVD	0(R1), F2
	MOVD	$sincosxadd<>+0(SB), R1
	FMOVD	0(R1), F3
	MOVD	$sincoss0<>+0(SB), R1
	WFMSDB	V0, V2, V3, V2
	FMOVD	0(R1), F3
	// F3 is sincoss0 (1.0) here; presumably L36 is taken when F1 is
	// below it, skipping the range reduction - TODO confirm CC usage.
	WFCHDBS	V3, V1, V3
	LGDR	F2, R1
	BEQ	L36
	MOVD	$sincosxadd<>+0(SB), R2
	FMOVD	0(R2), F4
	FADD	F2, F4
	MOVD	$sincosxpi2h<>+0(SB), R2
	FMOVD	0(R2), F2
	WFMSDB	V4, V2, V0, V2
	MOVD	$sincosxpi2m<>+0(SB), R2
	FMOVD	0(R2), F0
	WFMADB	V4, V0, V2, V0
	MOVD	$sincosxpi2l<>+0(SB), R2
	WFMDB	V0, V0, V2
	FMOVD	0(R2), F5
	WFMDB	V2, V2, V6
	MOVD	$sincosxlim<>+0(SB), R2
	// Bit 0 of R1 selects between the two polynomial paths; bit 1
	// (tested further down) selects whether the result is negated at L34.
	TMLL	R1, $1
	BNE	L25
	FMOVD	0(R2), F0
	WFCHDBS	V0, V1, V0
	BNE	L33
	MOVD	$sincosc7<>+0(SB), R2
	FMOVD	0(R2), F0
	MOVD	$sincosc6<>+0(SB), R2
	FMOVD	0(R2), F4
	MOVD	$sincosc5<>+0(SB), R2
	WFMADB	V2, V0, V4, V0
	FMOVD	0(R2), F1
	MOVD	$sincosc4<>+0(SB), R2
	WFMADB	V2, V0, V1, V0
	FMOVD	0(R2), F4
	MOVD	$sincosc2<>+0(SB), R2
	FMOVD	0(R2), F1
	WFMADB	V6, V4, V1, V4
	MOVD	$sincosc3<>+0(SB), R2
	FMOVD	0(R2), F3
	MOVD	$sincosc1<>+0(SB), R2
	WFMADB	V6, V0, V3, V0
	FMOVD	0(R2), F1
	WFMADB	V2, V4, V1, V4
	TMLL	R1, $2
	WFMADB	V6, V0, V4, V0
	MOVD	$sincosc0<>+0(SB), R1
	FMOVD	0(R1), F4
	WFMADB	V2, V0, V4, V0
	BNE	L34
	FMOVD	F0, ret+8(FP)
	RET

L25:
	FMOVD	0(R2), F3
	WFCHDBS	V3, V1, V1
	BNE	L33
	MOVD	$sincoss7<>+0(SB), R2
	FMOVD	0(R2), F1
	MOVD	$sincoss6<>+0(SB), R2
	FMOVD	0(R2), F3
	MOVD	$sincoss5<>+0(SB), R2
	WFMADB	V2, V1, V3, V1
	FMOVD	0(R2), F3
	MOVD	$sincoss4<>+0(SB), R2
	WFMADB	V2, V1, V3, V1
	FMOVD	0(R2), F3
	MOVD	$sincoss2<>+0(SB), R2
	FMOVD	0(R2), F7
	WFMADB	V6, V3, V7, V3
	MOVD	$sincoss3<>+0(SB), R2
	FMADD	F5, F4, F0
	FMOVD	0(R2), F4
	MOVD	$sincoss1<>+0(SB), R2
	FMADD	F1, F6, F4
	FMOVD	0(R2), F1
	FMADD	F3, F2, F1
	FMUL	F0, F2
	WFMADB	V6, V4, V1, V6
	TMLL	R1, $2
	FMADD	F6, F2, F0
	BNE	L34
	FMOVD	F0, ret+8(FP)
	RET

L33:
	// The sincosxlim comparison failed: return the NaN constant.
	MOVD	$sincosxnan<>+0(SB), R1
	FMOVD	0(R1), F0
	FMOVD	F0, ret+8(FP)
	RET

L36:
	// Polynomial evaluated directly on x*x, without range reduction.
	FMUL	F0, F0
	MOVD	$sincosc0<>+0(SB), R1
	WFMDB	V0, V0, V1
	WFMADB	V0, V4, V20, V4
	WFMADB	V1, V6, V16, V6
	WFMADB	V0, V4, V18, V4
	WFMADB	V0, V6, V5, V6
	WFMADB	V1, V4, V7, V4
	FMOVD	0(R1), F2
	WFMADB	V1, V4, V6, V4
	WFMADB	V0, V4, V2, V0
	FMOVD	F0, ret+8(FP)
	RET

L35:
	// LTDBR reported less-than (or unordered): keep -x in F1.
	FNEG	F0, F1
	BR	L21
L34:
	// Return the negated result.
	FNEG	F0, F0
	FMOVD	F0, ret+8(FP)
	RET
diff --git a/src/math/sincos.go b/src/math/sincos.go
new file mode 100644
index 0000000..5c5726f
--- /dev/null
+++ b/src/math/sincos.go
@@ -0,0 +1,72 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package math

// Coefficients _sin[] and _cos[] are found in pkg/math/sin.go.

// Sincos returns Sin(x), Cos(x).
+// +// Special cases are: +// Sincos(±0) = ±0, 1 +// Sincos(±Inf) = NaN, NaN +// Sincos(NaN) = NaN, NaN +func Sincos(x float64) (sin, cos float64) { + const ( + PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668e-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170, + ) + // special cases + switch { + case x == 0: + return x, 1 // return ±0.0, 1.0 + case IsNaN(x) || IsInf(x, 0): + return NaN(), NaN() + } + + // make argument positive + sinSign, cosSign := false, false + if x < 0 { + x = -x + sinSign = true + } + + var j uint64 + var y, z float64 + if x >= reduceThreshold { + j, z = trigReduce(x) + } else { + j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle + y = float64(j) // integer part of x/(Pi/4), as float + + if j&1 == 1 { // map zeros to origin + j++ + y++ + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + } + if j > 3 { // reflect in x axis + j -= 4 + sinSign, cosSign = !sinSign, !cosSign + } + if j > 1 { + cosSign = !cosSign + } + + zz := z * z + cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + if j == 1 || j == 2 { + sin, cos = cos, sin + } + if cosSign { + cos = -cos + } + if sinSign { + sin = -sin + } + return +} diff --git a/src/math/sinh.go b/src/math/sinh.go new file mode 100644 index 0000000..9fe9b4e --- /dev/null +++ b/src/math/sinh.go @@ -0,0 +1,91 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point hyperbolic sine and cosine. + + The exponential func is called for arguments + greater in magnitude than 0.5. 
+ + A series is used for arguments smaller in magnitude than 0.5. + + Cosh(x) is computed from the exponential func for + all arguments. +*/ + +// Sinh returns the hyperbolic sine of x. +// +// Special cases are: +// Sinh(±0) = ±0 +// Sinh(±Inf) = ±Inf +// Sinh(NaN) = NaN +func Sinh(x float64) float64 { + if haveArchSinh { + return archSinh(x) + } + return sinh(x) +} + +func sinh(x float64) float64 { + // The coefficients are #2029 from Hart & Cheney. (20.36D) + const ( + P0 = -0.6307673640497716991184787251e+6 + P1 = -0.8991272022039509355398013511e+5 + P2 = -0.2894211355989563807284660366e+4 + P3 = -0.2630563213397497062819489e+2 + Q0 = -0.6307673640497716991212077277e+6 + Q1 = 0.1521517378790019070696485176e+5 + Q2 = -0.173678953558233699533450911e+3 + ) + + sign := false + if x < 0 { + x = -x + sign = true + } + + var temp float64 + switch { + case x > 21: + temp = Exp(x) * 0.5 + + case x > 0.5: + ex := Exp(x) + temp = (ex - 1/ex) * 0.5 + + default: + sq := x * x + temp = (((P3*sq+P2)*sq+P1)*sq + P0) * x + temp = temp / (((sq+Q2)*sq+Q1)*sq + Q0) + } + + if sign { + temp = -temp + } + return temp +} + +// Cosh returns the hyperbolic cosine of x. +// +// Special cases are: +// Cosh(±0) = 1 +// Cosh(±Inf) = +Inf +// Cosh(NaN) = NaN +func Cosh(x float64) float64 { + if haveArchCosh { + return archCosh(x) + } + return cosh(x) +} + +func cosh(x float64) float64 { + x = Abs(x) + if x > 21 { + return Exp(x) * 0.5 + } + ex := Exp(x) + return (ex + 1/ex) * 0.5 +} diff --git a/src/math/sinh_s390x.s b/src/math/sinh_s390x.s new file mode 100644 index 0000000..d684968 --- /dev/null +++ b/src/math/sinh_s390x.s @@ -0,0 +1,251 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 


#include "textflag.h"

// Constants
// sinhrodataL21 is a 24-byte table addressed through R5 in sinhAsm;
// the entry at +8 is 0.6931..., numerically ln 2.
DATA sinhrodataL21<>+0(SB)/8, $0.231904681384629956E-16
DATA sinhrodataL21<>+8(SB)/8, $0.693147180559945286E+00
DATA sinhrodataL21<>+16(SB)/8, $704.E0
GLOBL sinhrodataL21<>+0(SB), RODATA, $24
DATA sinhrlog2<>+0(SB)/8, $0x3ff7154760000000
GLOBL sinhrlog2<>+0(SB), RODATA, $8
// sinhxinf is the bit pattern of +Inf.
DATA sinhxinf<>+0(SB)/8, $0x7ff0000000000000
GLOBL sinhxinf<>+0(SB), RODATA, $8
DATA sinhxinit<>+0(SB)/8, $0x3ffb504f333f9de6
GLOBL sinhxinit<>+0(SB), RODATA, $8
DATA sinhxlim1<>+0(SB)/8, $800.E0
GLOBL sinhxlim1<>+0(SB), RODATA, $8
DATA sinhxadd<>+0(SB)/8, $0xc3200001610007fb
GLOBL sinhxadd<>+0(SB), RODATA, $8
DATA sinhx4ff<>+0(SB)/8, $0x4ff0000000000000
GLOBL sinhx4ff<>+0(SB), RODATA, $8

// Minimax polynomial approximations
DATA sinhe0<>+0(SB)/8, $0.11715728752538099300E+01
GLOBL sinhe0<>+0(SB), RODATA, $8
DATA sinhe1<>+0(SB)/8, $0.11715728752538099300E+01
GLOBL sinhe1<>+0(SB), RODATA, $8
DATA sinhe2<>+0(SB)/8, $0.58578643762688526692E+00
GLOBL sinhe2<>+0(SB), RODATA, $8
DATA sinhe3<>+0(SB)/8, $0.19526214587563004497E+00
GLOBL sinhe3<>+0(SB), RODATA, $8
DATA sinhe4<>+0(SB)/8, $0.48815536475176217404E-01
GLOBL sinhe4<>+0(SB), RODATA, $8
DATA sinhe5<>+0(SB)/8, $0.97631072948627397816E-02
GLOBL sinhe5<>+0(SB), RODATA, $8
DATA sinhe6<>+0(SB)/8, $0.16271839297756073153E-02
GLOBL sinhe6<>+0(SB), RODATA, $8
DATA sinhe7<>+0(SB)/8, $0.23245485387271142509E-03
GLOBL sinhe7<>+0(SB), RODATA, $8
DATA sinhe8<>+0(SB)/8, $0.29080955860869629131E-04
GLOBL sinhe8<>+0(SB), RODATA, $8
DATA sinhe9<>+0(SB)/8, $0.32311267157667725278E-05
GLOBL sinhe9<>+0(SB), RODATA, $8

// Sinh returns the hyperbolic sine of the argument.
//
// Special cases are:
//      Sinh(±0) = ±0
//      Sinh(±Inf) = ±Inf
//      Sinh(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// The frame is $0-16: one 8-byte argument x at 0(FP) and one 8-byte
// result at 8(FP).

TEXT ·sinhAsm(SB),NOSPLIT,$0-16
	FMOVD	x+0(FP), F0
	//special case Sinh(±0) = ±0
	FMOVD	$(0.0), F1
	FCMPU	F0, F1
	BEQ	sinhIsZero
	//special case Sinh(±Inf) = ±Inf
	// x is compared against the largest finite float64 of each sign;
	// the sinhIsInf exit stores x itself as the result.
	FMOVD	$1.797693134862315708145274237317043567981e+308, F1
	FCMPU	F1, F0
	BLEU	sinhIsInf
	FMOVD	$-1.797693134862315708145274237317043567981e+308, F1
	FCMPU	F1, F0
	BGT		sinhIsInf

	MOVD	$sinhrodataL21<>+0(SB), R5
	LTDBR	F0, F0
	MOVD	sinhxinit<>+0(SB), R1
	FMOVD	F0, F4
	MOVD	R1, R3
	BLTU	L19
	FMOVD	F0, F2
L2:
	// Here F4 is x and F2 is x, or -x when the branch to L19 was taken.
	// The WORD/BYTE sequence is a hand-encoded instruction; its
	// mnemonic is given in the trailing comment.
	WORD	$0xED205010	//cdb %f2,.L22-.L21(%r5)
	BYTE	$0x00
	BYTE	$0x19
	BGE	L15	//jnl .L15
	BVS	L15
	WFCEDBS	V2, V2, V0
	BEQ	L20
L12:
	// Return the original argument (F4) unchanged.
	FMOVD	F4, F0
	FMOVD	F0, ret+8(FP)
	RET

L15:
	WFCEDBS	V2, V2, V0
	BVS	L12
	MOVD	$sinhxlim1<>+0(SB), R2
	FMOVD	0(R2), F0
	WFCHDBS	V0, V2, V0
	BEQ	L6
	WFCHEDBS	V4, V2, V6
	MOVD	$sinhxinf<>+0(SB), R1
	FMOVD	0(R1), F0
	BNE	LEXITTAGsinh
	WFCHDBS	V2, V4, V2
	BNE	L16
	FNEG	F0, F0
	FMOVD	F0, ret+8(FP)
	RET

L19:
	// LTDBR reported less-than (or unordered): keep -x in F2.
	FNEG	F0, F2
	BR	L2
L6:
	MOVD	$sinhxadd<>+0(SB), R2
	FMOVD	0(R2), F0
	MOVD	sinhrlog2<>+0(SB), R2
	LDGR	R2, F6
	WFMSDB	V4, V6, V0, V16
	FMOVD	sinhrodataL21<>+8(SB), F6
	WFADB	V0, V16, V0
	FMOVD	sinhrodataL21<>+0(SB), F3
	WFMSDB	V0, V6, V4, V6
	MOVD	$sinhe9<>+0(SB), R2
	WFMADB	V0, V3, V6, V0
	FMOVD	0(R2), F1
	MOVD	$sinhe7<>+0(SB), R2
	WFMDB	V0, V0, V6
	FMOVD	0(R2), F5
	MOVD	$sinhe8<>+0(SB), R2
	FMOVD	0(R2), F3
	MOVD	$sinhe6<>+0(SB), R2
	WFMADB	V6, V1, V5, V1
	FMOVD	0(R2), F5
	MOVD	$sinhe5<>+0(SB), R2
	FMOVD	0(R2), F7
	MOVD	$sinhe3<>+0(SB), R2
	WFMADB	V6, V3, V5, V3
	FMOVD	0(R2), F5
	MOVD	$sinhe4<>+0(SB), R2
	WFMADB	V6, V7, V5, V7
	FMOVD	0(R2), F5
	MOVD	$sinhe2<>+0(SB), R2
	VLEG	$0, 0(R2), V20
	WFMDB	V6, V6, V18
	WFMADB	V6, V5, V20, V5
	WFMADB	V1, V18, V7, V1
	FNEG	F0, F0
	WFMADB	V3, V18, V5, V3
	MOVD	$sinhe1<>+0(SB), R3
	WFCEDBS	V2, V4, V2
	FMOVD	0(R3), F5
	MOVD	$sinhe0<>+0(SB), R3
	WFMADB	V6, V1, V5, V1
	FMOVD	0(R3), F5
	VLGVG	$0, V16, R2
	WFMADB	V6, V3, V5, V6
	RLL	$3, R2, R2
	RISBGN	$0, $15, $48, R2, R1
	BEQ	L9
	WFMSDB	V0, V1, V6, V0
	MOVD	$sinhx4ff<>+0(SB), R3
	FNEG	F0, F0
	FMOVD	0(R3), F2
	FMUL	F2, F0
	ANDW	$0xFFFF, R2
	WORD	$0xA53FEFB6	//llill %r3,61366
	SUBW	R2, R3, R2
	RISBGN	$0, $15, $48, R2, R1
	LDGR	R1, F2
	FMUL	F2, F0
	FMOVD	F0, ret+8(FP)
	RET

L20:
	MOVD	$sinhxadd<>+0(SB), R2
	FMOVD	0(R2), F2
	MOVD	sinhrlog2<>+0(SB), R2
	LDGR	R2, F0
	WFMSDB	V4, V0, V2, V6
	FMOVD	sinhrodataL21<>+8(SB), F0
	FADD	F6, F2
	MOVD	$sinhe9<>+0(SB), R2
	FMSUB	F0, F2, F4
	FMOVD	0(R2), F1
	FMOVD	sinhrodataL21<>+0(SB), F3
	MOVD	$sinhe7<>+0(SB), R2
	FMADD	F3, F2, F4
	FMOVD	0(R2), F0
	MOVD	$sinhe8<>+0(SB), R2
	WFMDB	V4, V4, V2
	FMOVD	0(R2), F3
	MOVD	$sinhe6<>+0(SB), R2
	FMOVD	0(R2), F5
	LGDR	F6, R2
	RLL	$3, R2, R2
	RISBGN	$0, $15, $48, R2, R1
	WFMADB	V2, V1, V0, V1
	LDGR	R1, F0
	MOVD	$sinhe5<>+0(SB), R1
	WFMADB	V2, V3, V5, V3
	FMOVD	0(R1), F5
	MOVD	$sinhe3<>+0(SB), R1
	FMOVD	0(R1), F6
	WFMDB	V2, V2, V7
	WFMADB	V2, V5, V6, V5
	WORD	$0xA7487FB6	//lhi %r4,32694
	FNEG	F4, F4
	ANDW	$0xFFFF, R2
	SUBW	R2, R4, R2
	RISBGN	$0, $15, $48, R2, R3
	LDGR	R3, F6
	WFADB	V0, V6, V16
	MOVD	$sinhe4<>+0(SB), R1
	WFMADB	V1, V7, V5, V1
	WFMDB	V4, V16, V4
	FMOVD	0(R1), F5
	MOVD	$sinhe2<>+0(SB), R1
	VLEG	$0, 0(R1), V16
	MOVD	$sinhe1<>+0(SB), R1
	WFMADB	V2, V5, V16, V5
	VLEG	$0, 0(R1), V16
	WFMADB	V3, V7, V5, V3
	WFMADB	V2, V1, V16, V1
	FSUB	F6, F0
	FMUL	F1, F4
	MOVD	$sinhe0<>+0(SB), R1
	FMOVD	0(R1), F6
	WFMADB	V2, V3, V6, V2
	WFMADB	V0, V2, V4, V0
	FMOVD	F0, ret+8(FP)
	RET

L9:
	WFMADB	V0, V1, V6, V0
	MOVD	$sinhx4ff<>+0(SB), R3
	FMOVD	0(R3), F2
	FMUL	F2, F0
	WORD	$0xA72AF000	//ahi %r2,-4096
	RISBGN	$0, $15, $48, R2, R1
	LDGR	R1, F2
	FMUL	F2, F0
	FMOVD	F0, ret+8(FP)
	RET

L16:
	FMOVD	F0, ret+8(FP)
	RET

// All three exits below store whatever is currently in F0.
LEXITTAGsinh:
sinhIsInf:
sinhIsZero:
	FMOVD	F0, ret+8(FP)
	RET
diff --git a/src/math/sqrt.go
b/src/math/sqrt.go
new file mode 100644
index 0000000..903d57d
--- /dev/null
+++
b/src/math/sqrt.go
@@ -0,0 +1,149 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package math

// The original C code and the long comment below are
// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
// came with this notice. The go code is a simplified
// version of the original C.
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
// __ieee754_sqrt(x)
// Return correctly rounded sqrt.
//           -----------------------------------------
//           | Use the hardware sqrt if you have one |
//           -----------------------------------------
// Method:
//   Bit by bit method using integer arithmetic. (Slow, but portable)
//   1. Normalization
//      Scale x to y in [1,4) with even powers of 2:
//      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
//              sqrt(x) = 2**k * sqrt(y)
//   2. Bit by bit computation
//      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
//           i                                                   0
//                                     i+1         2
//          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
//           i      i            i                 i
//
//      To compute q    from q , one checks whether
//                  i+1       i
//
//                            -(i+1) 2
//                      (q + 2      )  <= y.                     (2)
//                        i
//                                                            -(i+1)
//      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
//                             i+1   i             i+1   i
//
//      With some algebraic manipulation, it is not difficult to see
//      that (2) is equivalent to
//                             -(i+1)
//                      s  +  2       <= y                       (3)
//                       i                i
//
//      The advantage of (3) is that s  and y  can be computed by
//                                    i      i
//      the following recurrence formula:
//          if (3) is false
//
//          s     =  s  ,       y    = y   ;                     (4)
//           i+1      i          i+1    i
//
//      otherwise,
//                         -i                      -(i+1)
//          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
//           i+1      i          i+1    i     i
//
//      One may easily use induction to prove (4) and (5).
//      Note. Since the left hand side of (3) contain only i+2 bits,
//            it is not necessary to do a full (53-bit) comparison
//            in (3).
//   3. Final rounding
//      After generating the 53 bits result, we compute one more bit.
//      Together with the remainder, we can decide whether the
//      result is exact, bigger than 1/2ulp, or less than 1/2ulp
//      (it will never equal to 1/2ulp).
//      The rounding mode can be detected by checking whether
//      huge + tiny is equal to huge, and whether huge - tiny is
//      equal to huge for some floating point number "huge" and "tiny".
//
//
// Notes:  Rounding mode detection omitted. The constants "mask", "shift",
// and "bias" are found in src/math/bits.go

// Sqrt returns the square root of x.
//
// Special cases are:
//	Sqrt(+Inf) = +Inf
//	Sqrt(±0) = ±0
//	Sqrt(x < 0) = NaN
//	Sqrt(NaN) = NaN
func Sqrt(x float64) float64 {
	// haveArchSqrt is a per-architecture constant: when it is true the
	// assembly archSqrt is used, otherwise the portable sqrt below.
	if haveArchSqrt {
		return archSqrt(x)
	}
	return sqrt(x)
}

// Note: Sqrt is implemented in assembly on some systems.
// Others have assembly stubs that jump to func sqrt below.
// On systems where Sqrt is a single instruction, the compiler
// may turn a direct call into a direct use of that instruction instead.
+ +func sqrt(x float64) float64 { + // special cases + switch { + case x == 0 || IsNaN(x) || IsInf(x, 1): + return x + case x < 0: + return NaN() + } + ix := Float64bits(x) + // normalize x + exp := int((ix >> shift) & mask) + if exp == 0 { // subnormal x + for ix&(1<<shift) == 0 { + ix <<= 1 + exp-- + } + exp++ + } + exp -= bias // unbias exponent + ix &^= mask << shift + ix |= 1 << shift + if exp&1 == 1 { // odd exp, double x to make it even + ix <<= 1 + } + exp >>= 1 // exp = exp/2, exponent of square root + // generate sqrt(x) bit by bit + ix <<= 1 + var q, s uint64 // q = sqrt(x) + r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB + for r != 0 { + t := s + r + if t <= ix { + s = t + r + ix -= t + q += r + } + ix <<= 1 + r >>= 1 + } + // final rounding + if ix != 0 { // remainder, result not exact + q += q & 1 // round according to extra bit + } + ix = q>>1 + uint64(exp-1+bias)<<shift // significand + biased exponent + return Float64frombits(ix) +} diff --git a/src/math/sqrt_386.s b/src/math/sqrt_386.s new file mode 100644 index 0000000..90aec13 --- /dev/null +++ b/src/math/sqrt_386.s @@ -0,0 +1,12 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func archSqrt(x float64) float64 +TEXT ·archSqrt(SB),NOSPLIT,$0 + FMOVD x+0(FP),F0 + FSQRT + FMOVDP F0,ret+8(FP) + RET diff --git a/src/math/sqrt_amd64.s b/src/math/sqrt_amd64.s new file mode 100644 index 0000000..c3b110e --- /dev/null +++ b/src/math/sqrt_amd64.s @@ -0,0 +1,12 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 

#include "textflag.h"

// func archSqrt(x float64) float64
// amd64: a single SQRTSD on the argument, result stored to ret.
TEXT ·archSqrt(SB), NOSPLIT, $0
	XORPS  X0, X0 // break dependency
	SQRTSD x+0(FP), X0
	MOVSD  X0, ret+8(FP)
	RET
diff --git a/src/math/sqrt_arm.s b/src/math/sqrt_arm.s
new file mode 100644
index 0000000..64792ec
--- /dev/null
+++ b/src/math/sqrt_arm.s
@@ -0,0 +1,20 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func archSqrt(x float64) float64
// arm: uses SQRTD unless runtime·goarm is 5, in which case control is
// handed to the Go implementation ·sqrt.
TEXT ·archSqrt(SB),NOSPLIT,$0
	MOVB	runtime·goarm(SB), R11
	CMP	$5, R11
	BEQ	arm5
	MOVD	x+0(FP),F0
	SQRTD	F0,F0
	MOVD	F0,ret+8(FP)
	RET
arm5:
	// Tail call to Go implementation.
	// Can't use JMP, as in softfloat mode SQRTD is rewritten
	// to a CALL, which makes this function have a frame.
	RET	·sqrt(SB)
diff --git a/src/math/sqrt_arm64.s b/src/math/sqrt_arm64.s
new file mode 100644
index 0000000..36ba41a
--- /dev/null
+++ b/src/math/sqrt_arm64.s
@@ -0,0 +1,12 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func archSqrt(x float64) float64
// arm64: load x, FSQRTD, store the result.
TEXT ·archSqrt(SB),NOSPLIT,$0
	FMOVD	x+0(FP), F0
	FSQRTD	F0, F0
	FMOVD	F0, ret+8(FP)
	RET
diff --git a/src/math/sqrt_asm.go b/src/math/sqrt_asm.go
new file mode 100644
index 0000000..2cec1a5
--- /dev/null
+++ b/src/math/sqrt_asm.go
@@ -0,0 +1,11 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build 386 || amd64 || arm64 || arm || mips || mipsle || ppc64 || ppc64le || s390x || riscv64 || wasm

package math

// haveArchSqrt is true on the architectures named in the build
// constraint above, so Sqrt dispatches to archSqrt there.
const haveArchSqrt = true

// archSqrt has no Go body; it is declared here and defined by the
// per-architecture assembly files.
func archSqrt(x float64) float64
diff --git a/src/math/sqrt_mipsx.s b/src/math/sqrt_mipsx.s
new file mode 100644
index 0000000..291d4af
--- /dev/null
+++ b/src/math/sqrt_mipsx.s
@@ -0,0 +1,19 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build mips || mipsle
// +build mips mipsle

#include "textflag.h"

// func archSqrt(x float64) float64
// mips/mipsle: jumps to the Go implementation ·sqrt when built with
// GOMIPS_softfloat, otherwise uses SQRTD.
TEXT ·archSqrt(SB),NOSPLIT,$0
#ifdef GOMIPS_softfloat
	JMP ·sqrt(SB)
#else
	MOVD	x+0(FP), F0
	SQRTD	F0, F0
	MOVD	F0, ret+8(FP)
#endif
	RET
diff --git a/src/math/sqrt_noasm.go b/src/math/sqrt_noasm.go
new file mode 100644
index 0000000..3979622
--- /dev/null
+++ b/src/math/sqrt_noasm.go
@@ -0,0 +1,13 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !386 && !amd64 && !arm64 && !arm && !mips && !mipsle && !ppc64 && !ppc64le && !s390x && !riscv64 && !wasm

package math

// haveArchSqrt is false on every architecture not listed in
// sqrt_asm.go's build constraint, so Sqrt uses the portable sqrt.
const haveArchSqrt = false

// archSqrt is a placeholder that panics if it is ever called; Sqrt only
// calls archSqrt when haveArchSqrt is true.
func archSqrt(x float64) float64 {
	panic("not implemented")
}
diff --git a/src/math/sqrt_ppc64x.s b/src/math/sqrt_ppc64x.s
new file mode 100644
index 0000000..c929da2
--- /dev/null
+++ b/src/math/sqrt_ppc64x.s
@@ -0,0 +1,15 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "textflag.h"

// func archSqrt(x float64) float64
// ppc64/ppc64le: load x, FSQRT, store the result.
TEXT ·archSqrt(SB),NOSPLIT,$0
	FMOVD	x+0(FP), F0
	FSQRT	F0, F0
	FMOVD	F0, ret+8(FP)
	RET
diff --git a/src/math/sqrt_riscv64.s b/src/math/sqrt_riscv64.s
new file mode 100644
index 0000000..0dbdbc9
--- /dev/null
+++ b/src/math/sqrt_riscv64.s
@@ -0,0 +1,12 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func archSqrt(x float64) float64
// riscv64: load x, FSQRTD, store the result.
TEXT ·archSqrt(SB),NOSPLIT,$0
	MOVD	x+0(FP), F0
	FSQRTD	F0, F0
	MOVD	F0, ret+8(FP)
	RET
diff --git a/src/math/sqrt_s390x.s b/src/math/sqrt_s390x.s
new file mode 100644
index 0000000..fa31f75
--- /dev/null
+++ b/src/math/sqrt_s390x.s
@@ -0,0 +1,12 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func archSqrt(x float64) float64
// s390x: load x, FSQRT, store the result.
TEXT ·archSqrt(SB),NOSPLIT,$0
	FMOVD x+0(FP), F1
	FSQRT F1, F1
	FMOVD F1, ret+8(FP)
	RET
diff --git a/src/math/sqrt_wasm.s b/src/math/sqrt_wasm.s
new file mode 100644
index 0000000..fa6799d
--- /dev/null
+++ b/src/math/sqrt_wasm.s
@@ -0,0 +1,12 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func archSqrt(x float64) float64
// wasm: load x, F64Sqrt, store the result.
TEXT ·archSqrt(SB),NOSPLIT,$0
	Get SP
	F64Load x+0(FP)
	F64Sqrt
	F64Store ret+8(FP)
	RET
diff --git a/src/math/stubs.go b/src/math/stubs.go
new file mode 100644
index 0000000..c4350d4
--- /dev/null
+++ b/src/math/stubs.go
@@ -0,0 +1,160 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !s390x

// This is a large group of functions that most architectures don't
// implement in assembly.

package math

// Every pair below follows the same pattern: the haveArchX constant is
// false, and the matching archX function panics if it is ever called.
// The exported wrappers in this package (Cos, Sin, Sinh, Cosh, ...) test
// haveArchX before calling archX, so with these definitions they take
// the portable Go path and the panics are not reached.

const haveArchAcos = false

func archAcos(x float64) float64 {
	panic("not implemented")
}

const haveArchAcosh = false

func archAcosh(x float64) float64 {
	panic("not implemented")
}

const haveArchAsin = false

func archAsin(x float64) float64 {
	panic("not implemented")
}

const haveArchAsinh = false

func archAsinh(x float64) float64 {
	panic("not implemented")
}

const haveArchAtan = false

func archAtan(x float64) float64 {
	panic("not implemented")
}

const haveArchAtan2 = false

func archAtan2(y, x float64) float64 {
	panic("not implemented")
}

const haveArchAtanh = false

func archAtanh(x float64) float64 {
	panic("not implemented")
}

const haveArchCbrt = false

func archCbrt(x float64) float64 {
	panic("not implemented")
}

const haveArchCos = false

func archCos(x float64) float64 {
	panic("not implemented")
}

const haveArchCosh = false

func archCosh(x float64) float64 {
	panic("not implemented")
}

const haveArchErf = false

func archErf(x float64) float64 {
	panic("not implemented")
}

const haveArchErfc = false

func archErfc(x float64) float64 {
	panic("not implemented")
}

const haveArchExpm1 = false

func archExpm1(x float64) float64 {
	panic("not implemented")
}

const haveArchFrexp = false

func archFrexp(x float64) (float64, int) {
	panic("not implemented")
}

const haveArchLdexp = false

func archLdexp(frac float64, exp int) float64 {
	panic("not implemented")
}

const haveArchLog10 = false

func archLog10(x float64) float64 {
	panic("not implemented")
}

const haveArchLog2 = false

func archLog2(x float64) float64 {
	panic("not implemented")
}

const haveArchLog1p = false

func archLog1p(x float64) float64 {
	panic("not implemented")
}

const haveArchMod = false

func archMod(x, y float64) float64 {
	panic("not implemented")
}

const haveArchPow = false

func archPow(x, y float64) float64 {
	panic("not implemented")
}

const haveArchRemainder = false

func archRemainder(x, y float64) float64 {
	panic("not implemented")
}

const haveArchSin = false

func archSin(x float64) float64 {
	panic("not implemented")
}

const haveArchSinh = false

func archSinh(x float64) float64 {
	panic("not implemented")
}

const haveArchTan = false

func archTan(x float64) float64 {
	panic("not implemented")
}

const haveArchTanh = false

func archTanh(x float64) float64 {
	panic("not implemented")
}
diff --git a/src/math/stubs_s390x.s b/src/math/stubs_s390x.s
new file mode 100644
index 0000000..7400179
--- /dev/null
+++ b/src/math/stubs_s390x.s
@@ -0,0 +1,468 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// archLog10 branches through the function pointer stored in
// ·log10vectorfacility.
TEXT ·archLog10(SB), NOSPLIT, $0
	MOVD ·log10vectorfacility+0x00(SB), R1
	BR   (R1)

// log10TrampolineSetup is the initial target of ·log10vectorfacility.
// It reads ·hasVX, overwrites the pointer with ·log10Asm when the value
// is 1 and with the Go function ·log10 otherwise, then branches to the
// routine it selected.
TEXT ·log10TrampolineSetup(SB), NOSPLIT, $0
	MOVB   ·hasVX(SB), R1
	CMPBEQ R1, $1, vectorimpl      // vectorfacility = 1, vector supported
	MOVD   $·log10vectorfacility+0x00(SB), R1
	MOVD   $·log10(SB), R2
	MOVD   R2, 0(R1)
	BR     ·log10(SB)

vectorimpl:
	MOVD $·log10vectorfacility+0x00(SB), R1
	MOVD $·log10Asm(SB), R2
	MOVD R2, 0(R1)
	BR   ·log10Asm(SB)

// The pointer starts out aimed at the setup routine above.
GLOBL ·log10vectorfacility+0x00(SB), NOPTR, $8
DATA ·log10vectorfacility+0x00(SB)/8, $·log10TrampolineSetup(SB)

// archCos branches through the function pointer stored in
// ·cosvectorfacility.
TEXT ·archCos(SB), NOSPLIT, $0
	MOVD ·cosvectorfacility+0x00(SB), R1
	BR   (R1)

// cosTrampolineSetup reads ·hasVX, overwrites ·cosvectorfacility with
// ·cosAsm when the value is 1 and with the Go function ·cos otherwise,
// then branches to the routine it selected.
TEXT ·cosTrampolineSetup(SB), NOSPLIT, $0
	MOVB   ·hasVX(SB), R1
	CMPBEQ R1, $1, vectorimpl      // vectorfacility = 1, vector supported
	MOVD   $·cosvectorfacility+0x00(SB), R1
	MOVD   $·cos(SB), R2
	MOVD   R2, 0(R1)
	BR     ·cos(SB)

vectorimpl:
	MOVD $·cosvectorfacility+0x00(SB), R1
	MOVD $·cosAsm(SB), R2
	MOVD R2, 0(R1)
	BR   ·cosAsm(SB)

GLOBL
·cosvectorfacility+0x00(SB), NOPTR, $8 +DATA ·cosvectorfacility+0x00(SB)/8, $·cosTrampolineSetup(SB) + +TEXT ·archCosh(SB), NOSPLIT, $0 + MOVD ·coshvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·coshTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·coshvectorfacility+0x00(SB), R1 + MOVD $·cosh(SB), R2 + MOVD R2, 0(R1) + BR ·cosh(SB) + +vectorimpl: + MOVD $·coshvectorfacility+0x00(SB), R1 + MOVD $·coshAsm(SB), R2 + MOVD R2, 0(R1) + BR ·coshAsm(SB) + +GLOBL ·coshvectorfacility+0x00(SB), NOPTR, $8 +DATA ·coshvectorfacility+0x00(SB)/8, $·coshTrampolineSetup(SB) + +TEXT ·archSin(SB), NOSPLIT, $0 + MOVD ·sinvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·sinTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·sinvectorfacility+0x00(SB), R1 + MOVD $·sin(SB), R2 + MOVD R2, 0(R1) + BR ·sin(SB) + +vectorimpl: + MOVD $·sinvectorfacility+0x00(SB), R1 + MOVD $·sinAsm(SB), R2 + MOVD R2, 0(R1) + BR ·sinAsm(SB) + +GLOBL ·sinvectorfacility+0x00(SB), NOPTR, $8 +DATA ·sinvectorfacility+0x00(SB)/8, $·sinTrampolineSetup(SB) + +TEXT ·archSinh(SB), NOSPLIT, $0 + MOVD ·sinhvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·sinhTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·sinhvectorfacility+0x00(SB), R1 + MOVD $·sinh(SB), R2 + MOVD R2, 0(R1) + BR ·sinh(SB) + +vectorimpl: + MOVD $·sinhvectorfacility+0x00(SB), R1 + MOVD $·sinhAsm(SB), R2 + MOVD R2, 0(R1) + BR ·sinhAsm(SB) + +GLOBL ·sinhvectorfacility+0x00(SB), NOPTR, $8 +DATA ·sinhvectorfacility+0x00(SB)/8, $·sinhTrampolineSetup(SB) + +TEXT ·archTanh(SB), NOSPLIT, $0 + MOVD ·tanhvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·tanhTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·tanhvectorfacility+0x00(SB), R1 + MOVD 
$·tanh(SB), R2 + MOVD R2, 0(R1) + BR ·tanh(SB) + +vectorimpl: + MOVD $·tanhvectorfacility+0x00(SB), R1 + MOVD $·tanhAsm(SB), R2 + MOVD R2, 0(R1) + BR ·tanhAsm(SB) + +GLOBL ·tanhvectorfacility+0x00(SB), NOPTR, $8 +DATA ·tanhvectorfacility+0x00(SB)/8, $·tanhTrampolineSetup(SB) + +TEXT ·archLog1p(SB), NOSPLIT, $0 + MOVD ·log1pvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·log1pTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·log1pvectorfacility+0x00(SB), R1 + MOVD $·log1p(SB), R2 + MOVD R2, 0(R1) + BR ·log1p(SB) + +vectorimpl: + MOVD $·log1pvectorfacility+0x00(SB), R1 + MOVD $·log1pAsm(SB), R2 + MOVD R2, 0(R1) + BR ·log1pAsm(SB) + +GLOBL ·log1pvectorfacility+0x00(SB), NOPTR, $8 +DATA ·log1pvectorfacility+0x00(SB)/8, $·log1pTrampolineSetup(SB) + +TEXT ·archAtanh(SB), NOSPLIT, $0 + MOVD ·atanhvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·atanhTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·atanhvectorfacility+0x00(SB), R1 + MOVD $·atanh(SB), R2 + MOVD R2, 0(R1) + BR ·atanh(SB) + +vectorimpl: + MOVD $·atanhvectorfacility+0x00(SB), R1 + MOVD $·atanhAsm(SB), R2 + MOVD R2, 0(R1) + BR ·atanhAsm(SB) + +GLOBL ·atanhvectorfacility+0x00(SB), NOPTR, $8 +DATA ·atanhvectorfacility+0x00(SB)/8, $·atanhTrampolineSetup(SB) + +TEXT ·archAcos(SB), NOSPLIT, $0 + MOVD ·acosvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·acosTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·acosvectorfacility+0x00(SB), R1 + MOVD $·acos(SB), R2 + MOVD R2, 0(R1) + BR ·acos(SB) + +vectorimpl: + MOVD $·acosvectorfacility+0x00(SB), R1 + MOVD $·acosAsm(SB), R2 + MOVD R2, 0(R1) + BR ·acosAsm(SB) + +GLOBL ·acosvectorfacility+0x00(SB), NOPTR, $8 +DATA ·acosvectorfacility+0x00(SB)/8, $·acosTrampolineSetup(SB) + +TEXT ·archAsin(SB), NOSPLIT, $0 + MOVD 
·asinvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·asinTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·asinvectorfacility+0x00(SB), R1 + MOVD $·asin(SB), R2 + MOVD R2, 0(R1) + BR ·asin(SB) + +vectorimpl: + MOVD $·asinvectorfacility+0x00(SB), R1 + MOVD $·asinAsm(SB), R2 + MOVD R2, 0(R1) + BR ·asinAsm(SB) + +GLOBL ·asinvectorfacility+0x00(SB), NOPTR, $8 +DATA ·asinvectorfacility+0x00(SB)/8, $·asinTrampolineSetup(SB) + +TEXT ·archAsinh(SB), NOSPLIT, $0 + MOVD ·asinhvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·asinhTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·asinhvectorfacility+0x00(SB), R1 + MOVD $·asinh(SB), R2 + MOVD R2, 0(R1) + BR ·asinh(SB) + +vectorimpl: + MOVD $·asinhvectorfacility+0x00(SB), R1 + MOVD $·asinhAsm(SB), R2 + MOVD R2, 0(R1) + BR ·asinhAsm(SB) + +GLOBL ·asinhvectorfacility+0x00(SB), NOPTR, $8 +DATA ·asinhvectorfacility+0x00(SB)/8, $·asinhTrampolineSetup(SB) + +TEXT ·archAcosh(SB), NOSPLIT, $0 + MOVD ·acoshvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·acoshTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·acoshvectorfacility+0x00(SB), R1 + MOVD $·acosh(SB), R2 + MOVD R2, 0(R1) + BR ·acosh(SB) + +vectorimpl: + MOVD $·acoshvectorfacility+0x00(SB), R1 + MOVD $·acoshAsm(SB), R2 + MOVD R2, 0(R1) + BR ·acoshAsm(SB) + +GLOBL ·acoshvectorfacility+0x00(SB), NOPTR, $8 +DATA ·acoshvectorfacility+0x00(SB)/8, $·acoshTrampolineSetup(SB) + +TEXT ·archErf(SB), NOSPLIT, $0 + MOVD ·erfvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·erfTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·erfvectorfacility+0x00(SB), R1 + MOVD $·erf(SB), R2 + MOVD R2, 0(R1) + BR ·erf(SB) + +vectorimpl: + MOVD $·erfvectorfacility+0x00(SB), R1 + MOVD 
$·erfAsm(SB), R2 + MOVD R2, 0(R1) + BR ·erfAsm(SB) + +GLOBL ·erfvectorfacility+0x00(SB), NOPTR, $8 +DATA ·erfvectorfacility+0x00(SB)/8, $·erfTrampolineSetup(SB) + +TEXT ·archErfc(SB), NOSPLIT, $0 + MOVD ·erfcvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·erfcTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·erfcvectorfacility+0x00(SB), R1 + MOVD $·erfc(SB), R2 + MOVD R2, 0(R1) + BR ·erfc(SB) + +vectorimpl: + MOVD $·erfcvectorfacility+0x00(SB), R1 + MOVD $·erfcAsm(SB), R2 + MOVD R2, 0(R1) + BR ·erfcAsm(SB) + +GLOBL ·erfcvectorfacility+0x00(SB), NOPTR, $8 +DATA ·erfcvectorfacility+0x00(SB)/8, $·erfcTrampolineSetup(SB) + +TEXT ·archAtan(SB), NOSPLIT, $0 + MOVD ·atanvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·atanTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·atanvectorfacility+0x00(SB), R1 + MOVD $·atan(SB), R2 + MOVD R2, 0(R1) + BR ·atan(SB) + +vectorimpl: + MOVD $·atanvectorfacility+0x00(SB), R1 + MOVD $·atanAsm(SB), R2 + MOVD R2, 0(R1) + BR ·atanAsm(SB) + +GLOBL ·atanvectorfacility+0x00(SB), NOPTR, $8 +DATA ·atanvectorfacility+0x00(SB)/8, $·atanTrampolineSetup(SB) + +TEXT ·archAtan2(SB), NOSPLIT, $0 + MOVD ·atan2vectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·atan2TrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·atan2vectorfacility+0x00(SB), R1 + MOVD $·atan2(SB), R2 + MOVD R2, 0(R1) + BR ·atan2(SB) + +vectorimpl: + MOVD $·atan2vectorfacility+0x00(SB), R1 + MOVD $·atan2Asm(SB), R2 + MOVD R2, 0(R1) + BR ·atan2Asm(SB) + +GLOBL ·atan2vectorfacility+0x00(SB), NOPTR, $8 +DATA ·atan2vectorfacility+0x00(SB)/8, $·atan2TrampolineSetup(SB) + +TEXT ·archCbrt(SB), NOSPLIT, $0 + MOVD ·cbrtvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·cbrtTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // 
vectorfacility = 1, vector supported + MOVD $·cbrtvectorfacility+0x00(SB), R1 + MOVD $·cbrt(SB), R2 + MOVD R2, 0(R1) + BR ·cbrt(SB) + +vectorimpl: + MOVD $·cbrtvectorfacility+0x00(SB), R1 + MOVD $·cbrtAsm(SB), R2 + MOVD R2, 0(R1) + BR ·cbrtAsm(SB) + +GLOBL ·cbrtvectorfacility+0x00(SB), NOPTR, $8 +DATA ·cbrtvectorfacility+0x00(SB)/8, $·cbrtTrampolineSetup(SB) + +TEXT ·archLog(SB), NOSPLIT, $0 + MOVD ·logvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·logTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·logvectorfacility+0x00(SB), R1 + MOVD $·log(SB), R2 + MOVD R2, 0(R1) + BR ·log(SB) + +vectorimpl: + MOVD $·logvectorfacility+0x00(SB), R1 + MOVD $·logAsm(SB), R2 + MOVD R2, 0(R1) + BR ·logAsm(SB) + +GLOBL ·logvectorfacility+0x00(SB), NOPTR, $8 +DATA ·logvectorfacility+0x00(SB)/8, $·logTrampolineSetup(SB) + +TEXT ·archTan(SB), NOSPLIT, $0 + MOVD ·tanvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·tanTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·tanvectorfacility+0x00(SB), R1 + MOVD $·tan(SB), R2 + MOVD R2, 0(R1) + BR ·tan(SB) + +vectorimpl: + MOVD $·tanvectorfacility+0x00(SB), R1 + MOVD $·tanAsm(SB), R2 + MOVD R2, 0(R1) + BR ·tanAsm(SB) + +GLOBL ·tanvectorfacility+0x00(SB), NOPTR, $8 +DATA ·tanvectorfacility+0x00(SB)/8, $·tanTrampolineSetup(SB) + +TEXT ·archExp(SB), NOSPLIT, $0 + MOVD ·expvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·expTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·expvectorfacility+0x00(SB), R1 + MOVD $·exp(SB), R2 + MOVD R2, 0(R1) + BR ·exp(SB) + +vectorimpl: + MOVD $·expvectorfacility+0x00(SB), R1 + MOVD $·expAsm(SB), R2 + MOVD R2, 0(R1) + BR ·expAsm(SB) + +GLOBL ·expvectorfacility+0x00(SB), NOPTR, $8 +DATA ·expvectorfacility+0x00(SB)/8, $·expTrampolineSetup(SB) + +TEXT ·archExpm1(SB), NOSPLIT, $0 + 
MOVD ·expm1vectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·expm1TrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·expm1vectorfacility+0x00(SB), R1 + MOVD $·expm1(SB), R2 + MOVD R2, 0(R1) + BR ·expm1(SB) + +vectorimpl: + MOVD $·expm1vectorfacility+0x00(SB), R1 + MOVD $·expm1Asm(SB), R2 + MOVD R2, 0(R1) + BR ·expm1Asm(SB) + +GLOBL ·expm1vectorfacility+0x00(SB), NOPTR, $8 +DATA ·expm1vectorfacility+0x00(SB)/8, $·expm1TrampolineSetup(SB) + +TEXT ·archPow(SB), NOSPLIT, $0 + MOVD ·powvectorfacility+0x00(SB), R1 + BR (R1) + +TEXT ·powTrampolineSetup(SB), NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVD $·powvectorfacility+0x00(SB), R1 + MOVD $·pow(SB), R2 + MOVD R2, 0(R1) + BR ·pow(SB) + +vectorimpl: + MOVD $·powvectorfacility+0x00(SB), R1 + MOVD $·powAsm(SB), R2 + MOVD R2, 0(R1) + BR ·powAsm(SB) + +GLOBL ·powvectorfacility+0x00(SB), NOPTR, $8 +DATA ·powvectorfacility+0x00(SB)/8, $·powTrampolineSetup(SB) + diff --git a/src/math/tan.go b/src/math/tan.go new file mode 100644 index 0000000..a25417f --- /dev/null +++ b/src/math/tan.go @@ -0,0 +1,139 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point tangent. +*/ + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/tan.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The go code is a simplified version of the original C. +// +// tan.c +// +// Circular tangent +// +// SYNOPSIS: +// +// double x, y, tan(); +// y = tan( x ); +// +// DESCRIPTION: +// +// Returns the circular tangent of the radian argument x. +// +// Range reduction is modulo pi/4. A rational function +// x + x**3 P(x**2)/Q(x**2) +// is employed in the basic interval [0, pi/4].
+// +// ACCURACY: +// Relative error: +// arithmetic domain # trials peak rms +// DEC +-1.07e9 44000 4.1e-17 1.0e-17 +// IEEE +-1.07e9 30000 2.9e-16 8.1e-17 +// +// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss +// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may +// be meaningless for x > 2**49 = 5.6e14. +// [Accuracy loss statement from sin.go comments.] +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// tan coefficients +var _tanP = [...]float64{ + -1.30936939181383777646e4, // 0xc0c992d8d24f3f38 + 1.15351664838587416140e6, // 0x413199eca5fc9ddd + -1.79565251976484877988e7, // 0xc1711fead3299176 +} +var _tanQ = [...]float64{ + 1.00000000000000000000e0, + 1.36812963470692954678e4, //0x40cab8a5eeb36572 + -1.32089234440210967447e6, //0xc13427bc582abc96 + 2.50083801823357915839e7, //0x4177d98fc2ead8ef + -5.38695755929454629881e7, //0xc189afe03cbe5a31 +} + +// Tan returns the tangent of the radian argument x. 
+// +// Special cases are: +// Tan(±0) = ±0 +// Tan(±Inf) = NaN +// Tan(NaN) = NaN +func Tan(x float64) float64 { + if haveArchTan { + return archTan(x) + } + return tan(x) +} + +func tan(x float64) float64 { + const ( + PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668e-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170, + ) + // special cases + switch { + case x == 0 || IsNaN(x): + return x // return ±0 || NaN() + case IsInf(x, 0): + return NaN() + } + + // make argument positive but save the sign + sign := false + if x < 0 { + x = -x + sign = true + } + var j uint64 + var y, z float64 + if x >= reduceThreshold { + j, z = trigReduce(x) + } else { + j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle + y = float64(j) // integer part of x/(Pi/4), as float + + /* map zeros and singularities to origin */ + if j&1 == 1 { + j++ + y++ + } + + z = ((x - y*PI4A) - y*PI4B) - y*PI4C + } + zz := z * z + + if zz > 1e-14 { + y = z + z*(zz*(((_tanP[0]*zz)+_tanP[1])*zz+_tanP[2])/((((zz+_tanQ[1])*zz+_tanQ[2])*zz+_tanQ[3])*zz+_tanQ[4])) + } else { + y = z + } + if j&2 == 2 { + y = -1 / y + } + if sign { + y = -y + } + return y +} diff --git a/src/math/tan_s390x.s b/src/math/tan_s390x.s new file mode 100644 index 0000000..8226760 --- /dev/null +++ b/src/math/tan_s390x.s @@ -0,0 +1,110 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Minimax polynomial approximations +DATA ·tanrodataL13<> + 0(SB)/8, $0.181017336383229927e-07 +DATA ·tanrodataL13<> + 8(SB)/8, $-.256590857271311164e-03 +DATA ·tanrodataL13<> + 16(SB)/8, $-.464359274328689195e+00 +DATA ·tanrodataL13<> + 24(SB)/8, $1.0 +DATA ·tanrodataL13<> + 32(SB)/8, $-.333333333333333464e+00 +DATA ·tanrodataL13<> + 40(SB)/8, $0.245751217306830032e-01 +DATA ·tanrodataL13<> + 48(SB)/8, $-.245391301343844510e-03 +DATA ·tanrodataL13<> + 56(SB)/8, $0.214530914428992319e-01 +DATA ·tanrodataL13<> + 64(SB)/8, $0.108285667160535624e-31 +DATA ·tanrodataL13<> + 72(SB)/8, $0.612323399573676480e-16 +DATA ·tanrodataL13<> + 80(SB)/8, $0.157079632679489656e+01 +DATA ·tanrodataL13<> + 88(SB)/8, $0.636619772367581341e+00 +GLOBL ·tanrodataL13<> + 0(SB), RODATA, $96 + +// Constants +DATA ·tanxnan<> + 0(SB)/8, $0x7ff8000000000000 +GLOBL ·tanxnan<> + 0(SB), RODATA, $8 +DATA ·tanxlim<> + 0(SB)/8, $0x432921fb54442d19 +GLOBL ·tanxlim<> + 0(SB), RODATA, $8 +DATA ·tanxadd<> + 0(SB)/8, $0xc338000000000000 +GLOBL ·tanxadd<> + 0(SB), RODATA, $8 + +// Tan returns the tangent of the radian argument. +// +// Special cases are: +// Tan(±0) = ±0 +// Tan(±Inf) = NaN +// Tan(NaN) = NaN +// The algorithm used is minimax polynomial approximation using a table of +// polynomial coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·tanAsm(SB), NOSPLIT, $0-16 + FMOVD x+0(FP), F0 + //special case Tan(±0) = ±0 + FMOVD $(0.0), F1 + FCMPU F0, F1 + BEQ atanIsZero + + MOVD $·tanrodataL13<>+0(SB), R5 + LTDBR F0, F0 + BLTU L10 + FMOVD F0, F2 +L2: + MOVD $·tanxlim<>+0(SB), R1 + WORD $0xED201000 //cdb %f2,0(%r1) + BYTE $0x00 + BYTE $0x19 + BGE L11 + BVS L11 + MOVD $·tanxadd<>+0(SB), R1 + FMOVD 88(R5), F6 + FMOVD 0(R1), F4 + WFMSDB V0, V6, V4, V6 + FMOVD 80(R5), F1 + FADD F6, F4 + FMOVD 72(R5), F2 + FMSUB F1, F4, F0 + FMOVD 64(R5), F3 + WFMADB V4, V2, V0, V2 + FMOVD 56(R5), F1 + WFMADB V4, V3, V2, V4 + FMUL F2, F2 + VLEG $0, 48(R5), V18 + LGDR F6, R1 + FMOVD 40(R5), F5 + FMOVD 32(R5), F3 + FMADD F1, F2, F3 + FMOVD 24(R5), F1 + FMOVD 16(R5), F7 + FMOVD 8(R5), F0 + WFMADB V2, V7, V1, V7 + WFMADB V2, V0, V5, V0 + WFMDB V2, V2, V1 + FMOVD 0(R5), F5 + WFLCDB V4, V16 + WFMADB V2, V5, V18, V5 + WFMADB V1, V0, V7, V0 + TMLL R1, $1 + WFMADB V1, V5, V3, V1 + BNE L12 + WFDDB V0, V1, V0 + WFMDB V2, V16, V2 + WFMADB V2, V0, V4, V0 + WORD $0xB3130000 //lcdbr %f0,%f0 + FMOVD F0, ret+8(FP) + RET +L12: + WFMSDB V2, V1, V0, V2 + WFMDB V16, V2, V2 + FDIV F2, F0 + FMOVD F0, ret+8(FP) + RET +L11: + MOVD $·tanxnan<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET +L10: + WORD $0xB3130020 //lcdbr %f2,%f0 + BR L2 +atanIsZero: + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/tanh.go b/src/math/tanh.go new file mode 100644 index 0000000..a825678 --- /dev/null +++ b/src/math/tanh.go @@ -0,0 +1,104 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/tanh.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The go code is a simplified version of the original C.
+// tanh.c +// +// Hyperbolic tangent +// +// SYNOPSIS: +// +// double x, y, tanh(); +// +// y = tanh( x ); +// +// DESCRIPTION: +// +// Returns hyperbolic tangent of argument in the range MINLOG to MAXLOG. +// MAXLOG = 8.8029691931113054295988e+01 = log(2**127) +// MINLOG = -8.872283911167299960540e+01 = log(2**-128) +// +// A rational function is used for |x| < 0.625. The form +// x + x**3 P(x)/Q(x) of Cody & Waite is employed. +// Otherwise, +// tanh(x) = sinh(x)/cosh(x) = 1 - 2/(exp(2x) + 1). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -2,2 30000 2.5e-16 5.8e-17 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov +// + +var tanhP = [...]float64{ + -9.64399179425052238628e-1, + -9.92877231001918586564e1, + -1.61468768441708447952e3, +} +var tanhQ = [...]float64{ + 1.12811678491632931402e2, + 2.23548839060100448583e3, + 4.84406305325125486048e3, +} + +// Tanh returns the hyperbolic tangent of x. 
+// +// Special cases are: +// Tanh(±0) = ±0 +// Tanh(±Inf) = ±1 +// Tanh(NaN) = NaN +func Tanh(x float64) float64 { + if haveArchTanh { + return archTanh(x) + } + return tanh(x) +} + +func tanh(x float64) float64 { + const MAXLOG = 8.8029691931113054295988e+01 // log(2**127) + z := Abs(x) + switch { + case z > 0.5*MAXLOG: + if x < 0 { + return -1 + } + return 1 + case z >= 0.625: + s := Exp(2 * z) + z = 1 - 2/(s+1) + if x < 0 { + z = -z + } + default: + if x == 0 { + return x + } + s := x * x + z = x + x*s*((tanhP[0]*s+tanhP[1])*s+tanhP[2])/(((s+tanhQ[0])*s+tanhQ[1])*s+tanhQ[2]) + } + return z +} diff --git a/src/math/tanh_s390x.s b/src/math/tanh_s390x.s new file mode 100644 index 0000000..7e2d4dd --- /dev/null +++ b/src/math/tanh_s390x.s @@ -0,0 +1,169 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial approximations +DATA tanhrodataL18<>+0(SB)/8, $-1.0 +DATA tanhrodataL18<>+8(SB)/8, $-2.0 +DATA tanhrodataL18<>+16(SB)/8, $1.0 +DATA tanhrodataL18<>+24(SB)/8, $2.0 +DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01 +DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01 +DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00 +DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00 +DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01 +DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01 +DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00 +DATA tanhrodataL18<>+88(SB)/8, $20.E0 +GLOBL tanhrodataL18<>+0(SB), RODATA, $96 + +// Constants +DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000 +GLOBL tanhrlog2<>+0(SB), RODATA, $8 +DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0 +GLOBL tanhxadd<>+0(SB), RODATA, $8 +DATA tanhxmone<>+0(SB)/8, $-1.0 +GLOBL tanhxmone<>+0(SB), RODATA, $8 +DATA tanhxzero<>+0(SB)/8, $0 +GLOBL tanhxzero<>+0(SB), RODATA, $8 + +// Polynomial coefficients +DATA 
tanhtab<>+0(SB)/8, $0.000000000000000000E+00 +DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01 +DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01 +DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01 +DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01 +DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01 +DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01 +DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01 +DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01 +DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01 +DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01 +DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01 +DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01 +DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01 +DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01 +DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01 +GLOBL tanhtab<>+0(SB), RODATA, $128 + +// Tanh returns the hyperbolic tangent of the argument. +// +// Special cases are: +// Tanh(±0) = ±0 +// Tanh(±Inf) = ±1 +// Tanh(NaN) = NaN +// The algorithm used is minimax polynomial approximation using a table of +// polynomial coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·tanhAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + // special case Tanh(±0) = ±0 + FMOVD $(0.0), F1 + FCMPU F0, F1 + BEQ tanhIsZero + MOVD $tanhrodataL18<>+0(SB), R5 + LTDBR F0, F0 + MOVD $0x4034000000000000, R1 + BLTU L15 + FMOVD F0, F1 +L2: + MOVD $tanhxadd<>+0(SB), R2 + FMOVD 0(R2), F2 + MOVD tanhrlog2<>+0(SB), R2 + LDGR R2, F4 + WFMSDB V0, V4, V2, V4 + MOVD $tanhtab<>+0(SB), R3 + LGDR F4, R2 + RISBGZ $57, $60, $3, R2, R4 + WORD $0xED105058 //cdb %f1,.L19-.L18(%r5) + BYTE $0x00 + BYTE $0x19 + RISBGN $0, $15, $48, R2, R1 + WORD $0x68543000 //ld %f5,0(%r4,%r3) + LDGR R1, F6 + BLT L3 + MOVD $tanhxzero<>+0(SB), R1 + FMOVD 0(R1), F2 + WFCHDBS V0, V2, V4 + BEQ L9 + WFCHDBS V2, V0, V2 + BNE L1 + MOVD $tanhxmone<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET + +L3: + FADD F4, F2 + FMOVD tanhrodataL18<>+80(SB), F4 + FMADD F4, F2, F0 + FMOVD tanhrodataL18<>+72(SB), F1 + WFMDB V0, V0, V3 + FMOVD tanhrodataL18<>+64(SB), F2 + WFMADB V0, V1, V2, V1 + FMOVD tanhrodataL18<>+56(SB), F4 + FMOVD tanhrodataL18<>+48(SB), F2 + WFMADB V1, V3, V4, V1 + FMOVD tanhrodataL18<>+40(SB), F4 + WFMADB V3, V2, V4, V2 + FMOVD tanhrodataL18<>+32(SB), F4 + WORD $0xB9270022 //lhr %r2,%r2 + WFMADB V3, V1, V4, V1 + FMOVD tanhrodataL18<>+24(SB), F4 + WFMADB V3, V2, V4, V3 + WFMADB V0, V5, V0, V2 + WFMADB V0, V1, V3, V0 + WORD $0xA7183ECF //lhi %r1,16079 + WFMADB V0, V2, V5, V2 + FMUL F6, F2 + MOVW R2, R10 + MOVW R1, R11 + CMPBLE R10, R11, L16 + FMOVD F6, F0 + WORD $0xED005010 //adb %f0,.L28-.L18(%r5) + BYTE $0x00 + BYTE $0x1A + WORD $0xA7184330 //lhi %r1,17200 + FADD F2, F0 + MOVW R2, R10 + MOVW R1, R11 + CMPBGT R10, R11, L17 + WORD $0xED605010 //sdb %f6,.L28-.L18(%r5) + BYTE $0x00 + BYTE $0x1B + FADD F6, F2 + WFDDB V0, V2, V0 + FMOVD F0, ret+8(FP) + RET + +L9: + FMOVD tanhrodataL18<>+16(SB), F0 +L1: + FMOVD F0, ret+8(FP) + RET + +L15: + FNEG F0, F1 + BR L2 +L16: + FADD F6, F2 + FMOVD tanhrodataL18<>+8(SB), F0 + FMADD F4, F2, F0 + FMOVD tanhrodataL18<>+0(SB), F4 + FNEG F0, F0 + WFMADB 
V0, V2, V4, V0 + FMOVD F0, ret+8(FP) + RET + +L17: + WFDDB V0, V4, V0 + FMOVD tanhrodataL18<>+16(SB), F2 + WFSDB V0, V2, V0 + FMOVD F0, ret+8(FP) + RET + +tanhIsZero: //return ±0 + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/trig_reduce.go b/src/math/trig_reduce.go new file mode 100644 index 0000000..5cdf4fa --- /dev/null +++ b/src/math/trig_reduce.go @@ -0,0 +1,100 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +import ( + "math/bits" +) + +// reduceThreshold is the maximum value of x where the reduction using Pi/4 +// in 3 float64 parts still gives accurate results. This threshold +// is set by y*C being representable as a float64 without error +// where y is given by y = floor(x * (4 / Pi)) and C is the leading partial +// terms of Pi/4. Since the leading terms (PI4A and PI4B in sin.go) have 30 +// and 32 trailing zero bits, y should have less than 30 significant bits. +// y < 1<<30 -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4 +// So, conservatively we can take x < 1<<29. +// Above this threshold Payne-Hanek range reduction must be used. +const reduceThreshold = 1 << 29 + +// trigReduce implements Payne-Hanek range reduction by Pi/4 +// for x > 0. It returns the integer part mod 8 (j) and +// the fractional part (z) of x / (Pi/4). +// The implementation is based on: +// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" +// K. C. Ng et al, March 24, 1992 +// The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic. +func trigReduce(x float64) (j uint64, z float64) { + const PI4 = Pi / 4 + if x < PI4 { + return 0, x + } + // Extract out the integer and exponent such that, + // x = ix * 2 ** exp.
+ ix := Float64bits(x) + exp := int(ix>>shift&mask) - bias - shift + ix &^= mask << shift + ix |= 1 << shift + // Use the exponent to extract the 3 appropriate uint64 digits from mPi4, + // B ~ (z0, z1, z2), such that the product leading digit has the exponent -61. + // Note, exp >= -53 since x >= PI4 and exp < 971 for maximum float64. + digit, bitshift := uint(exp+61)/64, uint(exp+61)%64 + z0 := (mPi4[digit] << bitshift) | (mPi4[digit+1] >> (64 - bitshift)) + z1 := (mPi4[digit+1] << bitshift) | (mPi4[digit+2] >> (64 - bitshift)) + z2 := (mPi4[digit+2] << bitshift) | (mPi4[digit+3] >> (64 - bitshift)) + // Multiply mantissa by the digits and extract the upper two digits (hi, lo). + z2hi, _ := bits.Mul64(z2, ix) + z1hi, z1lo := bits.Mul64(z1, ix) + z0lo := z0 * ix + lo, c := bits.Add64(z1lo, z2hi, 0) + hi, _ := bits.Add64(z0lo, z1hi, c) + // The top 3 bits are j. + j = hi >> 61 + // Extract the fraction and find its magnitude. + hi = hi<<3 | lo>>61 + lz := uint(bits.LeadingZeros64(hi)) + e := uint64(bias - (lz + 1)) + // Clear implicit mantissa bit and shift into place. + hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1))) + hi >>= 64 - shift + // Include the exponent and convert to a float. + hi |= e << shift + z = Float64frombits(hi) + // Map zeros to origin. + if j&1 == 1 { + j++ + j &= 7 + z-- + } + // Multiply the fractional part by pi/4. + return j, z * PI4 +} + +// mPi4 is the binary digits of 4/pi as a uint64 array, +// that is, 4/pi = Sum mPi4[i]*2^(-64*i) +// 19 64-bit digits and the leading one bit give 1217 bits +// of precision to handle the largest possible float64 exponent. 
+var mPi4 = [...]uint64{ + 0x0000000000000001, + 0x45f306dc9c882a53, + 0xf84eafa3ea69bb81, + 0xb6c52b3278872083, + 0xfca2c757bd778ac3, + 0x6e48dc74849ba5c0, + 0x0c925dd413a32439, + 0xfc3bd63962534e7d, + 0xd1046bea5d768909, + 0xd338e04d68befc82, + 0x7323ac7306a673e9, + 0x3908bf177bf25076, + 0x3ff12fffbc0b301f, + 0xde5e2316b414da3e, + 0xda6cfd9e4f96136e, + 0x9e8c7ecd3cbfd45a, + 0xea4f758fd7cbe2f6, + 0x7a0e73ef14a525d4, + 0xd7f6bf623f1aba10, + 0xac06608df8f6d757, +} diff --git a/src/math/unsafe.go b/src/math/unsafe.go new file mode 100644 index 0000000..e59f50c --- /dev/null +++ b/src/math/unsafe.go @@ -0,0 +1,29 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +import "unsafe" + +// Float32bits returns the IEEE 754 binary representation of f, +// with the sign bit of f and the result in the same bit position. +// Float32bits(Float32frombits(x)) == x. +func Float32bits(f float32) uint32 { return *(*uint32)(unsafe.Pointer(&f)) } + +// Float32frombits returns the floating-point number corresponding +// to the IEEE 754 binary representation b, with the sign bit of b +// and the result in the same bit position. +// Float32frombits(Float32bits(x)) == x. +func Float32frombits(b uint32) float32 { return *(*float32)(unsafe.Pointer(&b)) } + +// Float64bits returns the IEEE 754 binary representation of f, +// with the sign bit of f and the result in the same bit position, +// and Float64bits(Float64frombits(x)) == x. +func Float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) } + +// Float64frombits returns the floating-point number corresponding +// to the IEEE 754 binary representation b, with the sign bit of b +// and the result in the same bit position. +// Float64frombits(Float64bits(x)) == x. +func Float64frombits(b uint64) float64 { return *(*float64)(unsafe.Pointer(&b)) } |