Adding upstream version 1.21.8. (ref: upstream/1.21.8)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:19:13 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:19:13 +0000
commit: ccd992355df7192993c666236047820244914598 (patch)
tree: f00fea65147227b7743083c6148396f74cd66935 /src/math
parent: Initial commit. (diff)
download: golang-1.21-ccd992355df7192993c666236047820244914598.tar.xz
golang-1.21-ccd992355df7192993c666236047820244914598.zip
208 files changed, 49397 insertions, 0 deletions
diff --git a/src/math/abs.go b/src/math/abs.go
new file mode 100644
index 0000000..08be145
--- /dev/null
+++ b/src/math/abs.go
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Abs returns the absolute value of x.
+//
+// Special cases are:
+//
+//	Abs(±Inf) = +Inf
+//	Abs(NaN) = NaN
+func Abs(x float64) float64 {
+	return Float64frombits(Float64bits(x) &^ (1 << 63)) // clear bit 63 (the IEEE 754 sign bit) of x's bit pattern
+}
diff --git a/src/math/acos_s390x.s b/src/math/acos_s390x.s
new file mode 100644
index 0000000..d2288b8
--- /dev/null
+++ b/src/math/acos_s390x.s
@@ -0,0 +1,144 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+DATA ·acosrodataL13<> + 0(SB)/8, $0.314159265358979323E+01   // pi
+DATA ·acosrodataL13<> + 8(SB)/8, $-0.0
+DATA ·acosrodataL13<> + 16(SB)/8, $0x7ff8000000000000    // NaN
+DATA ·acosrodataL13<> + 24(SB)/8, $-1.0
+DATA ·acosrodataL13<> + 32(SB)/8, $1.0
+DATA ·acosrodataL13<> + 40(SB)/8, $0.166666666666651626E+00
+DATA ·acosrodataL13<> + 48(SB)/8, $0.750000000042621169E-01
+DATA ·acosrodataL13<> + 56(SB)/8, $0.446428567178116477E-01
+DATA ·acosrodataL13<> + 64(SB)/8, $0.303819660378071894E-01
+DATA ·acosrodataL13<> + 72(SB)/8, $0.223715011892010405E-01
+DATA ·acosrodataL13<> + 80(SB)/8, $0.173659424522364952E-01
+DATA ·acosrodataL13<> + 88(SB)/8, $0.137810186504372266E-01
+DATA ·acosrodataL13<> + 96(SB)/8, $0.134066870961173521E-01
+DATA ·acosrodataL13<> + 104(SB)/8, $-.412335502831898721E-02
+DATA ·acosrodataL13<> + 112(SB)/8, $0.867383739532082719E-01
+DATA ·acosrodataL13<> + 120(SB)/8, $-.328765950607171649E+00
+DATA ·acosrodataL13<> + 128(SB)/8, $0.110401073869414626E+01
+DATA ·acosrodataL13<> + 136(SB)/8, $-.270694366992537307E+01
+DATA ·acosrodataL13<> + 144(SB)/8, $0.500196500770928669E+01
+DATA ·acosrodataL13<> + 152(SB)/8, $-.665866959108585165E+01
+DATA ·acosrodataL13<> + 160(SB)/8, $-.344895269334086578E+01
+DATA ·acosrodataL13<> + 168(SB)/8, $0.927437952918301659E+00
+DATA ·acosrodataL13<> + 176(SB)/8, $0.610487478874645653E+01
+DATA ·acosrodataL13<> + 184(SB)/8, $0.157079632679489656e+01   // pi/2
+DATA ·acosrodataL13<> + 192(SB)/8, $0.0
+GLOBL ·acosrodataL13<> + 0(SB), RODATA, $200   // 25 entries of 8 bytes each
+
+// Acos returns the arccosine, in radians, of the argument.
+//
+// Special case is:
+//      Acos(x) = NaN if x < -1 or x > 1
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·acosAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0
+	MOVD	$·acosrodataL13<>+0(SB), R9	// R9 = base address of the constant table
+	LGDR	F0, R12	// R12 = raw bit pattern of x
+	FMOVD	F0, F10	// keep a copy of x in F10
+	SRAD	$32, R12	// R12 = high 32 bits of x (arithmetic shift keeps the sign)
+	WORD	$0xC0293FE6	//iilf	%r2,1072079005 (0x3FE6A09D)
+	BYTE	$0xA0
+	BYTE	$0x9D
+	WORD	$0xB917001C	//llgtr	%r1,%r12 (R1 = low 31 bits of R12: high word of x without its sign bit)
+	CMPW	R1,R2
+	BGT	L2
+	FMOVD	192(R9), F8	// 0.0
+	FMADD	F0, F0, F8
+	FMOVD	184(R9), F1	// pi/2
+L3:
+	WFMDB	V8, V8, V2
+	FMOVD	176(R9), F6
+	FMOVD	168(R9), F0
+	FMOVD	160(R9), F4
+	WFMADB	V2, V0, V6, V0
+	FMOVD	152(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	144(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	136(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	128(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	120(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	112(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	104(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	96(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	88(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	80(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	72(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	64(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	56(R9), F6
+	WFMADB	V2, V4, V6, V4
+	FMOVD	48(R9), F6
+	WFMADB	V2, V0, V6, V0
+	FMOVD	40(R9), F6
+	WFMADB	V2, V4, V6, V2
+	FMOVD	192(R9), F4	// 0.0
+	WFMADB	V8, V0, V2, V0
+	WFMADB	V10, V8, V4, V8
+	FMADD	F0, F8, F10
+	WFSDB	V10, V1, V10
+L1:
+	FMOVD	F10, ret+8(FP)	// common exit: the result is in F10
+	RET
+
+L2:
+	WORD	$0xC0293FEF	//iilf	%r2,1072693247 (0x3FEFFFFF)
+	BYTE	$0xFF
+	BYTE	$0xFF
+	CMPW	R1, R2
+	BLE	L12
+L4:
+	WORD	$0xED009020	//cdb	%f0,.L34-.L13(%r9) (displacement 32: the 1.0 table entry)
+	BYTE	$0x00
+	BYTE	$0x19
+	BEQ	L8	// x == 1.0: L8 returns 0
+	WORD	$0xED009018	//cdb	%f0,.L35-.L13(%r9) (displacement 24: the -1.0 table entry)
+	BYTE	$0x00
+	BYTE	$0x19
+	BEQ	L9	// x == -1.0: L9 returns pi
+	WFCEDBS	V10, V10, V0	// compare x with itself
+	BVS	L1	// NOTE(review): presumably taken when x is NaN, returning x unchanged — confirm
+	FMOVD	16(R9), F10	// NaN constant
+	BR	L1
+L12:
+	FMOVD	24(R9), F0	// -1.0
+	FMADD	F10, F10, F0
+	WORD	$0xB3130080	//lcdbr	%f8,%f0 (F8 = -F0)
+	WORD	$0xED009008	//cdb	%f0,.L37-.L13(%r9) (displacement 8: the -0.0 table entry)
+	BYTE	$0x00
+	BYTE	$0x19
+	FSQRT	F8, F10
+L5:
+	MOVW	R12, R4	// R4 = signed high word of x
+	CMPBLE	R4, $0, L7
+	WORD	$0xB31300AA	//lcdbr	%f10,%f10 (F10 = -F10)
+	FMOVD	$0, F1
+	BR	L3
+L9:
+	FMOVD	0(R9), F10	// pi
+	BR	L1
+L8:
+	FMOVD	$0, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L7:
+	FMOVD	0(R9), F1	// pi
+	BR	L3
diff --git a/src/math/acosh.go b/src/math/acosh.go
new file mode 100644
index 0000000..a85d003
--- /dev/null
+++ b/src/math/acosh.go
@@ -0,0 +1,65 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c
+// and came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// __ieee754_acosh(x)
+// Method :
+//	Based on
+//	        acosh(x) = log [ x + sqrt(x*x-1) ]
+//	we have
+//	        acosh(x) := log(x)+ln2,	if x is large; else
+//	        acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
+//	        acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
+//
+// Special cases:
+//	acosh(x) is NaN with signal if x<1.
+//	acosh(NaN) is NaN without signal.
+//
+
+// Acosh returns the inverse hyperbolic cosine of x.
+//
+// Special cases are:
+//
+//	Acosh(+Inf) = +Inf
+//	Acosh(x) = NaN if x < 1
+//	Acosh(NaN) = NaN
+func Acosh(x float64) float64 {
+	if haveArchAcosh { // NOTE(review): presumably true only where an architecture-specific archAcosh exists — confirm
+		return archAcosh(x)
+	}
+	return acosh(x) // fall back to the pure Go implementation
+}
+
+func acosh(x float64) float64 {
+	const Large = 1 << 28 // 2**28
+	// the first two cases handle the special inputs (x < 1, NaN, and x == 1)
+	switch {
+	case x < 1 || IsNaN(x):
+		return NaN()
+	case x == 1:
+		return 0
+	case x >= Large:
+		return Log(x) + Ln2 // x >= 2**28
+	case x > 2:
+		return Log(2*x - 1/(x+Sqrt(x*x-1))) // 2**28 > x > 2
+	}
+	t := x - 1
+	return Log1p(t + Sqrt(2*t+t*t)) // 2 >= x > 1
+}
diff --git a/src/math/acosh_s390x.s b/src/math/acosh_s390x.s
new file mode 100644
index 0000000..9294c48
--- /dev/null
+++ b/src/math/acosh_s390x.s
@@ -0,0 +1,158 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+DATA ·acoshrodataL11<> + 0(SB)/8, $-1.0
+DATA ·acoshrodataL11<> + 8(SB)/8, $.41375273347623353626
+DATA ·acoshrodataL11<> + 16(SB)/8, $.51487302528619766235E+04
+DATA ·acoshrodataL11<> + 24(SB)/8, $-1.67526912689208984375
+DATA ·acoshrodataL11<> + 32(SB)/8, $0.181818181818181826E+00
+DATA ·acoshrodataL11<> + 40(SB)/8, $-.165289256198351540E-01
+DATA ·acoshrodataL11<> + 48(SB)/8, $0.200350613573012186E-02
+DATA ·acoshrodataL11<> + 56(SB)/8, $-.273205381970859341E-03
+DATA ·acoshrodataL11<> + 64(SB)/8, $0.397389654305194527E-04
+DATA ·acoshrodataL11<> + 72(SB)/8, $0.938370938292558173E-06
+DATA ·acoshrodataL11<> + 80(SB)/8, $-.602107458843052029E-05
+DATA ·acoshrodataL11<> + 88(SB)/8, $0.212881813645679599E-07
+DATA ·acoshrodataL11<> + 96(SB)/8, $-.148682720127920854E-06
+DATA ·acoshrodataL11<> + 104(SB)/8, $-5.5
+DATA ·acoshrodataL11<> + 112(SB)/8, $0x7ff8000000000000      // NaN
+GLOBL ·acoshrodataL11<> + 0(SB), RODATA, $120   // 15 entries of 8 bytes each
+
+// Table of log correction terms (16 entries of 8 bytes each)
+DATA ·acoshtab2068<> + 0(SB)/8, $0.585235384085551248E-01
+DATA ·acoshtab2068<> + 8(SB)/8, $0.412206153771168640E-01
+DATA ·acoshtab2068<> + 16(SB)/8, $0.273839003221648339E-01
+DATA ·acoshtab2068<> + 24(SB)/8, $0.166383778368856480E-01
+DATA ·acoshtab2068<> + 32(SB)/8, $0.866678223433169637E-02
+DATA ·acoshtab2068<> + 40(SB)/8, $0.319831684989627514E-02
+DATA ·acoshtab2068<> + 48(SB)/8, $0.0
+DATA ·acoshtab2068<> + 56(SB)/8, $-.113006378583725549E-02
+DATA ·acoshtab2068<> + 64(SB)/8, $-.367979419636602491E-03
+DATA ·acoshtab2068<> + 72(SB)/8, $0.213172484510484979E-02
+DATA ·acoshtab2068<> + 80(SB)/8, $0.623271047682013536E-02
+DATA ·acoshtab2068<> + 88(SB)/8, $0.118140812789696885E-01
+DATA ·acoshtab2068<> + 96(SB)/8, $0.187681358930914206E-01
+DATA ·acoshtab2068<> + 104(SB)/8, $0.269985148668178992E-01
+DATA ·acoshtab2068<> + 112(SB)/8, $0.364186619761331328E-01
+DATA ·acoshtab2068<> + 120(SB)/8, $0.469505379381388441E-01
+GLOBL ·acoshtab2068<> + 0(SB), RODATA, $128
+
+// Acosh returns the inverse hyperbolic cosine of the argument.
+//
+// Special cases are:
+//      Acosh(+Inf) = +Inf
+//      Acosh(x) = NaN if x < 1
+//      Acosh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·acoshAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0
+	MOVD	$·acoshrodataL11<>+0(SB), R9	// R9 = base address of the constant table
+	LGDR	F0, R1	// R1 = raw bit pattern of x
+	WORD	$0xC0295FEF	//iilf	%r2,1609564159 (0x5FEFFFFF)
+	BYTE	$0xFF
+	BYTE	$0xFF
+	SRAD	$32, R1	// R1 = signed high 32 bits of x
+	CMPW	R1, R2
+	BGT	L2
+	WORD	$0xC0293FEF	//iilf	%r2,1072693247 (0x3FEFFFFF)
+	BYTE	$0xFF
+	BYTE	$0xFF
+	CMPW	R1, R2
+	BGT	L10
+L3:
+	WFCEDBS	V0, V0, V2	// compare x with itself
+	BVS	L1	// NOTE(review): presumably taken when x is NaN, returning x unchanged — confirm
+	FMOVD	112(R9), F0	// NaN constant
+L1:
+	FMOVD	F0, ret+8(FP)
+	RET
+L2:
+	WORD	$0xC0297FEF	//iilf	%r2,2146435071 (0x7FEFFFFF)
+	BYTE	$0xFF
+	BYTE	$0xFF
+	MOVW	R1, R6
+	MOVW	R2, R7
+	CMPBGT	R6, R7, L1	// high word above 0x7FEFFFFF: return x as is
+	FMOVD	F0, F8
+	FMOVD	$0, F0
+	WFADB	V0, V8, V0
+	WORD	$0xC0398006	//iilf	%r3,2147909631 (0x80067FFF)
+	BYTE	$0x7F
+	BYTE	$0xFF
+	LGDR	F0, R5
+	SRAD	$32, R5
+	MOVH	$0x0, R1
+	SUBW	R5, R3
+	FMOVD	$0, F10
+	RISBGZ	$32, $47, $0, R3, R4
+	RISBGZ	$57, $60, $51, R3, R3
+	BYTE	$0x18	//lr	%r2,%r4
+	BYTE	$0x24
+	RISBGN	$0, $31, $32, R4, R1
+	SUBW	$0x100000, R2
+	SRAW	$8, R2, R2
+	ORW	$0x45000000, R2
+L5:
+	LDGR	R1, F0
+	FMOVD	104(R9), F2	// -5.5
+	FMADD	F8, F0, F2
+	FMOVD	96(R9), F4
+	WFMADB	V10, V0, V2, V0
+	FMOVD	88(R9), F6
+	FMOVD	80(R9), F2
+	WFMADB	V0, V6, V4, V6
+	FMOVD	72(R9), F1
+	WFMDB	V0, V0, V4
+	WFMADB	V0, V1, V2, V1
+	FMOVD	64(R9), F2
+	WFMADB	V6, V4, V1, V6
+	FMOVD	56(R9), F1
+	RISBGZ	$57, $60, $0, R3, R3
+	WFMADB	V0, V2, V1, V2
+	FMOVD	48(R9), F1
+	WFMADB	V4, V6, V2, V6
+	FMOVD	40(R9), F2
+	WFMADB	V0, V1, V2, V1
+	VLVGF	$0, R2, V2
+	WFMADB	V4, V6, V1, V4
+	LDEBR	F2, F2
+	FMOVD	32(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	24(R9), F1
+	FMOVD	16(R9), F6
+	MOVD	$·acoshtab2068<>+0(SB), R1	// R1 = base address of the log correction table
+	WFMADB	V2, V1, V6, V2
+	FMOVD	0(R3)(R1*1), F3	// load the table entry at byte offset R3
+	WFMADB	V0, V4, V3, V0
+	FMOVD	8(R9), F4
+	FMADD	F4, F2, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L10:
+	FMOVD	F0, F8	// keep a copy of x in F8
+	FMOVD	0(R9), F0	// -1.0
+	FMADD	F8, F8, F0
+	LTDBR	F0, F0
+	FSQRT	F0, F10
+L4:
+	WFADB	V10, V8, V0
+	WORD	$0xC0398006	//iilf	%r3,2147909631 (0x80067FFF)
+	BYTE	$0x7F
+	BYTE	$0xFF
+	LGDR	F0, R5
+	SRAD	$32, R5
+	MOVH	$0x0, R1
+	SUBW	R5, R3
+	SRAW	$8, R3, R2
+	RISBGZ	$32, $47, $0, R3, R4
+	ANDW	$0xFFFFFF00, R2
+	RISBGZ	$57, $60, $51, R3, R3
+	ORW	$0x45000000, R2
+	RISBGN	$0, $31, $32, R4, R1
+	BR	L5
diff --git a/src/math/all_test.go b/src/math/all_test.go
new file mode 100644
index 0000000..af3c38c
--- /dev/null
+++ b/src/math/all_test.go
@@ -0,0 +1,3913 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math_test
+
+import (
+	"fmt"
+	. "math"
+	"testing"
+	"unsafe"
+)
+
+var vf = []float64{
+	4.9790119248836735e+00,
+	7.7388724745781045e+00,
+	-2.7688005719200159e-01,
+	-5.0106036182710749e+00,
+	9.6362937071984173e+00,
+	2.9263772392439646e+00,
+	5.2290834314593066e+00,
+	2.7279399104360102e+00,
+	1.8253080916808550e+00,
+	-8.6859247685756013e+00,
+}
+
+// The expected results below were computed by the high precision calculators
+// at https://keisan.casio.com/.  More exact input values (array vf[], above)
+// were obtained by printing them with "%.26f".  The answers were calculated
+// to 26 digits (by using the "Digit number" drop-down control of each
+// calculator).
+var acos = []float64{
+	1.0496193546107222142571536e+00,
+	6.8584012813664425171660692e-01,
+	1.5984878714577160325521819e+00,
+	2.0956199361475859327461799e+00,
+	2.7053008467824138592616927e-01,
+	1.2738121680361776018155625e+00,
+	1.0205369421140629186287407e+00,
+	1.2945003481781246062157835e+00,
+	1.3872364345374451433846657e+00,
+	2.6231510803970463967294145e+00,
+}
+var acosh = []float64{
+	2.4743347004159012494457618e+00,
+	2.8576385344292769649802701e+00,
+	7.2796961502981066190593175e-01,
+	2.4796794418831451156471977e+00,
+	3.0552020742306061857212962e+00,
+	2.044238592688586588942468e+00,
+	2.5158701513104513595766636e+00,
+	1.99050839282411638174299e+00,
+	1.6988625798424034227205445e+00,
+	2.9611454842470387925531875e+00,
+}
+var asin = []float64{
+	5.2117697218417440497416805e-01,
+	8.8495619865825236751471477e-01,
+	-2.769154466281941332086016e-02, // stray leading zero dropped; value unchanged
+	-5.2482360935268931351485822e-01,
+	1.3002662421166552333051524e+00,
+	2.9698415875871901741575922e-01,
+	5.5025938468083370060258102e-01,
+	2.7629597861677201301553823e-01,
+	1.83559892257451475846656e-01,
+	-1.0523547536021497774980928e+00,
+}
+var asinh = []float64{
+	2.3083139124923523427628243e+00,
+	2.743551594301593620039021e+00,
+	-2.7345908534880091229413487e-01,
+	-2.3145157644718338650499085e+00,
+	2.9613652154015058521951083e+00,
+	1.7949041616585821933067568e+00,
+	2.3564032905983506405561554e+00,
+	1.7287118790768438878045346e+00,
+	1.3626658083714826013073193e+00,
+	-2.8581483626513914445234004e+00,
+}
+var atan = []float64{
+	1.372590262129621651920085e+00,
+	1.442290609645298083020664e+00,
+	-2.7011324359471758245192595e-01,
+	-1.3738077684543379452781531e+00,
+	1.4673921193587666049154681e+00,
+	1.2415173565870168649117764e+00,
+	1.3818396865615168979966498e+00,
+	1.2194305844639670701091426e+00,
+	1.0696031952318783760193244e+00,
+	-1.4561721938838084990898679e+00,
+}
+var atanh = []float64{
+	5.4651163712251938116878204e-01,
+	1.0299474112843111224914709e+00,
+	-2.7695084420740135145234906e-02,
+	-5.5072096119207195480202529e-01,
+	1.9943940993171843235906642e+00,
+	3.01448604578089708203017e-01,
+	5.8033427206942188834370595e-01,
+	2.7987997499441511013958297e-01,
+	1.8459947964298794318714228e-01,
+	-1.3273186910532645867272502e+00,
+}
+var atan2 = []float64{
+	1.1088291730037004444527075e+00,
+	9.1218183188715804018797795e-01,
+	1.5984772603216203736068915e+00,
+	2.0352918654092086637227327e+00,
+	8.0391819139044720267356014e-01,
+	1.2861075249894661588866752e+00,
+	1.0889904479131695712182587e+00,
+	1.3044821793397925293797357e+00,
+	1.3902530903455392306872261e+00,
+	2.2859857424479142655411058e+00,
+}
+var cbrt = []float64{
+	1.7075799841925094446722675e+00,
+	1.9779982212970353936691498e+00,
+	-6.5177429017779910853339447e-01,
+	-1.7111838886544019873338113e+00,
+	2.1279920909827937423960472e+00,
+	1.4303536770460741452312367e+00,
+	1.7357021059106154902341052e+00,
+	1.3972633462554328350552916e+00,
+	1.2221149580905388454977636e+00,
+	-2.0556003730500069110343596e+00,
+}
+var ceil = []float64{
+	5.0000000000000000e+00,
+	8.0000000000000000e+00,
+	Copysign(0, -1),
+	-5.0000000000000000e+00,
+	1.0000000000000000e+01,
+	3.0000000000000000e+00,
+	6.0000000000000000e+00,
+	3.0000000000000000e+00,
+	2.0000000000000000e+00,
+	-8.0000000000000000e+00,
+}
+var copysign = []float64{
+	-4.9790119248836735e+00,
+	-7.7388724745781045e+00,
+	-2.7688005719200159e-01,
+	-5.0106036182710749e+00,
+	-9.6362937071984173e+00,
+	-2.9263772392439646e+00,
+	-5.2290834314593066e+00,
+	-2.7279399104360102e+00,
+	-1.8253080916808550e+00,
+	-8.6859247685756013e+00,
+}
+var cos = []float64{
+	2.634752140995199110787593e-01,
+	1.148551260848219865642039e-01,
+	9.6191297325640768154550453e-01,
+	2.938141150061714816890637e-01,
+	-9.777138189897924126294461e-01,
+	-9.7693041344303219127199518e-01,
+	4.940088096948647263961162e-01,
+	-9.1565869021018925545016502e-01,
+	-2.517729313893103197176091e-01,
+	-7.39241351595676573201918e-01,
+}
+
+// Results for 100000 * Pi + vf[i]
+var cosLarge = []float64{
+	2.634752141185559426744e-01,
+	1.14855126055543100712e-01,
+	9.61912973266488928113e-01,
+	2.9381411499556122552e-01,
+	-9.777138189880161924641e-01,
+	-9.76930413445147608049e-01,
+	4.940088097314976789841e-01,
+	-9.15658690217517835002e-01,
+	-2.51772931436786954751e-01,
+	-7.3924135157173099849e-01,
+}
+
+var cosh = []float64{
+	7.2668796942212842775517446e+01,
+	1.1479413465659254502011135e+03,
+	1.0385767908766418550935495e+00,
+	7.5000957789658051428857788e+01,
+	7.655246669605357888468613e+03,
+	9.3567491758321272072888257e+00,
+	9.331351599270605471131735e+01,
+	7.6833430994624643209296404e+00,
+	3.1829371625150718153881164e+00,
+	2.9595059261916188501640911e+03,
+}
+var erf = []float64{
+	5.1865354817738701906913566e-01,
+	7.2623875834137295116929844e-01,
+	-3.123458688281309990629839e-02,
+	-5.2143121110253302920437013e-01,
+	8.2704742671312902508629582e-01,
+	3.2101767558376376743993945e-01,
+	5.403990312223245516066252e-01,
+	3.0034702916738588551174831e-01,
+	2.0369924417882241241559589e-01,
+	-7.8069386968009226729944677e-01,
+}
+var erfc = []float64{
+	4.8134645182261298093086434e-01,
+	2.7376124165862704883070156e-01,
+	1.0312345868828130999062984e+00,
+	1.5214312111025330292043701e+00,
+	1.7295257328687097491370418e-01,
+	6.7898232441623623256006055e-01,
+	4.596009687776754483933748e-01,
+	6.9965297083261411448825169e-01,
+	7.9630075582117758758440411e-01,
+	1.7806938696800922672994468e+00,
+}
+var erfinv = []float64{
+	4.746037673358033586786350696e-01,
+	8.559054432692110956388764172e-01,
+	-2.45427830571707336251331946e-02,
+	-4.78116683518973366268905506e-01,
+	1.479804430319470983648120853e+00,
+	2.654485787128896161882650211e-01,
+	5.027444534221520197823192493e-01,
+	2.466703532707627818954585670e-01,
+	1.632011465103005426240343116e-01,
+	-1.06672334642196900710000389e+00,
+}
+var exp = []float64{
+	1.4533071302642137507696589e+02,
+	2.2958822575694449002537581e+03,
+	7.5814542574851666582042306e-01,
+	6.6668778421791005061482264e-03,
+	1.5310493273896033740861206e+04,
+	1.8659907517999328638667732e+01,
+	1.8662167355098714543942057e+02,
+	1.5301332413189378961665788e+01,
+	6.2047063430646876349125085e+00,
+	1.6894712385826521111610438e-04,
+}
+var expm1 = []float64{
+	5.105047796122957327384770212e-02,
+	8.046199708567344080562675439e-02,
+	-2.764970978891639815187418703e-03,
+	-4.8871434888875355394330300273e-02,
+	1.0115864277221467777117227494e-01,
+	2.969616407795910726014621657e-02,
+	5.368214487944892300914037972e-02,
+	2.765488851131274068067445335e-02,
+	1.842068661871398836913874273e-02,
+	-8.3193870863553801814961137573e-02,
+}
+var expm1Large = []float64{
+	4.2031418113550844e+21,
+	4.0690789717473863e+33,
+	-0.9372627915981363e+00,
+	-1.0,
+	7.077694784145933e+41,
+	5.117936223839153e+12,
+	5.124137759001189e+22,
+	7.03546003972584e+11,
+	8.456921800389698e+07,
+	-1.0,
+}
+var exp2 = []float64{
+	3.1537839463286288034313104e+01,
+	2.1361549283756232296144849e+02,
+	8.2537402562185562902577219e-01,
+	3.1021158628740294833424229e-02,
+	7.9581744110252191462569661e+02,
+	7.6019905892596359262696423e+00,
+	3.7506882048388096973183084e+01,
+	6.6250893439173561733216375e+00,
+	3.5438267900243941544605339e+00,
+	2.4281533133513300984289196e-03,
+}
+var fabs = []float64{
+	4.9790119248836735e+00,
+	7.7388724745781045e+00,
+	2.7688005719200159e-01,
+	5.0106036182710749e+00,
+	9.6362937071984173e+00,
+	2.9263772392439646e+00,
+	5.2290834314593066e+00,
+	2.7279399104360102e+00,
+	1.8253080916808550e+00,
+	8.6859247685756013e+00,
+}
+var fdim = []float64{
+	4.9790119248836735e+00,
+	7.7388724745781045e+00,
+	0.0000000000000000e+00,
+	0.0000000000000000e+00,
+	9.6362937071984173e+00,
+	2.9263772392439646e+00,
+	5.2290834314593066e+00,
+	2.7279399104360102e+00,
+	1.8253080916808550e+00,
+	0.0000000000000000e+00,
+}
+var floor = []float64{
+	4.0000000000000000e+00,
+	7.0000000000000000e+00,
+	-1.0000000000000000e+00,
+	-6.0000000000000000e+00,
+	9.0000000000000000e+00,
+	2.0000000000000000e+00,
+	5.0000000000000000e+00,
+	2.0000000000000000e+00,
+	1.0000000000000000e+00,
+	-9.0000000000000000e+00,
+}
+var fmod = []float64{
+	4.197615023265299782906368e-02,
+	2.261127525421895434476482e+00,
+	3.231794108794261433104108e-02,
+	4.989396381728925078391512e+00,
+	3.637062928015826201999516e-01,
+	1.220868282268106064236690e+00,
+	4.770916568540693347699744e+00,
+	1.816180268691969246219742e+00,
+	8.734595415957246977711748e-01,
+	1.314075231424398637614104e+00,
+}
+
+type fi struct {
+	f float64
+	i int
+}
+
+var frexp = []fi{
+	{6.2237649061045918750e-01, 3},
+	{9.6735905932226306250e-01, 3},
+	{-5.5376011438400318000e-01, -1},
+	{-6.2632545228388436250e-01, 3},
+	{6.02268356699901081250e-01, 4},
+	{7.3159430981099115000e-01, 2},
+	{6.5363542893241332500e-01, 3},
+	{6.8198497760900255000e-01, 2},
+	{9.1265404584042750000e-01, 1},
+	{-5.4287029803597508250e-01, 4},
+}
+var gamma = []float64{
+	2.3254348370739963835386613898e+01,
+	2.991153837155317076427529816e+03,
+	-4.561154336726758060575129109e+00,
+	7.719403468842639065959210984e-01,
+	1.6111876618855418534325755566e+05,
+	1.8706575145216421164173224946e+00,
+	3.4082787447257502836734201635e+01,
+	1.579733951448952054898583387e+00,
+	9.3834586598354592860187267089e-01,
+	-2.093995902923148389186189429e-05,
+}
+var j0 = []float64{
+	-1.8444682230601672018219338e-01,
+	2.27353668906331975435892e-01,
+	9.809259936157051116270273e-01,
+	-1.741170131426226587841181e-01,
+	-2.1389448451144143352039069e-01,
+	-2.340905848928038763337414e-01,
+	-1.0029099691890912094586326e-01,
+	-1.5466726714884328135358907e-01,
+	3.252650187653420388714693e-01,
+	-8.72218484409407250005360235e-03,
+}
+var j1 = []float64{
+	-3.251526395295203422162967e-01,
+	1.893581711430515718062564e-01,
+	-1.3711761352467242914491514e-01,
+	3.287486536269617297529617e-01,
+	1.3133899188830978473849215e-01,
+	3.660243417832986825301766e-01,
+	-3.4436769271848174665420672e-01,
+	4.329481396640773768835036e-01,
+	5.8181350531954794639333955e-01,
+	-2.7030574577733036112996607e-01,
+}
+var j2 = []float64{
+	5.3837518920137802565192769e-02,
+	-1.7841678003393207281244667e-01,
+	9.521746934916464142495821e-03,
+	4.28958355470987397983072e-02,
+	2.4115371837854494725492872e-01,
+	4.842458532394520316844449e-01,
+	-3.142145220618633390125946e-02,
+	4.720849184745124761189957e-01,
+	3.122312022520957042957497e-01,
+	7.096213118930231185707277e-02,
+}
+var jM3 = []float64{
+	-3.684042080996403091021151e-01,
+	2.8157665936340887268092661e-01,
+	4.401005480841948348343589e-04,
+	3.629926999056814081597135e-01,
+	3.123672198825455192489266e-02,
+	-2.958805510589623607540455e-01,
+	-3.2033177696533233403289416e-01,
+	-2.592737332129663376736604e-01,
+	-1.0241334641061485092351251e-01,
+	-2.3762660886100206491674503e-01,
+}
+var lgamma = []fi{
+	{3.146492141244545774319734e+00, 1},
+	{8.003414490659126375852113e+00, 1},
+	{1.517575735509779707488106e+00, -1},
+	{-2.588480028182145853558748e-01, 1},
+	{1.1989897050205555002007985e+01, 1},
+	{6.262899811091257519386906e-01, 1},
+	{3.5287924899091566764846037e+00, 1},
+	{4.5725644770161182299423372e-01, 1},
+	{-6.363667087767961257654854e-02, 1},
+	{-1.077385130910300066425564e+01, -1},
+}
+var log = []float64{
+	1.605231462693062999102599e+00,
+	2.0462560018708770653153909e+00,
+	-1.2841708730962657801275038e+00,
+	1.6115563905281545116286206e+00,
+	2.2655365644872016636317461e+00,
+	1.0737652208918379856272735e+00,
+	1.6542360106073546632707956e+00,
+	1.0035467127723465801264487e+00,
+	6.0174879014578057187016475e-01,
+	2.161703872847352815363655e+00,
+}
+var logb = []float64{
+	2.0000000000000000e+00,
+	2.0000000000000000e+00,
+	-2.0000000000000000e+00,
+	2.0000000000000000e+00,
+	3.0000000000000000e+00,
+	1.0000000000000000e+00,
+	2.0000000000000000e+00,
+	1.0000000000000000e+00,
+	0.0000000000000000e+00,
+	3.0000000000000000e+00,
+}
+var log10 = []float64{
+	6.9714316642508290997617083e-01,
+	8.886776901739320576279124e-01,
+	-5.5770832400658929815908236e-01,
+	6.998900476822994346229723e-01,
+	9.8391002850684232013281033e-01,
+	4.6633031029295153334285302e-01,
+	7.1842557117242328821552533e-01,
+	4.3583479968917773161304553e-01,
+	2.6133617905227038228626834e-01,
+	9.3881606348649405716214241e-01,
+}
+var log1p = []float64{
+	4.8590257759797794104158205e-02,
+	7.4540265965225865330849141e-02,
+	-2.7726407903942672823234024e-03,
+	-5.1404917651627649094953380e-02,
+	9.1998280672258624681335010e-02,
+	2.8843762576593352865894824e-02,
+	5.0969534581863707268992645e-02,
+	2.6913947602193238458458594e-02,
+	1.8088493239630770262045333e-02,
+	-9.0865245631588989681559268e-02,
+}
+var log2 = []float64{
+	2.3158594707062190618898251e+00,
+	2.9521233862883917703341018e+00,
+	-1.8526669502700329984917062e+00,
+	2.3249844127278861543568029e+00,
+	3.268478366538305087466309e+00,
+	1.5491157592596970278166492e+00,
+	2.3865580889631732407886495e+00,
+	1.447811865817085365540347e+00,
+	8.6813999540425116282815557e-01,
+	3.118679457227342224364709e+00,
+}
+var modf = [][2]float64{
+	{4.0000000000000000e+00, 9.7901192488367350108546816e-01},
+	{7.0000000000000000e+00, 7.3887247457810456552351752e-01},
+	{Copysign(0, -1), -2.7688005719200159404635997e-01},
+	{-5.0000000000000000e+00, -1.060361827107492160848778e-02},
+	{9.0000000000000000e+00, 6.3629370719841737980004837e-01},
+	{2.0000000000000000e+00, 9.2637723924396464525443662e-01},
+	{5.0000000000000000e+00, 2.2908343145930665230025625e-01},
+	{2.0000000000000000e+00, 7.2793991043601025126008608e-01},
+	{1.0000000000000000e+00, 8.2530809168085506044576505e-01},
+	{-8.0000000000000000e+00, -6.8592476857560136238589621e-01},
+}
+var nextafter32 = []float32{
+	4.979012489318848e+00,
+	7.738873004913330e+00,
+	-2.768800258636475e-01,
+	-5.010602951049805e+00,
+	9.636294364929199e+00,
+	2.926377534866333e+00,
+	5.229084014892578e+00,
+	2.727940082550049e+00,
+	1.825308203697205e+00,
+	-8.685923576354980e+00,
+}
+var nextafter64 = []float64{
+	4.97901192488367438926388786e+00,
+	7.73887247457810545370193722e+00,
+	-2.7688005719200153853520874e-01,
+	-5.01060361827107403343006808e+00,
+	9.63629370719841915615688777e+00,
+	2.92637723924396508934364647e+00,
+	5.22908343145930754047867595e+00,
+	2.72793991043601069534929593e+00,
+	1.82530809168085528249036997e+00,
+	-8.68592476857559958602905681e+00,
+}
+var pow = []float64{
+	9.5282232631648411840742957e+04,
+	5.4811599352999901232411871e+07,
+	5.2859121715894396531132279e-01,
+	9.7587991957286474464259698e-06,
+	4.328064329346044846740467e+09,
+	8.4406761805034547437659092e+02,
+	1.6946633276191194947742146e+05,
+	5.3449040147551939075312879e+02,
+	6.688182138451414936380374e+01,
+	2.0609869004248742886827439e-09,
+}
+var remainder = []float64{
+	4.197615023265299782906368e-02,
+	2.261127525421895434476482e+00,
+	3.231794108794261433104108e-02,
+	-2.120723654214984321697556e-02,
+	3.637062928015826201999516e-01,
+	1.220868282268106064236690e+00,
+	-4.581668629186133046005125e-01,
+	-9.117596417440410050403443e-01,
+	8.734595415957246977711748e-01,
+	1.314075231424398637614104e+00,
+}
+var round = []float64{
+	5,
+	8,
+	Copysign(0, -1),
+	-5,
+	10,
+	3,
+	5,
+	3,
+	2,
+	-9,
+}
+var signbit = []bool{
+	false,
+	false,
+	true,
+	true,
+	false,
+	false,
+	false,
+	false,
+	false,
+	true,
+}
+var sin = []float64{
+	-9.6466616586009283766724726e-01,
+	9.9338225271646545763467022e-01,
+	-2.7335587039794393342449301e-01,
+	9.5586257685042792878173752e-01,
+	-2.099421066779969164496634e-01,
+	2.135578780799860532750616e-01,
+	-8.694568971167362743327708e-01,
+	4.019566681155577786649878e-01,
+	9.6778633541687993721617774e-01,
+	-6.734405869050344734943028e-01,
+}
+
+// Results for 100000 * Pi + vf[i]
+var sinLarge = []float64{
+	-9.646661658548936063912e-01,
+	9.933822527198506903752e-01,
+	-2.7335587036246899796e-01,
+	9.55862576853689321268e-01,
+	-2.099421066862688873691e-01,
+	2.13557878070308981163e-01,
+	-8.694568970959221300497e-01,
+	4.01956668098863248917e-01,
+	9.67786335404528727927e-01,
+	-6.7344058693131973066e-01,
+}
+var sinh = []float64{
+	7.2661916084208532301448439e+01,
+	1.1479409110035194500526446e+03,
+	-2.8043136512812518927312641e-01,
+	-7.499429091181587232835164e+01,
+	7.6552466042906758523925934e+03,
+	9.3031583421672014313789064e+00,
+	9.330815755828109072810322e+01,
+	7.6179893137269146407361477e+00,
+	3.021769180549615819524392e+00,
+	-2.95950575724449499189888e+03,
+}
+var sqrt = []float64{
+	2.2313699659365484748756904e+00,
+	2.7818829009464263511285458e+00,
+	5.2619393496314796848143251e-01,
+	2.2384377628763938724244104e+00,
+	3.1042380236055381099288487e+00,
+	1.7106657298385224403917771e+00,
+	2.286718922705479046148059e+00,
+	1.6516476350711159636222979e+00,
+	1.3510396336454586262419247e+00,
+	2.9471892997524949215723329e+00,
+}
+var tan = []float64{
+	-3.661316565040227801781974e+00,
+	8.64900232648597589369854e+00,
+	-2.8417941955033612725238097e-01,
+	3.253290185974728640827156e+00,
+	2.147275640380293804770778e-01,
+	-2.18600910711067004921551e-01,
+	-1.760002817872367935518928e+00,
+	-4.389808914752818126249079e-01,
+	-3.843885560201130679995041e+00,
+	9.10988793377685105753416e-01,
+}
+
+// Results for 100000 * Pi + vf[i]
+var tanLarge = []float64{
+	-3.66131656475596512705e+00,
+	8.6490023287202547927e+00,
+	-2.841794195104782406e-01,
+	3.2532901861033120983e+00,
+	2.14727564046880001365e-01,
+	-2.18600910700688062874e-01,
+	-1.760002817699722747043e+00,
+	-4.38980891453536115952e-01,
+	-3.84388555942723509071e+00,
+	9.1098879344275101051e-01,
+}
+var tanh = []float64{
+	9.9990531206936338549262119e-01,
+	9.9999962057085294197613294e-01,
+	-2.7001505097318677233756845e-01,
+	-9.9991110943061718603541401e-01,
+	9.9999999146798465745022007e-01,
+	9.9427249436125236705001048e-01,
+	9.9994257600983138572705076e-01,
+	9.9149409509772875982054701e-01,
+	9.4936501296239685514466577e-01,
+	-9.9999994291374030946055701e-01,
+}
+var trunc = []float64{
+	4.0000000000000000e+00,
+	7.0000000000000000e+00,
+	Copysign(0, -1),
+	-5.0000000000000000e+00,
+	9.0000000000000000e+00,
+	2.0000000000000000e+00,
+	5.0000000000000000e+00,
+	2.0000000000000000e+00,
+	1.0000000000000000e+00,
+	-8.0000000000000000e+00,
+}
+var y0 = []float64{ // expected results, presumably for Y0; ten entries like the other vf-indexed tables (argument not visible here)
+	-3.053399153780788357534855e-01,
+	1.7437227649515231515503649e-01,
+	-8.6221781263678836910392572e-01,
+	-3.100664880987498407872839e-01,
+	1.422200649300982280645377e-01,
+	4.000004067997901144239363e-01,
+	-3.3340749753099352392332536e-01,
+	4.5399790746668954555205502e-01,
+	4.8290004112497761007536522e-01,
+	2.7036697826604756229601611e-01,
+}
+var y1 = []float64{ // expected results, presumably for Y1 over the same arguments as y0
+	0.15494213737457922210218611,
+	-0.2165955142081145245075746,
+	-2.4644949631241895201032829,
+	0.1442740489541836405154505,
+	0.2215379960518984777080163,
+	0.3038800915160754150565448,
+	0.0691107642452362383808547,
+	0.2380116417809914424860165,
+	-0.20849492979459761009678934,
+	0.0242503179793232308250804,
+}
+var y2 = []float64{ // expected results, presumably for Yn with n = 2
+	0.3675780219390303613394936,
+	-0.23034826393250119879267257,
+	-16.939677983817727205631397,
+	0.367653980523052152867791,
+	-0.0962401471767804440353136,
+	-0.1923169356184851105200523,
+	0.35984072054267882391843766,
+	-0.2794987252299739821654982,
+	-0.7113490692587462579757954,
+	-0.2647831587821263302087457,
+}
+var yM3 = []float64{ // expected results, presumably for Yn with n = -3 ("M3" read as minus three; confirm)
+	-0.14035984421094849100895341,
+	-0.097535139617792072703973,
+	242.25775994555580176377379,
+	-0.1492267014802818619511046,
+	0.26148702629155918694500469,
+	0.56675383593895176530394248,
+	-0.206150264009006981070575,
+	0.64784284687568332737963658,
+	1.3503631555901938037008443,
+	0.1461869756579956803341844,
+}
+
+// arguments and expected results for special cases
+var vfacosSC = []float64{ // special-case arguments fed to Acos by TestAcos
+	-Pi,
+	1,
+	Pi,
+	NaN(),
+}
+var acosSC = []float64{ // expected Acos(vfacosSC[i]), compared with alike
+	NaN(),
+	0,
+	NaN(),
+	NaN(),
+}
+
+var vfacoshSC = []float64{ // special-case arguments fed to Acosh by TestAcosh
+	Inf(-1),
+	0.5,
+	1,
+	Inf(1),
+	NaN(),
+}
+var acoshSC = []float64{ // expected Acosh(vfacoshSC[i]), compared with alike
+	NaN(),
+	NaN(),
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vfasinSC = []float64{ // special-case arguments fed to Asin by TestAsin
+	-Pi,
+	Copysign(0, -1),
+	0,
+	Pi,
+	NaN(),
+}
+var asinSC = []float64{ // expected Asin(vfasinSC[i]), compared with alike (so the sign of zero matters)
+	NaN(),
+	Copysign(0, -1),
+	0,
+	NaN(),
+	NaN(),
+}
+
+var vfasinhSC = []float64{ // special-case arguments fed to Asinh by TestAsinh
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var asinhSC = []float64{ // expected Asinh(vfasinhSC[i]), compared with alike
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vfatanSC = []float64{ // special-case arguments fed to Atan by TestAtan
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var atanSC = []float64{ // expected Atan(vfatanSC[i]), compared with alike
+	-Pi / 2,
+	Copysign(0, -1),
+	0,
+	Pi / 2,
+	NaN(),
+}
+
+var vfatanhSC = []float64{ // special-case arguments fed to Atanh by TestAtanh
+	Inf(-1),
+	-Pi,
+	-1,
+	Copysign(0, -1),
+	0,
+	1,
+	Pi,
+	Inf(1),
+	NaN(),
+}
+var atanhSC = []float64{ // expected Atanh(vfatanhSC[i]), compared with alike
+	NaN(),
+	NaN(),
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+	NaN(),
+	NaN(),
+}
+var vfatan2SC = [][2]float64{ // special-case argument pairs; TestAtan2 calls Atan2(pair[0], pair[1])
+	{Inf(-1), Inf(-1)},
+	{Inf(-1), -Pi},
+	{Inf(-1), 0},
+	{Inf(-1), +Pi},
+	{Inf(-1), Inf(1)},
+	{Inf(-1), NaN()},
+	{-Pi, Inf(-1)},
+	{-Pi, 0},
+	{-Pi, Inf(1)},
+	{-Pi, NaN()},
+	{Copysign(0, -1), Inf(-1)},
+	{Copysign(0, -1), -Pi},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Copysign(0, -1), 0},
+	{Copysign(0, -1), +Pi},
+	{Copysign(0, -1), Inf(1)},
+	{Copysign(0, -1), NaN()},
+	{0, Inf(-1)},
+	{0, -Pi},
+	{0, Copysign(0, -1)},
+	{0, 0},
+	{0, +Pi},
+	{0, Inf(1)},
+	{0, NaN()},
+	{+Pi, Inf(-1)},
+	{+Pi, 0},
+	{+Pi, Inf(1)},
+	{1.0, Inf(1)},
+	{-1.0, Inf(1)},
+	{+Pi, NaN()},
+	{Inf(1), Inf(-1)},
+	{Inf(1), -Pi},
+	{Inf(1), 0},
+	{Inf(1), +Pi},
+	{Inf(1), Inf(1)},
+	{Inf(1), NaN()},
+	{NaN(), NaN()},
+}
+var atan2SC = []float64{ // expected Atan2 results, index-aligned with vfatan2SC and compared with alike
+	-3 * Pi / 4,     // atan2(-Inf, -Inf)
+	-Pi / 2,         // atan2(-Inf, -Pi)
+	-Pi / 2,         // atan2(-Inf, +0)
+	-Pi / 2,         // atan2(-Inf, +Pi)
+	-Pi / 4,         // atan2(-Inf, +Inf)
+	NaN(),           // atan2(-Inf, NaN)
+	-Pi,             // atan2(-Pi, -Inf)
+	-Pi / 2,         // atan2(-Pi, +0)
+	Copysign(0, -1), // atan2(-Pi, Inf)
+	NaN(),           // atan2(-Pi, NaN)
+	-Pi,             // atan2(-0, -Inf)
+	-Pi,             // atan2(-0, -Pi)
+	-Pi,             // atan2(-0, -0)
+	Copysign(0, -1), // atan2(-0, +0)
+	Copysign(0, -1), // atan2(-0, +Pi)
+	Copysign(0, -1), // atan2(-0, +Inf)
+	NaN(),           // atan2(-0, NaN)
+	Pi,              // atan2(+0, -Inf)
+	Pi,              // atan2(+0, -Pi)
+	Pi,              // atan2(+0, -0)
+	0,               // atan2(+0, +0)
+	0,               // atan2(+0, +Pi)
+	0,               // atan2(+0, +Inf)
+	NaN(),           // atan2(+0, NaN)
+	Pi,              // atan2(+Pi, -Inf)
+	Pi / 2,          // atan2(+Pi, +0)
+	0,               // atan2(+Pi, +Inf)
+	0,               // atan2(+1, +Inf)
+	Copysign(0, -1), // atan2(-1, +Inf)
+	NaN(),           // atan2(+Pi, NaN)
+	3 * Pi / 4,      // atan2(+Inf, -Inf)
+	Pi / 2,          // atan2(+Inf, -Pi)
+	Pi / 2,          // atan2(+Inf, +0)
+	Pi / 2,          // atan2(+Inf, +Pi)
+	Pi / 4,          // atan2(+Inf, +Inf)
+	NaN(),           // atan2(+Inf, NaN)
+	NaN(),           // atan2(NaN, NaN)
+}
+
+var vfcbrtSC = []float64{ // special-case arguments fed to Cbrt by TestCbrt
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var cbrtSC = []float64{ // expected Cbrt(vfcbrtSC[i]), compared with alike
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vfceilSC = []float64{ // special-case arguments fed to Ceil by TestCeil
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var ceilSC = []float64{ // expected Ceil(vfceilSC[i]), compared with alike
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vfcopysignSC = []float64{ // special-case first arguments; TestCopysign calls Copysign(v, -1)
+	Inf(-1),
+	Inf(1),
+	NaN(),
+}
+var copysignSC = []float64{ // expected Copysign(vfcopysignSC[i], -1), compared with alike
+	Inf(-1),
+	Inf(-1),
+	NaN(),
+}
+
+var vfcosSC = []float64{ // special-case arguments fed to Cos by TestCos
+	Inf(-1),
+	Inf(1),
+	NaN(),
+}
+var cosSC = []float64{ // expected Cos(vfcosSC[i]), compared with alike
+	NaN(),
+	NaN(),
+	NaN(),
+}
+
+var vfcoshSC = []float64{ // special-case arguments, presumably for Cosh
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var coshSC = []float64{ // expected results, index-aligned with vfcoshSC
+	Inf(1),
+	1,
+	1,
+	Inf(1),
+	NaN(),
+}
+
+var vferfSC = []float64{ // special-case arguments, presumably for Erf
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+	-1000,
+	1000,
+}
+var erfSC = []float64{ // expected results, index-aligned with vferfSC
+	-1,
+	Copysign(0, -1),
+	0,
+	1,
+	NaN(),
+	-1,
+	1,
+}
+
+var vferfcSC = []float64{ // special-case arguments, presumably for Erfc
+	Inf(-1),
+	Inf(1),
+	NaN(),
+	-1000,
+	1000,
+}
+var erfcSC = []float64{ // expected results, index-aligned with vferfcSC
+	2,
+	0,
+	NaN(),
+	2,
+	0,
+}
+
+var vferfinvSC = []float64{ // special-case arguments, presumably for Erfinv
+	1,
+	-1,
+	0,
+	Inf(-1),
+	Inf(1),
+	NaN(),
+}
+var erfinvSC = []float64{ // expected results, index-aligned with vferfinvSC
+	Inf(+1),
+	Inf(-1),
+	0,
+	NaN(),
+	NaN(),
+	NaN(),
+}
+
+var vferfcinvSC = []float64{ // special-case arguments, presumably for Erfcinv
+	0,
+	2,
+	1,
+	Inf(1),
+	Inf(-1),
+	NaN(),
+}
+var erfcinvSC = []float64{ // expected results, index-aligned with vferfcinvSC
+	Inf(+1),
+	Inf(-1),
+	0,
+	NaN(),
+	NaN(),
+	NaN(),
+}
+
+var vfexpSC = []float64{ // special-case arguments for Exp (the inline notes below name Exp explicitly)
+	Inf(-1),
+	-2000,
+	2000,
+	Inf(1),
+	NaN(),
+	// smallest float64 that overflows Exp(x)
+	7.097827128933841e+02,
+	// Issue 18912
+	1.48852223e+09,
+	1.4885222e+09,
+	1,
+	// near zero
+	3.725290298461915e-09,
+	// denormal
+	-740,
+}
+var expSC = []float64{ // expected results, index-aligned with vfexpSC
+	0,
+	0,
+	Inf(1),
+	Inf(1),
+	NaN(),
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	2.718281828459045,
+	1.0000000037252903,
+	4.2e-322,
+}
+
+var vfexp2SC = []float64{ // special-case arguments for Exp2 (the inline notes below name Exp2 explicitly)
+	Inf(-1),
+	-2000,
+	2000,
+	Inf(1),
+	NaN(),
+	// smallest float64 that overflows Exp2(x)
+	1024,
+	// near underflow
+	-1.07399999999999e+03,
+	// near zero
+	3.725290298461915e-09,
+}
+var exp2SC = []float64{ // expected results, index-aligned with vfexp2SC
+	0,
+	0,
+	Inf(1),
+	Inf(1),
+	NaN(),
+	Inf(1),
+	5e-324,
+	1.0000000025821745,
+}
+
+var vfexpm1SC = []float64{ // special-case arguments, presumably for Expm1
+	Inf(-1),
+	-710,
+	Copysign(0, -1),
+	0,
+	710,
+	Inf(1),
+	NaN(),
+}
+var expm1SC = []float64{ // expected results, index-aligned with vfexpm1SC
+	-1,
+	-1,
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	Inf(1),
+	NaN(),
+}
+
+var vffabsSC = []float64{ // special-case arguments, presumably for Abs
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var fabsSC = []float64{ // expected results, index-aligned with vffabsSC
+	Inf(1),
+	0,
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vffdimSC = [][2]float64{ // special-case argument pairs; fdimSC, fmaxSC and fminSC below each have one entry per pair
+	{Inf(-1), Inf(-1)},
+	{Inf(-1), Inf(1)},
+	{Inf(-1), NaN()},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Copysign(0, -1), 0},
+	{0, Copysign(0, -1)},
+	{0, 0},
+	{Inf(1), Inf(-1)},
+	{Inf(1), Inf(1)},
+	{Inf(1), NaN()},
+	{NaN(), Inf(-1)},
+	{NaN(), Copysign(0, -1)},
+	{NaN(), 0},
+	{NaN(), Inf(1)},
+	{NaN(), NaN()},
+}
+var nan = Float64frombits(0xFFF8000000000000) // SSE2 DIVSD 0/0
+var vffdim2SC = [][2]float64{ // same pairs as vffdimSC, with the sign-bit-set nan above replacing NaN()
+	{Inf(-1), Inf(-1)},
+	{Inf(-1), Inf(1)},
+	{Inf(-1), nan},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Copysign(0, -1), 0},
+	{0, Copysign(0, -1)},
+	{0, 0},
+	{Inf(1), Inf(-1)},
+	{Inf(1), Inf(1)},
+	{Inf(1), nan},
+	{nan, Inf(-1)},
+	{nan, Copysign(0, -1)},
+	{nan, 0},
+	{nan, Inf(1)},
+	{nan, nan},
+}
+var fdimSC = []float64{ // expected results per pair, presumably for Dim
+	NaN(),
+	0,
+	NaN(),
+	0,
+	0,
+	0,
+	0,
+	Inf(1),
+	NaN(),
+	NaN(),
+	NaN(),
+	NaN(),
+	NaN(),
+	NaN(),
+	NaN(),
+}
+var fmaxSC = []float64{ // expected results per pair, presumably for Max
+	Inf(-1),
+	Inf(1),
+	NaN(),
+	Copysign(0, -1),
+	0,
+	0,
+	0,
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	NaN(),
+	NaN(),
+	NaN(),
+	Inf(1),
+	NaN(),
+}
+var fminSC = []float64{ // expected results per pair, presumably for Min
+	Inf(-1),
+	Inf(-1),
+	Inf(-1),
+	Copysign(0, -1),
+	Copysign(0, -1),
+	Copysign(0, -1),
+	0,
+	Inf(-1),
+	Inf(1),
+	NaN(),
+	Inf(-1),
+	NaN(),
+	NaN(),
+	NaN(),
+	NaN(),
+}
+
+var vffmodSC = [][2]float64{ // special-case argument pairs; fmodSC's inline comments spell out each fmod call
+	{Inf(-1), Inf(-1)},
+	{Inf(-1), -Pi},
+	{Inf(-1), 0},
+	{Inf(-1), Pi},
+	{Inf(-1), Inf(1)},
+	{Inf(-1), NaN()},
+	{-Pi, Inf(-1)},
+	{-Pi, 0},
+	{-Pi, Inf(1)},
+	{-Pi, NaN()},
+	{Copysign(0, -1), Inf(-1)},
+	{Copysign(0, -1), 0},
+	{Copysign(0, -1), Inf(1)},
+	{Copysign(0, -1), NaN()},
+	{0, Inf(-1)},
+	{0, 0},
+	{0, Inf(1)},
+	{0, NaN()},
+	{Pi, Inf(-1)},
+	{Pi, 0},
+	{Pi, Inf(1)},
+	{Pi, NaN()},
+	{Inf(1), Inf(-1)},
+	{Inf(1), -Pi},
+	{Inf(1), 0},
+	{Inf(1), Pi},
+	{Inf(1), Inf(1)},
+	{Inf(1), NaN()},
+	{NaN(), Inf(-1)},
+	{NaN(), -Pi},
+	{NaN(), 0},
+	{NaN(), Pi},
+	{NaN(), Inf(1)},
+	{NaN(), NaN()},
+}
+var fmodSC = []float64{ // expected results, index-aligned with vffmodSC
+	NaN(),           // fmod(-Inf, -Inf)
+	NaN(),           // fmod(-Inf, -Pi)
+	NaN(),           // fmod(-Inf, 0)
+	NaN(),           // fmod(-Inf, Pi)
+	NaN(),           // fmod(-Inf, +Inf)
+	NaN(),           // fmod(-Inf, NaN)
+	-Pi,             // fmod(-Pi, -Inf)
+	NaN(),           // fmod(-Pi, 0)
+	-Pi,             // fmod(-Pi, +Inf)
+	NaN(),           // fmod(-Pi, NaN)
+	Copysign(0, -1), // fmod(-0, -Inf)
+	NaN(),           // fmod(-0, 0)
+	Copysign(0, -1), // fmod(-0, Inf)
+	NaN(),           // fmod(-0, NaN)
+	0,               // fmod(0, -Inf)
+	NaN(),           // fmod(0, 0)
+	0,               // fmod(0, +Inf)
+	NaN(),           // fmod(0, NaN)
+	Pi,              // fmod(Pi, -Inf)
+	NaN(),           // fmod(Pi, 0)
+	Pi,              // fmod(Pi, +Inf)
+	NaN(),           // fmod(Pi, NaN)
+	NaN(),           // fmod(+Inf, -Inf)
+	NaN(),           // fmod(+Inf, -Pi)
+	NaN(),           // fmod(+Inf, 0)
+	NaN(),           // fmod(+Inf, Pi)
+	NaN(),           // fmod(+Inf, +Inf)
+	NaN(),           // fmod(+Inf, NaN)
+	NaN(),           // fmod(NaN, -Inf)
+	NaN(),           // fmod(NaN, -Pi)
+	NaN(),           // fmod(NaN, 0)
+	NaN(),           // fmod(NaN, Pi)
+	NaN(),           // fmod(NaN, +Inf)
+	NaN(),           // fmod(NaN, NaN)
+}
+
+var vffrexpSC = []float64{ // special-case arguments, presumably for Frexp
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var frexpSC = []fi{ // expected float/int result pairs, index-aligned with vffrexpSC (fi is declared elsewhere in the file)
+	{Inf(-1), 0},
+	{Copysign(0, -1), 0},
+	{0, 0},
+	{Inf(1), 0},
+	{NaN(), 0},
+}
+
+var vfgamma = [][2]float64{ // each entry is {argument, expected result}; the note below refers to the function as gamma
+	{Inf(1), Inf(1)},
+	{Inf(-1), NaN()},
+	{0, Inf(1)},
+	{Copysign(0, -1), Inf(-1)},
+	{NaN(), NaN()},
+	{-1, NaN()},
+	{-2, NaN()},
+	{-3, NaN()},
+	{-1e16, NaN()},
+	{-1e300, NaN()},
+	{1.7e308, Inf(1)},
+
+	// Test inputs inspired by Python test suite.
+	// Outputs computed at high precision by PARI/GP.
+	// If recomputing table entries, be careful to use
+	// high-precision (%.1000g) formatting of the float64 inputs.
+	// For example, -2.0000000000000004 is the float64 with exact value
+	// -2.00000000000000044408920985626161695, and
+	// gamma(-2.0000000000000004) = -1249999999999999.5386078562728167651513, while
+	// gamma(-2.00000000000000044408920985626161695) = -1125899906826907.2044875028130093136826.
+	// Thus the table lists -1.1258999068426235e+15 as the answer.
+	{0.5, 1.772453850905516},
+	{1.5, 0.886226925452758},
+	{2.5, 1.329340388179137},
+	{3.5, 3.3233509704478426},
+	{-0.5, -3.544907701811032},
+	{-1.5, 2.363271801207355},
+	{-2.5, -0.9453087204829419},
+	{-3.5, 0.2700882058522691},
+	{0.1, 9.51350769866873},
+	{0.01, 99.4325851191506},
+	{1e-08, 9.999999942278434e+07},
+	{1e-16, 1e+16},
+	{0.001, 999.4237724845955},
+	{1e-16, 1e+16},
+	{1e-308, 1e+308},
+	{5.6e-309, 1.7857142857142864e+308},
+	{5.5e-309, Inf(1)},
+	{1e-309, Inf(1)},
+	{1e-323, Inf(1)},
+	{5e-324, Inf(1)},
+	{-0.1, -10.686287021193193},
+	{-0.01, -100.58719796441078},
+	{-1e-08, -1.0000000057721567e+08},
+	{-1e-16, -1e+16},
+	{-0.001, -1000.5782056293586},
+	{-1e-16, -1e+16},
+	{-1e-308, -1e+308},
+	{-5.6e-309, -1.7857142857142864e+308},
+	{-5.5e-309, Inf(-1)},
+	{-1e-309, Inf(-1)},
+	{-1e-323, Inf(-1)},
+	{-5e-324, Inf(-1)},
+	{-0.9999999999999999, -9.007199254740992e+15},
+	{-1.0000000000000002, 4.5035996273704955e+15},
+	{-1.9999999999999998, 2.2517998136852485e+15},
+	{-2.0000000000000004, -1.1258999068426235e+15},
+	{-100.00000000000001, -7.540083334883109e-145},
+	{-99.99999999999999, 7.540083334884096e-145},
+	{17, 2.0922789888e+13},
+	{171, 7.257415615307999e+306},
+	{171.6, 1.5858969096672565e+308},
+	{171.624, 1.7942117599248104e+308},
+	{171.625, Inf(1)},
+	{172, Inf(1)},
+	{2000, Inf(1)},
+	{-100.5, -3.3536908198076787e-159},
+	{-160.5, -5.255546447007829e-286},
+	{-170.5, -3.3127395215386074e-308},
+	{-171.5, 1.9316265431712e-310},
+	{-176.5, -1.196e-321},
+	{-177.5, 5e-324},
+	{-178.5, Copysign(0, -1)},
+	{-179.5, 0},
+	{-201.0001, 0},
+	{-202.9999, Copysign(0, -1)},
+	{-1000.5, Copysign(0, -1)},
+	{-1.0000000003e+09, Copysign(0, -1)},
+	{-4.5035996273704955e+15, 0},
+	{-63.349078729022985, 4.177797167776188e-88},
+	{-127.45117632943295, 1.183111089623681e-214},
+}
+
+var vfhypotSC = [][2]float64{ // special-case argument pairs, presumably for Hypot
+	{Inf(-1), Inf(-1)},
+	{Inf(-1), 0},
+	{Inf(-1), Inf(1)},
+	{Inf(-1), NaN()},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Copysign(0, -1), 0},
+	{0, Copysign(0, -1)},
+	{0, 0}, // +0, +0
+	{0, Inf(-1)},
+	{0, Inf(1)},
+	{0, NaN()},
+	{Inf(1), Inf(-1)},
+	{Inf(1), 0},
+	{Inf(1), Inf(1)},
+	{Inf(1), NaN()},
+	{NaN(), Inf(-1)},
+	{NaN(), 0},
+	{NaN(), Inf(1)},
+	{NaN(), NaN()},
+}
+var hypotSC = []float64{ // expected results, index-aligned with vfhypotSC
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	0,
+	0,
+	0,
+	0,
+	Inf(1),
+	Inf(1),
+	NaN(),
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	Inf(1),
+	NaN(),
+	Inf(1),
+	NaN(),
+}
+
+var ilogbSC = []int{ // expected int results, presumably Ilogb over the four vflogbSC arguments; confirm against the test
+	MaxInt32,
+	MinInt32,
+	MaxInt32,
+	MaxInt32,
+}
+
+var vfj0SC = []float64{ // special-case arguments; the four result tables below each have one entry per argument
+	Inf(-1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var j0SC = []float64{ // expected results, presumably for J0
+	0,
+	1,
+	0,
+	NaN(),
+}
+var j1SC = []float64{ // expected results, presumably for J1
+	0,
+	0,
+	0,
+	NaN(),
+}
+var j2SC = []float64{ // expected results, presumably for Jn with n = 2
+	0,
+	0,
+	0,
+	NaN(),
+}
+var jM3SC = []float64{ // expected results, presumably for Jn with n = -3
+	0,
+	0,
+	0,
+	NaN(),
+}
+
+var vfldexpSC = []fi{ // special-case float/int argument pairs, presumably for Ldexp (fi is declared elsewhere in the file)
+	{0, 0},
+	{0, -1075},
+	{0, 1024},
+	{Copysign(0, -1), 0},
+	{Copysign(0, -1), -1075},
+	{Copysign(0, -1), 1024},
+	{Inf(1), 0},
+	{Inf(1), -1024},
+	{Inf(-1), 0},
+	{Inf(-1), -1024},
+	{NaN(), -1024},
+	{10, int(1) << (uint64(unsafe.Sizeof(0)-1) * 8)},    // exponent 1 << 8*(sizeof(int)-1): very large, scaled to the platform int width
+	{10, -(int(1) << (uint64(unsafe.Sizeof(0)-1) * 8))}, // same magnitude, negative
+}
+var ldexpSC = []float64{ // expected results, index-aligned with vfldexpSC
+	0,
+	0,
+	0,
+	Copysign(0, -1),
+	Copysign(0, -1),
+	Copysign(0, -1),
+	Inf(1),
+	Inf(1),
+	Inf(-1),
+	Inf(-1),
+	NaN(),
+	Inf(1),
+	0,
+}
+
+var vflgammaSC = []float64{ // special-case arguments, presumably for Lgamma
+	Inf(-1),
+	-3,
+	0,
+	1,
+	2,
+	Inf(1),
+	NaN(),
+}
+var lgammaSC = []fi{ // expected float/int result pairs (presumably value and sign), index-aligned with vflgammaSC
+	{Inf(-1), 1},
+	{Inf(1), 1},
+	{Inf(1), 1},
+	{0, 1},
+	{0, 1},
+	{Inf(1), 1},
+	{NaN(), 1},
+}
+
+var vflogSC = []float64{ // special-case arguments, presumably for Log (possibly shared with related log tests; confirm)
+	Inf(-1),
+	-Pi,
+	Copysign(0, -1),
+	0,
+	1,
+	Inf(1),
+	NaN(),
+}
+var logSC = []float64{ // expected results, index-aligned with vflogSC
+	NaN(),
+	NaN(),
+	Inf(-1),
+	Inf(-1),
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vflogbSC = []float64{ // special-case arguments, presumably for Logb
+	Inf(-1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var logbSC = []float64{ // expected results, index-aligned with vflogbSC
+	Inf(1),
+	Inf(-1),
+	Inf(1),
+	NaN(),
+}
+
+var vflog1pSC = []float64{ // special-case arguments, presumably for Log1p
+	Inf(-1),
+	-Pi,
+	-1,
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+	4503599627370496.5, // Issue #29488
+}
+var log1pSC = []float64{ // expected results, index-aligned with vflog1pSC
+	NaN(),
+	NaN(),
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+	36.04365338911715, // Issue #29488
+}
+
+var vfmodfSC = []float64{ // special-case arguments, presumably for Modf
+	Inf(-1),
+	Copysign(0, -1),
+	Inf(1),
+	NaN(),
+}
+var modfSC = [][2]float64{ // expected result pairs, index-aligned with vfmodfSC; inline notes show alternative expectations
+	{Inf(-1), NaN()}, // [2]float64{Copysign(0, -1), Inf(-1)},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Inf(1), NaN()}, // [2]float64{0, Inf(1)},
+	{NaN(), NaN()},
+}
+
+var vfnextafter32SC = [][2]float32{ // special-case argument pairs, presumably for Nextafter32
+	{0, 0},
+	{0, float32(Copysign(0, -1))},
+	{0, -1},
+	{0, float32(NaN())},
+	{float32(Copysign(0, -1)), 1},
+	{float32(Copysign(0, -1)), 0},
+	{float32(Copysign(0, -1)), float32(Copysign(0, -1))},
+	{float32(Copysign(0, -1)), -1},
+	{float32(NaN()), 0},
+	{float32(NaN()), float32(NaN())},
+}
+var nextafter32SC = []float32{ // expected results, index-aligned with vfnextafter32SC
+	0,
+	0,
+	-1.401298464e-45, // Float32frombits(0x80000001)
+	float32(NaN()),
+	1.401298464e-45, // Float32frombits(0x00000001)
+	float32(Copysign(0, -1)),
+	float32(Copysign(0, -1)),
+	-1.401298464e-45, // Float32frombits(0x80000001)
+	float32(NaN()),
+	float32(NaN()),
+}
+
+var vfnextafter64SC = [][2]float64{ // special-case argument pairs, presumably for Nextafter
+	{0, 0},
+	{0, Copysign(0, -1)},
+	{0, -1},
+	{0, NaN()},
+	{Copysign(0, -1), 1},
+	{Copysign(0, -1), 0},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Copysign(0, -1), -1},
+	{NaN(), 0},
+	{NaN(), NaN()},
+}
+var nextafter64SC = []float64{ // expected results, index-aligned with vfnextafter64SC
+	0,
+	0,
+	-4.9406564584124654418e-324, // Float64frombits(0x8000000000000001)
+	NaN(),
+	4.9406564584124654418e-324, // Float64frombits(0x0000000000000001)
+	Copysign(0, -1),
+	Copysign(0, -1),
+	-4.9406564584124654418e-324, // Float64frombits(0x8000000000000001)
+	NaN(),
+	NaN(),
+}
+
+var vfpowSC = [][2]float64{ // special-case argument pairs; powSC's inline comments spell out each pow call
+	{Inf(-1), -Pi},
+	{Inf(-1), -3},
+	{Inf(-1), Copysign(0, -1)},
+	{Inf(-1), 0},
+	{Inf(-1), 1},
+	{Inf(-1), 3},
+	{Inf(-1), Pi},
+	{Inf(-1), 0.5},
+	{Inf(-1), NaN()},
+
+	{-Pi, Inf(-1)},
+	{-Pi, -Pi},
+	{-Pi, Copysign(0, -1)},
+	{-Pi, 0},
+	{-Pi, 1},
+	{-Pi, Pi},
+	{-Pi, Inf(1)},
+	{-Pi, NaN()},
+
+	{-1, Inf(-1)},
+	{-1, Inf(1)},
+	{-1, NaN()},
+	{-0.5, Inf(-1)},
+	{-0.5, Inf(1)},
+	{Copysign(0, -1), Inf(-1)},
+	{Copysign(0, -1), -Pi},
+	{Copysign(0, -1), -0.5},
+	{Copysign(0, -1), -3},
+	{Copysign(0, -1), 3},
+	{Copysign(0, -1), Pi},
+	{Copysign(0, -1), 0.5},
+	{Copysign(0, -1), Inf(1)},
+
+	{0, Inf(-1)},
+	{0, -Pi},
+	{0, -3},
+	{0, Copysign(0, -1)},
+	{0, 0},
+	{0, 3},
+	{0, Pi},
+	{0, Inf(1)},
+	{0, NaN()},
+
+	{0.5, Inf(-1)},
+	{0.5, Inf(1)},
+	{1, Inf(-1)},
+	{1, Inf(1)},
+	{1, NaN()},
+
+	{Pi, Inf(-1)},
+	{Pi, Copysign(0, -1)},
+	{Pi, 0},
+	{Pi, 1},
+	{Pi, Inf(1)},
+	{Pi, NaN()},
+	{Inf(1), -Pi},
+	{Inf(1), Copysign(0, -1)},
+	{Inf(1), 0},
+	{Inf(1), 1},
+	{Inf(1), Pi},
+	{Inf(1), NaN()},
+	{NaN(), -Pi},
+	{NaN(), Copysign(0, -1)},
+	{NaN(), 0},
+	{NaN(), 1},
+	{NaN(), Pi},
+	{NaN(), NaN()},
+
+	// Issue #7394 overflow checks
+	{2, float64(1 << 32)},
+	{2, -float64(1 << 32)},
+	{-2, float64(1<<32 + 1)},
+	{0.5, float64(1 << 45)},
+	{0.5, -float64(1 << 45)},
+	{Nextafter(1, 2), float64(1 << 63)},
+	{Nextafter(1, -2), float64(1 << 63)},
+	{Nextafter(-1, 2), float64(1 << 63)},
+	{Nextafter(-1, -2), float64(1 << 63)},
+
+	// Issue #57465
+	{Copysign(0, -1), 1e19},
+	{Copysign(0, -1), -1e19},
+	{Copysign(0, -1), 1<<53 - 1},
+	{Copysign(0, -1), -(1<<53 - 1)},
+}
+var powSC = []float64{ // expected results, index-aligned with vfpowSC
+	0,               // pow(-Inf, -Pi)
+	Copysign(0, -1), // pow(-Inf, -3)
+	1,               // pow(-Inf, -0)
+	1,               // pow(-Inf, +0)
+	Inf(-1),         // pow(-Inf, 1)
+	Inf(-1),         // pow(-Inf, 3)
+	Inf(1),          // pow(-Inf, Pi)
+	Inf(1),          // pow(-Inf, 0.5)
+	NaN(),           // pow(-Inf, NaN)
+	0,               // pow(-Pi, -Inf)
+	NaN(),           // pow(-Pi, -Pi)
+	1,               // pow(-Pi, -0)
+	1,               // pow(-Pi, +0)
+	-Pi,             // pow(-Pi, 1)
+	NaN(),           // pow(-Pi, Pi)
+	Inf(1),          // pow(-Pi, +Inf)
+	NaN(),           // pow(-Pi, NaN)
+	1,               // pow(-1, -Inf) IEEE 754-2008
+	1,               // pow(-1, +Inf) IEEE 754-2008
+	NaN(),           // pow(-1, NaN)
+	Inf(1),          // pow(-1/2, -Inf)
+	0,               // pow(-1/2, +Inf)
+	Inf(1),          // pow(-0, -Inf)
+	Inf(1),          // pow(-0, -Pi)
+	Inf(1),          // pow(-0, -0.5)
+	Inf(-1),         // pow(-0, -3) IEEE 754-2008
+	Copysign(0, -1), // pow(-0, 3) IEEE 754-2008
+	0,               // pow(-0, +Pi)
+	0,               // pow(-0, 0.5)
+	0,               // pow(-0, +Inf)
+	Inf(1),          // pow(+0, -Inf)
+	Inf(1),          // pow(+0, -Pi)
+	Inf(1),          // pow(+0, -3)
+	1,               // pow(+0, -0)
+	1,               // pow(+0, +0)
+	0,               // pow(+0, 3)
+	0,               // pow(+0, +Pi)
+	0,               // pow(+0, +Inf)
+	NaN(),           // pow(+0, NaN)
+	Inf(1),          // pow(1/2, -Inf)
+	0,               // pow(1/2, +Inf)
+	1,               // pow(1, -Inf) IEEE 754-2008
+	1,               // pow(1, +Inf) IEEE 754-2008
+	1,               // pow(1, NaN) IEEE 754-2008
+	0,               // pow(+Pi, -Inf)
+	1,               // pow(+Pi, -0)
+	1,               // pow(+Pi, +0)
+	Pi,              // pow(+Pi, 1)
+	Inf(1),          // pow(+Pi, +Inf)
+	NaN(),           // pow(+Pi, NaN)
+	0,               // pow(+Inf, -Pi)
+	1,               // pow(+Inf, -0)
+	1,               // pow(+Inf, +0)
+	Inf(1),          // pow(+Inf, 1)
+	Inf(1),          // pow(+Inf, Pi)
+	NaN(),           // pow(+Inf, NaN)
+	NaN(),           // pow(NaN, -Pi)
+	1,               // pow(NaN, -0)
+	1,               // pow(NaN, +0)
+	NaN(),           // pow(NaN, 1)
+	NaN(),           // pow(NaN, +Pi)
+	NaN(),           // pow(NaN, NaN)
+
+	// Issue #7394 overflow checks
+	Inf(1),  // pow(2, float64(1 << 32))
+	0,       // pow(2, -float64(1 << 32))
+	Inf(-1), // pow(-2, float64(1<<32 + 1))
+	0,       // pow(1/2, float64(1 << 45))
+	Inf(1),  // pow(1/2, -float64(1 << 45))
+	Inf(1),  // pow(Nextafter(1, 2), float64(1 << 63))
+	0,       // pow(Nextafter(1, -2), float64(1 << 63))
+	0,       // pow(Nextafter(-1, 2), float64(1 << 63))
+	Inf(1),  // pow(Nextafter(-1, -2), float64(1 << 63))
+
+	// Issue #57465
+	0,               // pow(-0, 1e19)
+	Inf(1),          // pow(-0, -1e19)
+	Copysign(0, -1), // pow(-0, 1<<53 -1)
+	Inf(-1),         // pow(-0, -(1<<53 -1))
+}
+
+var vfpow10SC = []int{ // integer exponents; pow10SC's inline comments pair each one with its pow10 call
+	MinInt32,
+	-324,
+	-323,
+	-50,
+	-22,
+	-1,
+	0,
+	1,
+	22,
+	50,
+	100,
+	200,
+	308,
+	309,
+	MaxInt32,
+}
+
+var pow10SC = []float64{ // expected results, index-aligned with vfpow10SC
+	0,        // pow10(MinInt32)
+	0,        // pow10(-324)
+	1.0e-323, // pow10(-323)
+	1.0e-50,  // pow10(-50)
+	1.0e-22,  // pow10(-22)
+	1.0e-1,   // pow10(-1)
+	1.0e0,    // pow10(0)
+	1.0e1,    // pow10(1)
+	1.0e22,   // pow10(22)
+	1.0e50,   // pow10(50)
+	1.0e100,  // pow10(100)
+	1.0e200,  // pow10(200)
+	1.0e308,  // pow10(308)
+	Inf(1),   // pow10(309)
+	Inf(1),   // pow10(MaxInt32)
+}
+
+var vfroundSC = [][2]float64{ // {argument, expected result} pairs, presumably for Round (halves round away from zero here)
+	{0, 0},
+	{1.390671161567e-309, 0}, // denormal
+	{0.49999999999999994, 0}, // 0.5-epsilon
+	{0.5, 1},
+	{0.5000000000000001, 1}, // 0.5+epsilon
+	{-1.5, -2},
+	{-2.5, -3},
+	{NaN(), NaN()},
+	{Inf(1), Inf(1)},
+	{2251799813685249.5, 2251799813685250}, // 1 bit fraction
+	{2251799813685250.5, 2251799813685251},
+	{4503599627370495.5, 4503599627370496}, // 1 bit fraction, rounding to 0 bit fraction
+	{4503599627370497, 4503599627370497},   // large integer
+}
+var vfroundEvenSC = [][2]float64{ // {argument, expected result} pairs, presumably for RoundToEven (halves round to the even neighbour here)
+	{0, 0},
+	{1.390671161567e-309, 0}, // denormal
+	{0.49999999999999994, 0}, // 0.5-epsilon
+	{0.5, 0},
+	{0.5000000000000001, 1}, // 0.5+epsilon
+	{-1.5, -2},
+	{-2.5, -2},
+	{NaN(), NaN()},
+	{Inf(1), Inf(1)},
+	{2251799813685249.5, 2251799813685250}, // 1 bit fraction
+	{2251799813685250.5, 2251799813685250},
+	{4503599627370495.5, 4503599627370496}, // 1 bit fraction, rounding to 0 bit fraction
+	{4503599627370497, 4503599627370497},   // large integer
+}
+
+var vfsignbitSC = []float64{ // special-case arguments, presumably for Signbit
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var signbitSC = []bool{ // expected results, index-aligned with vfsignbitSC
+	true,
+	true,
+	false,
+	false,
+	false,
+}
+
+var vfsinSC = []float64{ // special-case arguments, presumably for Sin
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var sinSC = []float64{ // expected results, index-aligned with vfsinSC
+	NaN(),
+	Copysign(0, -1),
+	0,
+	NaN(),
+	NaN(),
+}
+
+var vfsinhSC = []float64{ // special-case arguments, presumably for Sinh
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var sinhSC = []float64{ // expected results, index-aligned with vfsinhSC
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+
+var vfsqrtSC = []float64{ // special-case arguments, presumably for Sqrt
+	Inf(-1),
+	-Pi,
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+	Float64frombits(2), // subnormal; see https://golang.org/issue/13013
+}
+var sqrtSC = []float64{ // expected results, index-aligned with vfsqrtSC
+	NaN(),
+	NaN(),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+	3.1434555694052576e-162,
+}
+
+var vftanhSC = []float64{ // special-case arguments, presumably for Tanh
+	Inf(-1),
+	Copysign(0, -1),
+	0,
+	Inf(1),
+	NaN(),
+}
+var tanhSC = []float64{ // expected results, index-aligned with vftanhSC
+	-1,
+	Copysign(0, -1),
+	0,
+	1,
+	NaN(),
+}
+
+var vfy0SC = []float64{ // special-case arguments; the four result tables below each have one entry per argument
+	Inf(-1),
+	0,
+	Inf(1),
+	NaN(),
+	-1,
+}
+var y0SC = []float64{ // expected results, presumably for Y0
+	NaN(),
+	Inf(-1),
+	0,
+	NaN(),
+	NaN(),
+}
+var y1SC = []float64{ // expected results, presumably for Y1
+	NaN(),
+	Inf(-1),
+	0,
+	NaN(),
+	NaN(),
+}
+var y2SC = []float64{ // expected results, presumably for Yn with n = 2
+	NaN(),
+	Inf(-1),
+	0,
+	NaN(),
+	NaN(),
+}
+var yM3SC = []float64{ // expected results, presumably for Yn with n = -3; note +Inf at zero, unlike the tables above
+	NaN(),
+	Inf(1),
+	0,
+	NaN(),
+	NaN(),
+}
+
+// arguments and expected results for boundary cases
+const ( // boundary magnitudes used by the *BC tables that follow
+	SmallestNormalFloat64   = 2.2250738585072014e-308 // 2**-1022
+	LargestSubnormalFloat64 = SmallestNormalFloat64 - SmallestNonzeroFloat64 // smallest normal minus SmallestNonzeroFloat64
+)
+
+var vffrexpBC = []float64{ // boundary-case arguments, presumably for Frexp: each boundary magnitude, positive then negative
+	SmallestNormalFloat64,
+	LargestSubnormalFloat64,
+	SmallestNonzeroFloat64,
+	MaxFloat64,
+	-SmallestNormalFloat64,
+	-LargestSubnormalFloat64,
+	-SmallestNonzeroFloat64,
+	-MaxFloat64,
+}
+var frexpBC = []fi{ // expected float/int result pairs, index-aligned with vffrexpBC
+	{0.5, -1021},
+	{0.99999999999999978, -1022},
+	{0.5, -1073},
+	{0.99999999999999989, 1024},
+	{-0.5, -1021},
+	{-0.99999999999999978, -1022},
+	{-0.5, -1073},
+	{-0.99999999999999989, 1024},
+}
+
+var vfldexpBC = []fi{ // boundary-case float/int argument pairs, presumably for Ldexp
+	{SmallestNormalFloat64, -52},
+	{LargestSubnormalFloat64, -51},
+	{SmallestNonzeroFloat64, 1074},
+	{MaxFloat64, -(1023 + 1074)},
+	{1, -1075},
+	{-1, -1075},
+	{1, 1024},
+	{-1, 1024},
+	{1.0000000000000002, -1075},
+	{1, -1075},
+}
+var ldexpBC = []float64{ // expected results, index-aligned with vfldexpBC
+	SmallestNonzeroFloat64,
+	1e-323, // 2**-1073
+	1,
+	1e-323, // 2**-1073
+	0,
+	Copysign(0, -1),
+	Inf(1),
+	Inf(-1),
+	SmallestNonzeroFloat64,
+	0,
+}
+
+var logbBC = []float64{ // expected boundary-case results, presumably Logb over the eight vffrexpBC arguments; confirm against the test
+	-1022,
+	-1023,
+	-1074,
+	1023,
+	-1022,
+	-1023,
+	-1074,
+	1023,
+}
+
+// Test cases were generated with Berkeley TestFloat-3e/testfloat_gen.
+// http://www.jhauser.us/arithmetic/TestFloat.html.
+// The default rounding mode is selected (nearest/even), and exception flags are ignored.
+var fmaC = []struct{ x, y, z, want float64 }{ // want is presumably the expected fused x*y + z (FMA)
+	// Large exponent spread
+	{-3.999999999999087, -1.1123914289620494e-16, -7.999877929687506, -7.999877929687505},
+	{-262112.0000004768, -0.06251525855623184, 1.1102230248837136e-16, 16385.99945072085},
+	{-6.462348523533467e-27, -2.3763644720331857e-211, 4.000000000931324, 4.000000000931324},
+
+	// Effective addition
+	{-2.0000000037252907, 6.7904383376e-313, -3.3951933161e-313, -1.697607001654e-312},
+	{-0.12499999999999999, 512.007568359375, -1.4193627164960366e-16, -64.00094604492188},
+	{-2.7550648847397148e-39, -3.4028301595800694e+38, 0.9960937495343386, 1.9335955376735676},
+	{5.723369164769208e+24, 3.8149300927159385e-06, 1.84489958778182e+19, 4.028324913621874e+19},
+	{-0.4843749999990904, -3.6893487872543293e+19, 9.223653786709391e+18, 2.7093936974938993e+19},
+	{-3.8146972665201165e-06, 4.2949672959999385e+09, -2.2204460489938386e-16, -16384.000003844263},
+	{6.98156394130982e-309, -1.1072962560000002e+09, -4.4414561548793455e-308, -7.73065965765153e-300},
+
+	// Effective subtraction
+	{5e-324, 4.5, -2e-323, 0},
+	{5e-324, 7, -3.5e-323, 0},
+	{5e-324, 0.5000000000000001, -5e-324, Copysign(0, -1)},
+	{-2.1240680525e-314, -1.233647078189316e+308, -0.25781249999954525, -0.25780987964919844},
+	{8.579992955364441e-308, 0.6037391876780558, -4.4501307410480706e-308, 7.29947236107098e-309},
+	{-4.450143471986689e-308, -0.9960937499927239, -4.450419332475649e-308, -1.7659233458788e-310},
+	{1.4932076393918112, -2.2248022430460833e-308, 4.449875571054211e-308, 1.127783865601762e-308},
+
+	// Overflow
+	{-2.288020632214759e+38, -8.98846570988901e+307, 1.7696041796300924e+308, Inf(0)},
+	{1.4888652783208255e+308, -9.007199254742012e+15, -6.807282911929205e+38, Inf(-1)},
+	{9.142703268902826e+192, -1.3504889569802838e+296, -1.9082200803806996e-89, Inf(-1)},
+
+	// Finite x and y, but non-finite z.
+	{31.99218749627471, -1.7976930544991702e+308, Inf(0), Inf(0)},
+	{-1.7976931281784667e+308, -2.0009765625002265, Inf(-1), Inf(-1)},
+
+	// Special
+	{0, 0, 0, 0},
+	{Copysign(0, -1), 0, 0, 0},
+	{0, 0, Copysign(0, -1), 0},
+	{Copysign(0, -1), 0, Copysign(0, -1), Copysign(0, -1)},
+	{-1.1754226043408471e-38, NaN(), Inf(0), NaN()},
+	{0, 0, 2.22507385643494e-308, 2.22507385643494e-308},
+	{-8.65697792e+09, NaN(), -7.516192799999999e+09, NaN()},
+	{-0.00012207403779029757, 3.221225471996093e+09, NaN(), NaN()},
+	{Inf(-1), 0.1252441407414153, -1.387184532981584e-76, Inf(-1)},
+	{Inf(0), 1.525878907671432e-05, -9.214364835452549e+18, Inf(0)},
+
+	// Random
+	{0.1777916152213626, -32.000015266239636, -2.2204459148334633e-16, -5.689334401293007},
+	{-2.0816681711722314e-16, -0.4997558592585846, -0.9465627129124969, -0.9465627129124968},
+	{-1.9999997615814211, 1.8518819259933516e+19, 16.874999999999996, -3.703763410463646e+19},
+	{-0.12499994039717421, 32767.99999976135, -2.0752587082923246e+19, -2.075258708292325e+19},
+	{7.705600568510257e-34, -1.801432979000528e+16, -0.17224197722973714, -0.17224197722973716},
+	{3.8988133103758913e-308, -0.9848632812499999, 3.893879244098556e-308, 5.40811742605814e-310},
+	{-0.012651981190687427, 6.911985574912436e+38, 6.669240527007144e+18, -8.745031148409496e+36},
+	{4.612811918325842e+18, 1.4901161193847641e-08, 2.6077032311277997e-08, 6.873625395187494e+10},
+	{-9.094947033611148e-13, 4.450691014249257e-308, 2.086006742350485e-308, 2.086006742346437e-308},
+	{-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308},
+
+	// Issue #61130
+	{-1, 1, 1, 0},
+	{1, 1, -1, 0},
+}
+
+var sqrt32 = []float32{ // float32 values including zeros, NaN and infinities; presumably inputs for a float32 square-root test (confirm)
+	0,
+	float32(Copysign(0, -1)),
+	float32(NaN()),
+	float32(Inf(1)),
+	float32(Inf(-1)),
+	1,
+	2,
+	-2,
+	4.9790119248836735e+00,
+	7.7388724745781045e+00,
+	-2.7688005719200159e-01,
+	-5.0106036182710749e+00,
+}
+
+// tolerance reports whether a (the actual value) lies within e of b (the
+// expected value). The bound e is relative to |b|, or absolute when b is zero.
+func tolerance(a, b, e float64) bool {
+	if a == b {
+		// Identical values always match. Without this shortcut, scaling e
+		// by a denormal b below could underflow the bound to zero and
+		// reject two small, equal values.
+		return true
+	}
+	diff := a - b
+	if diff < 0 {
+		diff = -diff
+	}
+
+	// The error bound is a fraction of the expected value b, never of a.
+	limit := e
+	if b != 0 {
+		if limit = e * b; limit < 0 {
+			limit = -limit
+		}
+	}
+	return diff < limit
+}
+
+// close, veryclose and soclose wrap tolerance with a bound of 1e-14, 4e-16
+// and a caller-supplied e respectively.
+func close(a, b float64) bool      { return tolerance(a, b, 1e-14) }
+func veryclose(a, b float64) bool  { return tolerance(a, b, 4e-16) }
+func soclose(a, b, e float64) bool { return tolerance(a, b, e) }
+// alike reports whether a and b are the same value for test purposes:
+// both NaN, or equal with matching sign bits (so +0 and -0 are not alike).
+func alike(a, b float64) bool {
+	if IsNaN(a) && IsNaN(b) {
+		return true
+	}
+	if a != b {
+		return false
+	}
+	return Signbit(a) == Signbit(b)
+}
+
+// TestNaN verifies that NaN() yields a value that compares unequal to
+// itself, both as a float64 and after conversion to float32.
+func TestNaN(t *testing.T) {
+	wide := NaN()
+	if wide == wide {
+		t.Fatalf("NaN() returns %g, expected NaN", wide)
+	}
+	if narrow := float32(wide); narrow == narrow {
+		t.Fatalf("float32(NaN()) is %g, expected NaN", narrow)
+	}
+}
+
+// TestAcos checks Acos on vf[i]/10 against the acos table (using close)
+// and on the special-case table vfacosSC against acosSC (using alike).
+func TestAcos(t *testing.T) {
+	for i, v := range vf {
+		arg := v / 10
+		if got := Acos(arg); !close(acos[i], got) {
+			t.Errorf("Acos(%g) = %g, want %g", arg, got, acos[i])
+		}
+	}
+	for i, arg := range vfacosSC {
+		if got := Acos(arg); !alike(acosSC[i], got) {
+			t.Errorf("Acos(%g) = %g, want %g", arg, got, acosSC[i])
+		}
+	}
+}
+
+// TestAcosh checks Acosh on 1+Abs(vf[i]) against the acosh table (using
+// veryclose) and on vfacoshSC against acoshSC (using alike).
+func TestAcosh(t *testing.T) {
+	for i, v := range vf {
+		arg := 1 + Abs(v)
+		if got := Acosh(arg); !veryclose(acosh[i], got) {
+			t.Errorf("Acosh(%g) = %g, want %g", arg, got, acosh[i])
+		}
+	}
+	for i, arg := range vfacoshSC {
+		if got := Acosh(arg); !alike(acoshSC[i], got) {
+			t.Errorf("Acosh(%g) = %g, want %g", arg, got, acoshSC[i])
+		}
+	}
+}
+
+// TestAsin checks Asin on vf[i]/10 against the asin table (using
+// veryclose) and on vfasinSC against asinSC (using alike).
+func TestAsin(t *testing.T) {
+	for i, v := range vf {
+		arg := v / 10
+		if got := Asin(arg); !veryclose(asin[i], got) {
+			t.Errorf("Asin(%g) = %g, want %g", arg, got, asin[i])
+		}
+	}
+	for i, arg := range vfasinSC {
+		if got := Asin(arg); !alike(asinSC[i], got) {
+			t.Errorf("Asin(%g) = %g, want %g", arg, got, asinSC[i])
+		}
+	}
+}
+
+// TestAsinh checks Asinh on vf against the asinh table (using veryclose)
+// and on vfasinhSC against asinhSC (using alike).
+func TestAsinh(t *testing.T) {
+	for i, arg := range vf {
+		if got := Asinh(arg); !veryclose(asinh[i], got) {
+			t.Errorf("Asinh(%g) = %g, want %g", arg, got, asinh[i])
+		}
+	}
+	for i, arg := range vfasinhSC {
+		if got := Asinh(arg); !alike(asinhSC[i], got) {
+			t.Errorf("Asinh(%g) = %g, want %g", arg, got, asinhSC[i])
+		}
+	}
+}
+
+// TestAtan checks Atan on vf against the atan table (using veryclose)
+// and on vfatanSC against atanSC (using alike).
+func TestAtan(t *testing.T) {
+	for i, arg := range vf {
+		if got := Atan(arg); !veryclose(atan[i], got) {
+			t.Errorf("Atan(%g) = %g, want %g", arg, got, atan[i])
+		}
+	}
+	for i, arg := range vfatanSC {
+		if got := Atan(arg); !alike(atanSC[i], got) {
+			t.Errorf("Atan(%g) = %g, want %g", arg, got, atanSC[i])
+		}
+	}
+}
+
+// TestAtanh checks Atanh on vf[i]/10 against the atanh table (using
+// veryclose) and on vfatanhSC against atanhSC (using alike).
+func TestAtanh(t *testing.T) {
+	for i, v := range vf {
+		arg := v / 10
+		if got := Atanh(arg); !veryclose(atanh[i], got) {
+			t.Errorf("Atanh(%g) = %g, want %g", arg, got, atanh[i])
+		}
+	}
+	for i, arg := range vfatanhSC {
+		if got := Atanh(arg); !alike(atanhSC[i], got) {
+			t.Errorf("Atanh(%g) = %g, want %g", arg, got, atanhSC[i])
+		}
+	}
+}
+
+// TestAtan2 checks Atan2(10, vf[i]) against the atan2 table (using
+// veryclose) and each vfatan2SC pair against atan2SC (using alike).
+func TestAtan2(t *testing.T) {
+	for i, x := range vf {
+		if got := Atan2(10, x); !veryclose(atan2[i], got) {
+			t.Errorf("Atan2(10, %g) = %g, want %g", x, got, atan2[i])
+		}
+	}
+	for i, pair := range vfatan2SC {
+		if got := Atan2(pair[0], pair[1]); !alike(atan2SC[i], got) {
+			t.Errorf("Atan2(%g, %g) = %g, want %g", pair[0], pair[1], got, atan2SC[i])
+		}
+	}
+}
+
+// TestCbrt checks Cbrt on the vf inputs against the cbrt table, then
+// checks the special-case inputs in vfcbrtSC against cbrtSC.
+func TestCbrt(t *testing.T) {
+	for i, x := range vf {
+		got := Cbrt(x)
+		if !veryclose(cbrt[i], got) {
+			t.Errorf("Cbrt(%g) = %g, want %g", x, got, cbrt[i])
+		}
+	}
+	for i, x := range vfcbrtSC {
+		got := Cbrt(x)
+		if !alike(cbrtSC[i], got) {
+			t.Errorf("Cbrt(%g) = %g, want %g", x, got, cbrtSC[i])
+		}
+	}
+}
+
+// TestCeil checks Ceil on the vf inputs against the ceil table, then
+// checks the special-case inputs in vfceilSC against ceilSC.
+func TestCeil(t *testing.T) {
+	for i, x := range vf {
+		got := Ceil(x)
+		if !alike(ceil[i], got) {
+			t.Errorf("Ceil(%g) = %g, want %g", x, got, ceil[i])
+		}
+	}
+	for i, x := range vfceilSC {
+		got := Ceil(x)
+		if !alike(ceilSC[i], got) {
+			t.Errorf("Ceil(%g) = %g, want %g", x, got, ceilSC[i])
+		}
+	}
+}
+
+// TestCopysign checks Copysign(vf[i], -1) against the copysign table,
+// Copysign(vf[i], 1) against the negated table, and the special-case
+// inputs in vfcopysignSC (with sign -1) against copysignSC.
+func TestCopysign(t *testing.T) {
+	for i, x := range vf {
+		got := Copysign(x, -1)
+		if got != copysign[i] {
+			t.Errorf("Copysign(%g, -1) = %g, want %g", x, got, copysign[i])
+		}
+	}
+	for i, x := range vf {
+		want := -copysign[i]
+		got := Copysign(x, 1)
+		if got != want {
+			t.Errorf("Copysign(%g, 1) = %g, want %g", x, got, want)
+		}
+	}
+	for i, x := range vfcopysignSC {
+		got := Copysign(x, -1)
+		if !alike(copysignSC[i], got) {
+			t.Errorf("Copysign(%g, -1) = %g, want %g", x, got, copysignSC[i])
+		}
+	}
+}
+
+// TestCos checks Cos on the vf inputs against the cos table, then checks
+// the special-case inputs in vfcosSC against cosSC.
+func TestCos(t *testing.T) {
+	for i, x := range vf {
+		got := Cos(x)
+		if !veryclose(cos[i], got) {
+			t.Errorf("Cos(%g) = %g, want %g", x, got, cos[i])
+		}
+	}
+	for i, x := range vfcosSC {
+		got := Cos(x)
+		if !alike(cosSC[i], got) {
+			t.Errorf("Cos(%g) = %g, want %g", x, got, cosSC[i])
+		}
+	}
+}
+
+// TestCosh checks Cosh on the vf inputs against the cosh table (using the
+// close comparison), then checks the special cases in vfcoshSC against coshSC.
+func TestCosh(t *testing.T) {
+	for i, x := range vf {
+		got := Cosh(x)
+		if !close(cosh[i], got) {
+			t.Errorf("Cosh(%g) = %g, want %g", x, got, cosh[i])
+		}
+	}
+	for i, x := range vfcoshSC {
+		got := Cosh(x)
+		if !alike(coshSC[i], got) {
+			t.Errorf("Cosh(%g) = %g, want %g", x, got, coshSC[i])
+		}
+	}
+}
+
+// TestErf checks Erf on vf[i]/10 against the erf table, then checks the
+// special-case inputs in vferfSC against erfSC.
+func TestErf(t *testing.T) {
+	for i, v := range vf {
+		x := v / 10
+		got := Erf(x)
+		if !veryclose(erf[i], got) {
+			t.Errorf("Erf(%g) = %g, want %g", x, got, erf[i])
+		}
+	}
+	for i, x := range vferfSC {
+		got := Erf(x)
+		if !alike(erfSC[i], got) {
+			t.Errorf("Erf(%g) = %g, want %g", x, got, erfSC[i])
+		}
+	}
+}
+
+// TestErfc checks Erfc on vf[i]/10 against the erfc table, then checks
+// the special-case inputs in vferfcSC against erfcSC.
+func TestErfc(t *testing.T) {
+	for i, v := range vf {
+		x := v / 10
+		got := Erfc(x)
+		if !veryclose(erfc[i], got) {
+			t.Errorf("Erfc(%g) = %g, want %g", x, got, erfc[i])
+		}
+	}
+	for i, x := range vferfcSC {
+		got := Erfc(x)
+		if !alike(erfcSC[i], got) {
+			t.Errorf("Erfc(%g) = %g, want %g", x, got, erfcSC[i])
+		}
+	}
+}
+
+// TestErfinv checks Erfinv on vf[i]/10 against the erfinv table and on the
+// special cases in vferfinvSC, then verifies that Erf(Erfinv(x)) and
+// Erfinv(Erf(x)) both round-trip for x stepping from -0.9 by 0.01.
+func TestErfinv(t *testing.T) {
+	for i, v := range vf {
+		x := v / 10
+		got := Erfinv(x)
+		if !veryclose(erfinv[i], got) {
+			t.Errorf("Erfinv(%g) = %g, want %g", x, got, erfinv[i])
+		}
+	}
+	for i, x := range vferfinvSC {
+		got := Erfinv(x)
+		if !alike(erfinvSC[i], got) {
+			t.Errorf("Erfinv(%g) = %g, want %g", x, got, erfinvSC[i])
+		}
+	}
+	// Round trip: Erf after Erfinv.
+	for x := -0.9; x <= 0.90; x += 1e-2 {
+		got := Erf(Erfinv(x))
+		if !close(x, got) {
+			t.Errorf("Erf(Erfinv(%g)) = %g, want %g", x, got, x)
+		}
+	}
+	// Round trip: Erfinv after Erf.
+	for x := -0.9; x <= 0.90; x += 1e-2 {
+		got := Erfinv(Erf(x))
+		if !close(x, got) {
+			t.Errorf("Erfinv(Erf(%g)) = %g, want %g", x, got, x)
+		}
+	}
+}
+
+// TestErfcinv checks Erfcinv on 1 - vf[i]/10 against the erfinv table
+// (the same expected values TestErfinv uses for vf[i]/10) and on the
+// special cases in vferfcinvSC, then verifies that Erfc(Erfcinv(x)) and
+// Erfcinv(Erfc(x)) both round-trip for x stepping from 0.1 by 0.01.
+func TestErfcinv(t *testing.T) {
+	for i, v := range vf {
+		x := 1.0 - (v / 10)
+		got := Erfcinv(x)
+		if !veryclose(erfinv[i], got) {
+			t.Errorf("Erfcinv(%g) = %g, want %g", x, got, erfinv[i])
+		}
+	}
+	for i, x := range vferfcinvSC {
+		got := Erfcinv(x)
+		if !alike(erfcinvSC[i], got) {
+			t.Errorf("Erfcinv(%g) = %g, want %g", x, got, erfcinvSC[i])
+		}
+	}
+	// Round trip: Erfc after Erfcinv.
+	for x := 0.1; x <= 1.9; x += 1e-2 {
+		got := Erfc(Erfcinv(x))
+		if !close(x, got) {
+			t.Errorf("Erfc(Erfcinv(%g)) = %g, want %g", x, got, x)
+		}
+	}
+	// Round trip: Erfcinv after Erfc.
+	for x := 0.1; x <= 1.9; x += 1e-2 {
+		got := Erfcinv(Erfc(x))
+		if !close(x, got) {
+			t.Errorf("Erfcinv(Erfc(%g)) = %g, want %g", x, got, x)
+		}
+	}
+}
+
+// TestExp runs the shared testExp checks against Exp and then ExpGo.
+func TestExp(t *testing.T) {
+	impls := []struct {
+		fn   func(float64) float64
+		name string
+	}{
+		{Exp, "Exp"},
+		{ExpGo, "ExpGo"},
+	}
+	for _, impl := range impls {
+		testExp(t, impl.fn, impl.name)
+	}
+}
+
+// testExp checks the given exponential implementation on the vf inputs
+// against the exp table and on the special cases in vfexpSC against expSC.
+// name is used as the function name in failure messages.
+func testExp(t *testing.T, Exp func(float64) float64, name string) {
+	for i, x := range vf {
+		got := Exp(x)
+		if !veryclose(exp[i], got) {
+			t.Errorf("%s(%g) = %g, want %g", name, x, got, exp[i])
+		}
+	}
+	for i, x := range vfexpSC {
+		got := Exp(x)
+		if !alike(expSC[i], got) {
+			t.Errorf("%s(%g) = %g, want %g", name, x, got, expSC[i])
+		}
+	}
+}
+
+// TestExpm1 checks Expm1 on vf[i]/100 against the expm1 table, on vf[i]*10
+// against expm1Large (using the close comparison), and on the special
+// cases in vfexpm1SC against expm1SC.
+func TestExpm1(t *testing.T) {
+	for i, v := range vf {
+		x := v / 100
+		got := Expm1(x)
+		if !veryclose(expm1[i], got) {
+			t.Errorf("Expm1(%g) = %g, want %g", x, got, expm1[i])
+		}
+	}
+	for i, v := range vf {
+		x := v * 10
+		got := Expm1(x)
+		if !close(expm1Large[i], got) {
+			t.Errorf("Expm1(%g) = %g, want %g", x, got, expm1Large[i])
+		}
+	}
+	for i, x := range vfexpm1SC {
+		got := Expm1(x)
+		if !alike(expm1SC[i], got) {
+			t.Errorf("Expm1(%g) = %g, want %g", x, got, expm1SC[i])
+		}
+	}
+}
+
+// TestExp2 runs the shared testExp2 checks against Exp2 and then Exp2Go.
+func TestExp2(t *testing.T) {
+	impls := []struct {
+		fn   func(float64) float64
+		name string
+	}{
+		{Exp2, "Exp2"},
+		{Exp2Go, "Exp2Go"},
+	}
+	for _, impl := range impls {
+		testExp2(t, impl.fn, impl.name)
+	}
+}
+
+// testExp2 checks the given base-2 exponential implementation on the vf
+// inputs against the exp2 table, on the special cases in vfexp2SC against
+// exp2SC, and requires exact agreement with Ldexp(1, n) for every integer
+// n in [-1074, 1023]. name is used as the function name in failure messages.
+func testExp2(t *testing.T, Exp2 func(float64) float64, name string) {
+	for i, x := range vf {
+		got := Exp2(x)
+		if !close(exp2[i], got) {
+			t.Errorf("%s(%g) = %g, want %g", name, x, got, exp2[i])
+		}
+	}
+	for i, x := range vfexp2SC {
+		got := Exp2(x)
+		if !alike(exp2SC[i], got) {
+			t.Errorf("%s(%g) = %g, want %g", name, x, got, exp2SC[i])
+		}
+	}
+	for n := -1074; n < 1024; n++ {
+		got := Exp2(float64(n))
+		want := Ldexp(1, n)
+		if got != want {
+			t.Errorf("%s(%d) = %g, want %g", name, n, got, want)
+		}
+	}
+}
+
+// TestAbs checks Abs on the vf inputs for exact equality with the fabs
+// table, then checks the special cases in vffabsSC against fabsSC.
+func TestAbs(t *testing.T) {
+	for i, x := range vf {
+		got := Abs(x)
+		if got != fabs[i] {
+			t.Errorf("Abs(%g) = %g, want %g", x, got, fabs[i])
+		}
+	}
+	for i, x := range vffabsSC {
+		got := Abs(x)
+		if !alike(fabsSC[i], got) {
+			t.Errorf("Abs(%g) = %g, want %g", x, got, fabsSC[i])
+		}
+	}
+}
+
+// TestDim checks Dim(vf[i], 0) for exact equality with the fdim table,
+// then checks the argument pairs in vffdimSC and in vffdim2SC, both
+// against the expected values in fdimSC.
+func TestDim(t *testing.T) {
+	for i, x := range vf {
+		got := Dim(x, 0)
+		if got != fdim[i] {
+			t.Errorf("Dim(%g, %g) = %g, want %g", x, 0.0, got, fdim[i])
+		}
+	}
+	for i, pair := range vffdimSC {
+		got := Dim(pair[0], pair[1])
+		if !alike(fdimSC[i], got) {
+			t.Errorf("Dim(%g, %g) = %g, want %g", pair[0], pair[1], got, fdimSC[i])
+		}
+	}
+	for i, pair := range vffdim2SC {
+		got := Dim(pair[0], pair[1])
+		if !alike(fdimSC[i], got) {
+			t.Errorf("Dim(%g, %g) = %g, want %g", pair[0], pair[1], got, fdimSC[i])
+		}
+	}
+}
+
+// TestFloor checks Floor on the vf inputs against the floor table, then
+// checks the special cases using the vfceilSC inputs and ceilSC results.
+func TestFloor(t *testing.T) {
+	for i, x := range vf {
+		got := Floor(x)
+		if !alike(floor[i], got) {
+			t.Errorf("Floor(%g) = %g, want %g", x, got, floor[i])
+		}
+	}
+	for i, x := range vfceilSC {
+		got := Floor(x)
+		if !alike(ceilSC[i], got) {
+			t.Errorf("Floor(%g) = %g, want %g", x, got, ceilSC[i])
+		}
+	}
+}
+
+// TestMax checks that Max(vf[i], ceil[i]) returns ceil[i] exactly, then
+// checks the argument pairs in vffdimSC and vffdim2SC against fmaxSC.
+func TestMax(t *testing.T) {
+	for i, x := range vf {
+		want := ceil[i]
+		got := Max(x, want)
+		if got != want {
+			t.Errorf("Max(%g, %g) = %g, want %g", x, want, got, want)
+		}
+	}
+	for i, pair := range vffdimSC {
+		got := Max(pair[0], pair[1])
+		if !alike(fmaxSC[i], got) {
+			t.Errorf("Max(%g, %g) = %g, want %g", pair[0], pair[1], got, fmaxSC[i])
+		}
+	}
+	for i, pair := range vffdim2SC {
+		got := Max(pair[0], pair[1])
+		if !alike(fmaxSC[i], got) {
+			t.Errorf("Max(%g, %g) = %g, want %g", pair[0], pair[1], got, fmaxSC[i])
+		}
+	}
+}
+
+// TestMin checks that Min(vf[i], floor[i]) returns floor[i] exactly, then
+// checks the argument pairs in vffdimSC and vffdim2SC against fminSC.
+func TestMin(t *testing.T) {
+	for i, x := range vf {
+		want := floor[i]
+		got := Min(x, want)
+		if got != want {
+			t.Errorf("Min(%g, %g) = %g, want %g", x, want, got, want)
+		}
+	}
+	for i, pair := range vffdimSC {
+		got := Min(pair[0], pair[1])
+		if !alike(fminSC[i], got) {
+			t.Errorf("Min(%g, %g) = %g, want %g", pair[0], pair[1], got, fminSC[i])
+		}
+	}
+	for i, pair := range vffdim2SC {
+		got := Min(pair[0], pair[1])
+		if !alike(fminSC[i], got) {
+			t.Errorf("Min(%g, %g) = %g, want %g", pair[0], pair[1], got, fminSC[i])
+		}
+	}
+}
+
+// TestMod checks Mod(10, vf[i]) for exact equality with the fmod table,
+// the argument pairs in vffmodSC against fmodSC, and one extreme-magnitude
+// input whose result must be exact.
+func TestMod(t *testing.T) {
+	for i, y := range vf {
+		if got := Mod(10, y); got != fmod[i] {
+			t.Errorf("Mod(10, %g) = %g, want %g", y, got, fmod[i])
+		}
+	}
+	for i, pair := range vffmodSC {
+		if got := Mod(pair[0], pair[1]); !alike(fmodSC[i], got) {
+			t.Errorf("Mod(%g, %g) = %g, want %g", pair[0], pair[1], got, fmodSC[i])
+		}
+	}
+	// verify precision of result for extreme inputs
+	// (the failure message names Mod, the function actually under test here).
+	if got := Mod(5.9790119248836734e+200, 1.1258465975523544); got != 0.6447968302508578 {
+		t.Errorf("Mod(5.9790119248836734e+200, 1.1258465975523544) = %g, want 0.6447968302508578", got)
+	}
+}
+
+// TestFrexp checks both results of Frexp (fraction and exponent) on the vf
+// inputs against the frexp table, then on the special cases (vffrexpSC vs
+// frexpSC) and the boundary cases (vffrexpBC vs frexpBC).
+func TestFrexp(t *testing.T) {
+	for i, x := range vf {
+		want := frexp[i]
+		frac, exp := Frexp(x)
+		if !veryclose(want.f, frac) || want.i != exp {
+			t.Errorf("Frexp(%g) = %g, %d, want %g, %d", x, frac, exp, want.f, want.i)
+		}
+	}
+	for i, x := range vffrexpSC {
+		want := frexpSC[i]
+		frac, exp := Frexp(x)
+		if !alike(want.f, frac) || want.i != exp {
+			t.Errorf("Frexp(%g) = %g, %d, want %g, %d", x, frac, exp, want.f, want.i)
+		}
+	}
+	for i, x := range vffrexpBC {
+		want := frexpBC[i]
+		frac, exp := Frexp(x)
+		if !alike(want.f, frac) || want.i != exp {
+			t.Errorf("Frexp(%g) = %g, %d, want %g, %d", x, frac, exp, want.f, want.i)
+		}
+	}
+}
+
+// TestGamma checks Gamma on the vf inputs against the gamma table, then
+// walks the (input, expected) pairs in vfgamma. For those pairs the
+// comparison depends on the values: alike when the expected value is NaN,
+// infinite or zero (or the result is zero), veryclose for inputs in
+// (-50, 171], and close otherwise.
+func TestGamma(t *testing.T) {
+	for i, x := range vf {
+		got := Gamma(x)
+		if !close(gamma[i], got) {
+			t.Errorf("Gamma(%g) = %g, want %g", x, got, gamma[i])
+		}
+	}
+	for _, g := range vfgamma {
+		x, want := g[0], g[1]
+		got := Gamma(x)
+		var ok bool
+		switch {
+		case IsNaN(want) || IsInf(want, 0) || want == 0 || got == 0:
+			ok = alike(want, got)
+		case x > -50 && x <= 171:
+			ok = veryclose(want, got)
+		default:
+			ok = close(want, got)
+		}
+		if !ok {
+			t.Errorf("Gamma(%g) = %g, want %g", x, got, want)
+		}
+	}
+}
+
+// TestHypot checks Hypot with both arguments equal to 1e200*tanh[i]
+// against |1e200*tanh[i]*Sqrt(2)|, then checks the argument pairs in
+// vfhypotSC against hypotSC.
+func TestHypot(t *testing.T) {
+	for i := range vf {
+		side := 1e200 * tanh[i]
+		want := Abs(side * Sqrt(2))
+		got := Hypot(side, side)
+		if !veryclose(want, got) {
+			t.Errorf("Hypot(%g, %g) = %g, want %g", side, side, got, want)
+		}
+	}
+	for i, pair := range vfhypotSC {
+		got := Hypot(pair[0], pair[1])
+		if !alike(hypotSC[i], got) {
+			t.Errorf("Hypot(%g, %g) = %g, want %g", pair[0], pair[1], got, hypotSC[i])
+		}
+	}
+}
+
+// TestHypotGo applies the same checks as TestHypot to HypotGo: equal
+// arguments 1e200*tanh[i] against |1e200*tanh[i]*Sqrt(2)|, then the
+// argument pairs in vfhypotSC against hypotSC.
+func TestHypotGo(t *testing.T) {
+	for i := range vf {
+		side := 1e200 * tanh[i]
+		want := Abs(side * Sqrt(2))
+		got := HypotGo(side, side)
+		if !veryclose(want, got) {
+			t.Errorf("HypotGo(%g, %g) = %g, want %g", side, side, got, want)
+		}
+	}
+	for i, pair := range vfhypotSC {
+		got := HypotGo(pair[0], pair[1])
+		if !alike(hypotSC[i], got) {
+			t.Errorf("HypotGo(%g, %g) = %g, want %g", pair[0], pair[1], got, hypotSC[i])
+		}
+	}
+}
+
+// TestIlogb checks Ilogb on the vf inputs against the frexp exponent minus
+// one, on the special cases in vflogbSC against ilogbSC, and on the
+// boundary cases in vffrexpBC against logbBC converted to int.
+func TestIlogb(t *testing.T) {
+	for i, x := range vf {
+		want := frexp[i].i - 1 // adjust because fr in the interval [½, 1)
+		got := Ilogb(x)
+		if got != want {
+			t.Errorf("Ilogb(%g) = %d, want %d", x, got, want)
+		}
+	}
+	for i, x := range vflogbSC {
+		got := Ilogb(x)
+		if got != ilogbSC[i] {
+			t.Errorf("Ilogb(%g) = %d, want %d", x, got, ilogbSC[i])
+		}
+	}
+	for i, x := range vffrexpBC {
+		want := int(logbBC[i])
+		got := Ilogb(x)
+		if got != want {
+			t.Errorf("Ilogb(%g) = %d, want %d", x, got, want)
+		}
+	}
+}
+
+// TestJ0 checks J0 on the vf inputs against the j0 table with a 4e-14
+// soclose tolerance, then checks the special cases in vfj0SC against j0SC.
+func TestJ0(t *testing.T) {
+	for i, x := range vf {
+		got := J0(x)
+		if !soclose(j0[i], got, 4e-14) {
+			t.Errorf("J0(%g) = %g, want %g", x, got, j0[i])
+		}
+	}
+	for i, x := range vfj0SC {
+		got := J0(x)
+		if !alike(j0SC[i], got) {
+			t.Errorf("J0(%g) = %g, want %g", x, got, j0SC[i])
+		}
+	}
+}
+
+// TestJ1 checks J1 on the vf inputs against the j1 table, then checks the
+// special cases using the vfj0SC inputs and the j1SC results.
+func TestJ1(t *testing.T) {
+	for i, x := range vf {
+		got := J1(x)
+		if !close(j1[i], got) {
+			t.Errorf("J1(%g) = %g, want %g", x, got, j1[i])
+		}
+	}
+	for i, x := range vfj0SC {
+		got := J1(x)
+		if !alike(j1SC[i], got) {
+			t.Errorf("J1(%g) = %g, want %g", x, got, j1SC[i])
+		}
+	}
+}
+
+// TestJn checks Jn for orders 2 and -3 on the vf inputs against the j2 and
+// jM3 tables, then on the vfj0SC special-case inputs against j2SC and jM3SC.
+func TestJn(t *testing.T) {
+	for i, x := range vf {
+		got := Jn(2, x)
+		if !close(j2[i], got) {
+			t.Errorf("Jn(2, %g) = %g, want %g", x, got, j2[i])
+		}
+		got = Jn(-3, x)
+		if !close(jM3[i], got) {
+			t.Errorf("Jn(-3, %g) = %g, want %g", x, got, jM3[i])
+		}
+	}
+	for i, x := range vfj0SC {
+		got := Jn(2, x)
+		if !alike(j2SC[i], got) {
+			t.Errorf("Jn(2, %g) = %g, want %g", x, got, j2SC[i])
+		}
+		got = Jn(-3, x)
+		if !alike(jM3SC[i], got) {
+			t.Errorf("Jn(-3, %g) = %g, want %g", x, got, jM3SC[i])
+		}
+	}
+}
+
+// TestLdexp checks that Ldexp reverses the Frexp tables (frexp -> vf,
+// frexpSC -> vffrexpSC, frexpBC -> vffrexpBC) and checks the dedicated
+// special-case and boundary-case inputs (vfldexpSC vs ldexpSC, vfldexpBC
+// vs ldexpBC).
+func TestLdexp(t *testing.T) {
+	for i, want := range vf {
+		in := frexp[i]
+		got := Ldexp(in.f, in.i)
+		if !veryclose(want, got) {
+			t.Errorf("Ldexp(%g, %d) = %g, want %g", in.f, in.i, got, want)
+		}
+	}
+	for i, want := range vffrexpSC {
+		in := frexpSC[i]
+		got := Ldexp(in.f, in.i)
+		if !alike(want, got) {
+			t.Errorf("Ldexp(%g, %d) = %g, want %g", in.f, in.i, got, want)
+		}
+	}
+	for i, in := range vfldexpSC {
+		got := Ldexp(in.f, in.i)
+		if !alike(ldexpSC[i], got) {
+			t.Errorf("Ldexp(%g, %d) = %g, want %g", in.f, in.i, got, ldexpSC[i])
+		}
+	}
+	for i, want := range vffrexpBC {
+		in := frexpBC[i]
+		got := Ldexp(in.f, in.i)
+		if !alike(want, got) {
+			t.Errorf("Ldexp(%g, %d) = %g, want %g", in.f, in.i, got, want)
+		}
+	}
+	for i, in := range vfldexpBC {
+		got := Ldexp(in.f, in.i)
+		if !alike(ldexpBC[i], got) {
+			t.Errorf("Ldexp(%g, %d) = %g, want %g", in.f, in.i, got, ldexpBC[i])
+		}
+	}
+}
+
+// TestLgamma checks both results of Lgamma (value and sign) on the vf
+// inputs against the lgamma table, then on the special cases in
+// vflgammaSC against lgammaSC.
+func TestLgamma(t *testing.T) {
+	for i, x := range vf {
+		want := lgamma[i]
+		val, sign := Lgamma(x)
+		if !close(want.f, val) || want.i != sign {
+			t.Errorf("Lgamma(%g) = %g, %d, want %g, %d", x, val, sign, want.f, want.i)
+		}
+	}
+	for i, x := range vflgammaSC {
+		want := lgammaSC[i]
+		val, sign := Lgamma(x)
+		if !alike(want.f, val) || want.i != sign {
+			t.Errorf("Lgamma(%g) = %g, %d, want %g, %d", x, val, sign, want.f, want.i)
+		}
+	}
+}
+
+// TestLog checks Log on |vf[i]| for exact equality with the log table,
+// requires Log(10) to equal Ln10 exactly, and checks the special cases in
+// vflogSC against logSC.
+func TestLog(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		got := Log(x)
+		if got != log[i] {
+			t.Errorf("Log(%g) = %g, want %g", x, got, log[i])
+		}
+	}
+	if got := Log(10); got != Ln10 {
+		t.Errorf("Log(%g) = %g, want %g", 10.0, got, Ln10)
+	}
+	for i, x := range vflogSC {
+		got := Log(x)
+		if !alike(logSC[i], got) {
+			t.Errorf("Log(%g) = %g, want %g", x, got, logSC[i])
+		}
+	}
+}
+
+// TestLogb checks Logb on the vf inputs for exact equality with the logb
+// table, then checks the special cases (vflogbSC vs logbSC) and the
+// boundary cases (vffrexpBC vs logbBC).
+func TestLogb(t *testing.T) {
+	for i, x := range vf {
+		got := Logb(x)
+		if got != logb[i] {
+			t.Errorf("Logb(%g) = %g, want %g", x, got, logb[i])
+		}
+	}
+	for i, x := range vflogbSC {
+		got := Logb(x)
+		if !alike(logbSC[i], got) {
+			t.Errorf("Logb(%g) = %g, want %g", x, got, logbSC[i])
+		}
+	}
+	for i, x := range vffrexpBC {
+		got := Logb(x)
+		if !alike(logbBC[i], got) {
+			t.Errorf("Logb(%g) = %g, want %g", x, got, logbBC[i])
+		}
+	}
+}
+
+// TestLog10 checks Log10 on |vf[i]| against the log10 table, requires
+// Log10(E) to equal Log10E exactly, and checks the special cases using
+// the vflogSC inputs and logSC results.
+func TestLog10(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		got := Log10(x)
+		if !veryclose(log10[i], got) {
+			t.Errorf("Log10(%g) = %g, want %g", x, got, log10[i])
+		}
+	}
+	if got := Log10(E); got != Log10E {
+		t.Errorf("Log10(%g) = %g, want %g", E, got, Log10E)
+	}
+	for i, x := range vflogSC {
+		got := Log10(x)
+		if !alike(logSC[i], got) {
+			t.Errorf("Log10(%g) = %g, want %g", x, got, logSC[i])
+		}
+	}
+}
+
+// TestLog1p checks Log1p on vf[i]/100 against the log1p table, requires
+// Log1p(9) to equal Ln10 exactly, and checks every special case in
+// vflog1pSC against log1pSC.
+func TestLog1p(t *testing.T) {
+	for i, v := range vf {
+		x := v / 100
+		if got := Log1p(x); !veryclose(log1p[i], got) {
+			t.Errorf("Log1p(%g) = %g, want %g", x, got, log1p[i])
+		}
+	}
+	a := 9.0
+	if got := Log1p(a); got != Ln10 {
+		t.Errorf("Log1p(%g) = %g, want %g", a, got, Ln10)
+	}
+	// Iterate over vflog1pSC itself: bounding this loop by len(vflogSC)
+	// (a different table) would skip or overrun entries whenever the two
+	// tables differ in length.
+	for i, x := range vflog1pSC {
+		if got := Log1p(x); !alike(log1pSC[i], got) {
+			t.Errorf("Log1p(%g) = %g, want %g", x, got, log1pSC[i])
+		}
+	}
+}
+
+// TestLog2 checks Log2 on |vf[i]| against the log2 table, requires
+// Log2(E) to equal Log2E exactly, checks the special cases using the
+// vflogSC inputs and logSC results, and requires Log2(Ldexp(1, i)) to
+// equal i exactly for every integer i in [-1074, 1023].
+func TestLog2(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		got := Log2(x)
+		if !veryclose(log2[i], got) {
+			t.Errorf("Log2(%g) = %g, want %g", x, got, log2[i])
+		}
+	}
+	if got := Log2(E); got != Log2E {
+		t.Errorf("Log2(%g) = %g, want %g", E, got, Log2E)
+	}
+	for i, x := range vflogSC {
+		got := Log2(x)
+		if !alike(logSC[i], got) {
+			t.Errorf("Log2(%g) = %g, want %g", x, got, logSC[i])
+		}
+	}
+	for e := -1074; e <= 1023; e++ {
+		got := Log2(Ldexp(1, e))
+		if got != float64(e) {
+			t.Errorf("Log2(2**%d) = %g, want %d", e, got, e)
+		}
+	}
+}
+
+// TestModf checks both results of Modf on the vf inputs against the modf
+// table, then on the special cases in vfmodfSC against modfSC.
+func TestModf(t *testing.T) {
+	for i, x := range vf {
+		want := modf[i]
+		ip, fp := Modf(x)
+		if !veryclose(want[0], ip) || !veryclose(want[1], fp) {
+			t.Errorf("Modf(%g) = %g, %g, want %g, %g", x, ip, fp, want[0], want[1])
+		}
+	}
+	for i, x := range vfmodfSC {
+		want := modfSC[i]
+		ip, fp := Modf(x)
+		if !alike(want[0], ip) || !alike(want[1], fp) {
+			t.Errorf("Modf(%g) = %g, %g, want %g, %g", x, ip, fp, want[0], want[1])
+		}
+	}
+}
+
+// TestNextafter32 checks Nextafter32(float32(vf[i]), 10) for exact
+// equality with the nextafter32 table, then checks the argument pairs in
+// vfnextafter32SC against nextafter32SC (compared as float64 via alike).
+func TestNextafter32(t *testing.T) {
+	for i, v := range vf {
+		x := float32(v)
+		got := Nextafter32(x, 10)
+		if got != nextafter32[i] {
+			t.Errorf("Nextafter32(%g, %g) = %g want %g", x, 10.0, got, nextafter32[i])
+		}
+	}
+	for i, pair := range vfnextafter32SC {
+		got := Nextafter32(pair[0], pair[1])
+		if !alike(float64(nextafter32SC[i]), float64(got)) {
+			t.Errorf("Nextafter32(%g, %g) = %g want %g", pair[0], pair[1], got, nextafter32SC[i])
+		}
+	}
+}
+
+// TestNextafter64 checks Nextafter(vf[i], 10) for exact equality with the
+// nextafter64 table, then checks the argument pairs in vfnextafter64SC
+// against nextafter64SC.
+func TestNextafter64(t *testing.T) {
+	for i, x := range vf {
+		got := Nextafter(x, 10)
+		if got != nextafter64[i] {
+			t.Errorf("Nextafter64(%g, %g) = %g want %g", x, 10.0, got, nextafter64[i])
+		}
+	}
+	for i, pair := range vfnextafter64SC {
+		got := Nextafter(pair[0], pair[1])
+		if !alike(nextafter64SC[i], got) {
+			t.Errorf("Nextafter64(%g, %g) = %g want %g", pair[0], pair[1], got, nextafter64SC[i])
+		}
+	}
+}
+
+// TestPow checks Pow(10, vf[i]) against the pow table, then checks the
+// argument pairs in vfpowSC against powSC.
+func TestPow(t *testing.T) {
+	for i, y := range vf {
+		got := Pow(10, y)
+		if !close(pow[i], got) {
+			t.Errorf("Pow(10, %g) = %g, want %g", y, got, pow[i])
+		}
+	}
+	for i, pair := range vfpowSC {
+		got := Pow(pair[0], pair[1])
+		if !alike(powSC[i], got) {
+			t.Errorf("Pow(%g, %g) = %g, want %g", pair[0], pair[1], got, powSC[i])
+		}
+	}
+}
+
+// TestPow10 checks Pow10 on the exponents in vfpow10SC against pow10SC.
+func TestPow10(t *testing.T) {
+	for i, n := range vfpow10SC {
+		got := Pow10(n)
+		if !alike(pow10SC[i], got) {
+			t.Errorf("Pow10(%d) = %g, want %g", n, got, pow10SC[i])
+		}
+	}
+}
+
+// TestRemainder checks Remainder(10, vf[i]) for exact equality with the
+// remainder table, the argument pairs in vffmodSC against fmodSC, one
+// extreme-magnitude input whose result must be exact, and that a zero
+// result carries the sign of x for small integer x and y of either sign.
+func TestRemainder(t *testing.T) {
+	for i, y := range vf {
+		got := Remainder(10, y)
+		if got != remainder[i] {
+			t.Errorf("Remainder(10, %g) = %g, want %g", y, got, remainder[i])
+		}
+	}
+	for i, pair := range vffmodSC {
+		got := Remainder(pair[0], pair[1])
+		if !alike(fmodSC[i], got) {
+			t.Errorf("Remainder(%g, %g) = %g, want %g", pair[0], pair[1], got, fmodSC[i])
+		}
+	}
+	// verify precision of result for extreme inputs
+	if got := Remainder(5.9790119248836734e+200, 1.1258465975523544); got != -0.4810497673014966 {
+		t.Errorf("Remainder(5.9790119248836734e+200, 1.1258465975523544) = %g, want -0.4810497673014966", got)
+	}
+	// verify that sign is correct when r == 0.
+	checkZeroSign := func(x, y float64) {
+		r := Remainder(x, y)
+		if r != 0 {
+			return
+		}
+		if Signbit(r) != Signbit(x) {
+			t.Errorf("Remainder(x=%f, y=%f) = %f, sign of (zero) result should agree with sign of x", x, y, r)
+		}
+	}
+	for x := 0.0; x <= 3.0; x += 1 {
+		for y := 1.0; y <= 3.0; y += 1 {
+			checkZeroSign(x, y)
+			checkZeroSign(x, -y)
+			checkZeroSign(-x, y)
+			checkZeroSign(-x, -y)
+		}
+	}
+}
+
+// TestRound checks Round on the vf inputs against the round table, then
+// checks the (input, expected) pairs in vfroundSC.
+func TestRound(t *testing.T) {
+	for i, x := range vf {
+		got := Round(x)
+		if !alike(round[i], got) {
+			t.Errorf("Round(%g) = %g, want %g", x, got, round[i])
+		}
+	}
+	for _, tc := range vfroundSC {
+		in, want := tc[0], tc[1]
+		got := Round(in)
+		if !alike(want, got) {
+			t.Errorf("Round(%g) = %g, want %g", in, got, want)
+		}
+	}
+}
+
+// TestRoundToEven checks RoundToEven on the vf inputs against the round
+// table, then checks the (input, expected) pairs in vfroundEvenSC.
+func TestRoundToEven(t *testing.T) {
+	for i, x := range vf {
+		got := RoundToEven(x)
+		if !alike(round[i], got) {
+			t.Errorf("RoundToEven(%g) = %g, want %g", x, got, round[i])
+		}
+	}
+	for _, tc := range vfroundEvenSC {
+		in, want := tc[0], tc[1]
+		got := RoundToEven(in)
+		if !alike(want, got) {
+			t.Errorf("RoundToEven(%g) = %g, want %g", in, got, want)
+		}
+	}
+}
+
+// TestSignbit checks Signbit on the vf inputs against the signbit table,
+// then checks the special cases in vfsignbitSC against signbitSC.
+func TestSignbit(t *testing.T) {
+	for i, x := range vf {
+		got := Signbit(x)
+		if got != signbit[i] {
+			t.Errorf("Signbit(%g) = %t, want %t", x, got, signbit[i])
+		}
+	}
+	for i, x := range vfsignbitSC {
+		got := Signbit(x)
+		if got != signbitSC[i] {
+			t.Errorf("Signbit(%g) = %t, want %t", x, got, signbitSC[i])
+		}
+	}
+}
+
+// TestSin checks Sin on the vf inputs against the sin table, then checks
+// the special cases in vfsinSC against sinSC.
+func TestSin(t *testing.T) {
+	for i, x := range vf {
+		got := Sin(x)
+		if !veryclose(sin[i], got) {
+			t.Errorf("Sin(%g) = %g, want %g", x, got, sin[i])
+		}
+	}
+	for i, x := range vfsinSC {
+		got := Sin(x)
+		if !alike(sinSC[i], got) {
+			t.Errorf("Sin(%g) = %g, want %g", x, got, sinSC[i])
+		}
+	}
+}
+
+// TestSincos checks both results of Sincos on the vf inputs against the
+// sin and cos tables.
+func TestSincos(t *testing.T) {
+	for i, x := range vf {
+		s, c := Sincos(x)
+		sinOK := veryclose(sin[i], s)
+		if !sinOK || !veryclose(cos[i], c) {
+			t.Errorf("Sincos(%g) = %g, %g want %g, %g", x, s, c, sin[i], cos[i])
+		}
+	}
+}
+
+// TestSinh checks Sinh on the vf inputs against the sinh table (using the
+// close comparison), then checks the special cases in vfsinhSC against sinhSC.
+func TestSinh(t *testing.T) {
+	for i, x := range vf {
+		got := Sinh(x)
+		if !close(sinh[i], got) {
+			t.Errorf("Sinh(%g) = %g, want %g", x, got, sinh[i])
+		}
+	}
+	for i, x := range vfsinhSC {
+		got := Sinh(x)
+		if !alike(sinhSC[i], got) {
+			t.Errorf("Sinh(%g) = %g, want %g", x, got, sinhSC[i])
+		}
+	}
+}
+
+// TestSqrt checks both SqrtGo and Sqrt on |vf[i]| for exact equality with
+// the sqrt table, then checks both on the special cases in vfsqrtSC
+// against sqrtSC.
+func TestSqrt(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		want := sqrt[i]
+		if got := SqrtGo(x); got != want {
+			t.Errorf("SqrtGo(%g) = %g, want %g", x, got, want)
+		}
+		if got := Sqrt(x); got != want {
+			t.Errorf("Sqrt(%g) = %g, want %g", x, got, want)
+		}
+	}
+	for i, x := range vfsqrtSC {
+		want := sqrtSC[i]
+		if got := SqrtGo(x); !alike(want, got) {
+			t.Errorf("SqrtGo(%g) = %g, want %g", x, got, want)
+		}
+		if got := Sqrt(x); !alike(want, got) {
+			t.Errorf("Sqrt(%g) = %g, want %g", x, got, want)
+		}
+	}
+}
+
+// TestTan checks Tan on the vf inputs against the tan table, then checks
+// the special cases using the Sin tables (vfsinSC inputs, sinSC results).
+func TestTan(t *testing.T) {
+	for i, x := range vf {
+		got := Tan(x)
+		if !veryclose(tan[i], got) {
+			t.Errorf("Tan(%g) = %g, want %g", x, got, tan[i])
+		}
+	}
+	// same special cases as Sin
+	for i, x := range vfsinSC {
+		got := Tan(x)
+		if !alike(sinSC[i], got) {
+			t.Errorf("Tan(%g) = %g, want %g", x, got, sinSC[i])
+		}
+	}
+}
+
+// TestTanh checks Tanh on the vf inputs against the tanh table, then
+// checks the special cases in vftanhSC against tanhSC.
+func TestTanh(t *testing.T) {
+	for i, x := range vf {
+		got := Tanh(x)
+		if !veryclose(tanh[i], got) {
+			t.Errorf("Tanh(%g) = %g, want %g", x, got, tanh[i])
+		}
+	}
+	for i, x := range vftanhSC {
+		got := Tanh(x)
+		if !alike(tanhSC[i], got) {
+			t.Errorf("Tanh(%g) = %g, want %g", x, got, tanhSC[i])
+		}
+	}
+}
+
+// TestTrunc checks Trunc on the vf inputs against the trunc table, then
+// checks the special cases using the vfceilSC inputs and ceilSC results.
+func TestTrunc(t *testing.T) {
+	for i, x := range vf {
+		got := Trunc(x)
+		if !alike(trunc[i], got) {
+			t.Errorf("Trunc(%g) = %g, want %g", x, got, trunc[i])
+		}
+	}
+	for i, x := range vfceilSC {
+		got := Trunc(x)
+		if !alike(ceilSC[i], got) {
+			t.Errorf("Trunc(%g) = %g, want %g", x, got, ceilSC[i])
+		}
+	}
+}
+
+// TestY0 checks Y0 on |vf[i]| against the y0 table, then checks the
+// special cases in vfy0SC against y0SC.
+func TestY0(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		got := Y0(x)
+		if !close(y0[i], got) {
+			t.Errorf("Y0(%g) = %g, want %g", x, got, y0[i])
+		}
+	}
+	for i, x := range vfy0SC {
+		got := Y0(x)
+		if !alike(y0SC[i], got) {
+			t.Errorf("Y0(%g) = %g, want %g", x, got, y0SC[i])
+		}
+	}
+}
+
+// TestY1 checks Y1 on |vf[i]| against the y1 table with a 2e-14 soclose
+// tolerance, then checks the special cases using the vfy0SC inputs and
+// y1SC results.
+func TestY1(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		got := Y1(x)
+		if !soclose(y1[i], got, 2e-14) {
+			t.Errorf("Y1(%g) = %g, want %g", x, got, y1[i])
+		}
+	}
+	for i, x := range vfy0SC {
+		got := Y1(x)
+		if !alike(y1SC[i], got) {
+			t.Errorf("Y1(%g) = %g, want %g", x, got, y1SC[i])
+		}
+	}
+}
+
+// TestYn checks Yn for orders 2 and -3 on |vf[i]| against the y2 and yM3
+// tables, then on the vfy0SC special-case inputs against y2SC and yM3SC,
+// and finally requires Yn(0, 0) to be -Inf.
+func TestYn(t *testing.T) {
+	for i, v := range vf {
+		x := Abs(v)
+		got := Yn(2, x)
+		if !close(y2[i], got) {
+			t.Errorf("Yn(2, %g) = %g, want %g", x, got, y2[i])
+		}
+		got = Yn(-3, x)
+		if !close(yM3[i], got) {
+			t.Errorf("Yn(-3, %g) = %g, want %g", x, got, yM3[i])
+		}
+	}
+	for i, x := range vfy0SC {
+		got := Yn(2, x)
+		if !alike(y2SC[i], got) {
+			t.Errorf("Yn(2, %g) = %g, want %g", x, got, y2SC[i])
+		}
+		got = Yn(-3, x)
+		if !alike(yM3SC[i], got) {
+			t.Errorf("Yn(-3, %g) = %g, want %g", x, got, yM3SC[i])
+		}
+	}
+	got := Yn(0, 0)
+	if !alike(Inf(-1), got) {
+		t.Errorf("Yn(0, 0) = %g, want %g", got, Inf(-1))
+	}
+}
+
+// PortableFMA is FMA stored in a package-level variable, so calls made
+// through it are indirect calls rather than direct calls to FMA.
+var PortableFMA = FMA // hide call from compiler intrinsic; falls back to portable code
+
+// TestFMA checks every case in fmaC twice: once through a direct FMA call
+// and once through the PortableFMA variable.
+func TestFMA(t *testing.T) {
+	for _, c := range fmaC {
+		if got := FMA(c.x, c.y, c.z); !alike(got, c.want) {
+			t.Errorf("FMA(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want)
+		}
+		if got := PortableFMA(c.x, c.y, c.z); !alike(got, c.want) {
+			t.Errorf("PortableFMA(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want)
+		}
+	}
+}
+
+// fmsub returns FMA(x, y, -z), i.e. the fused x*y - z. It is marked
+// noinline and is used by TestFMANegativeArgs.
+//
+//go:noinline
+func fmsub(x, y, z float64) float64 {
+	return FMA(x, y, -z)
+}
+
+// fnmsub returns FMA(-x, y, z), i.e. the fused -(x*y) + z. It is marked
+// noinline and is used by TestFMANegativeArgs.
+//
+//go:noinline
+func fnmsub(x, y, z float64) float64 {
+	return FMA(-x, y, z)
+}
+
+// fnmadd returns FMA(-x, y, -z), i.e. the fused -(x*y) - z. It is marked
+// noinline and is used by TestFMANegativeArgs.
+//
+//go:noinline
+func fnmadd(x, y, z float64) float64 {
+	return FMA(-x, y, -z)
+}
+
+// TestFMANegativeArgs compares the fmsub, fnmsub and fnmadd helpers with
+// PortableFMA called on the correspondingly negated arguments, for every
+// case in fmaC.
+func TestFMANegativeArgs(t *testing.T) {
+	// Some architectures have instructions for fused multiply-subtract and
+	// also negated variants of fused multiply-add and subtract. This test
+	// aims to check that the optimizations that generate those instructions
+	// are applied correctly, if they exist.
+	for _, c := range fmaC {
+		x, y, z := c.x, c.y, c.z
+		if want, got := PortableFMA(x, y, -z), fmsub(x, y, z); !alike(got, want) {
+			t.Errorf("FMA(%g, %g, -(%g)) == %g, want %g", x, y, z, got, want)
+		}
+		if want, got := PortableFMA(-x, y, z), fnmsub(x, y, z); !alike(got, want) {
+			t.Errorf("FMA(-(%g), %g, %g) == %g, want %g", x, y, z, got, want)
+		}
+		if want, got := PortableFMA(-x, y, -z), fnmadd(x, y, z); !alike(got, want) {
+			t.Errorf("FMA(-(%g), %g, -(%g)) == %g, want %g", x, y, z, got, want)
+		}
+	}
+}
+
+// Check that math functions of high angle values
+// return accurate results. [Since (vf[i] + large) - large != vf[i],
+// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
+// a multiple of 2*Pi, is misleading.]
+//
+// TestLargeCos compares Cos(vf[i] + 100000*Pi) with the cosLarge table.
+func TestLargeCos(t *testing.T) {
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		want := cosLarge[i]
+		got := Cos(v + large)
+		if !close(want, got) {
+			t.Errorf("Cos(%g) = %g, want %g", v+large, got, want)
+		}
+	}
+}
+
+// TestLargeSin compares Sin(vf[i] + 100000*Pi) with the sinLarge table.
+func TestLargeSin(t *testing.T) {
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		want := sinLarge[i]
+		got := Sin(v + large)
+		if !close(want, got) {
+			t.Errorf("Sin(%g) = %g, want %g", v+large, got, want)
+		}
+	}
+}
+
+// TestLargeSincos compares both results of Sincos(vf[i] + 100000*Pi) with
+// the sinLarge and cosLarge tables.
+func TestLargeSincos(t *testing.T) {
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		wantSin, wantCos := sinLarge[i], cosLarge[i]
+		gotSin, gotCos := Sincos(v + large)
+		if !close(wantSin, gotSin) || !close(wantCos, gotCos) {
+			t.Errorf("Sincos(%g) = %g, %g, want %g, %g", v+large, gotSin, gotCos, wantSin, wantCos)
+		}
+	}
+}
+
+// TestLargeTan compares Tan(vf[i] + 100000*Pi) with the tanLarge table.
+func TestLargeTan(t *testing.T) {
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		want := tanLarge[i]
+		got := Tan(v + large)
+		if !close(want, got) {
+			t.Errorf("Tan(%g) = %g, want %g", v+large, got, want)
+		}
+	}
+}
+
+// Check that trigReduce matches the standard reduction results for input values
+// below reduceThreshold.
+//
+// For each input x the test rebuilds the reduced angle as j*(Pi/4) + z from
+// TrigReduce's results and requires Sin, Cos, Tan and Sincos of that angle
+// to be close to the same functions evaluated at x.
+func TestTrigReduce(t *testing.T) {
+	// all of the standard inputs
+	inputs := append([]float64(nil), vf...)
+	// all of the large inputs
+	large := float64(100000 * Pi)
+	for _, v := range vf {
+		inputs = append(inputs, v+large)
+	}
+	// Also test some special inputs, Pi and right below the reduceThreshold
+	inputs = append(inputs, Pi, Nextafter(ReduceThreshold, 0))
+	for _, x := range inputs {
+		// reduce the value to compare
+		j, z := TrigReduce(x)
+		xred := float64(j)*(Pi/4) + z
+
+		want, got := Sin(x), Sin(xred)
+		if !close(want, got) {
+			t.Errorf("Sin(trigReduce(%g)) != Sin(%g), got %g, want %g", x, x, got, want)
+		}
+		want, got = Cos(x), Cos(xred)
+		if !close(want, got) {
+			t.Errorf("Cos(trigReduce(%g)) != Cos(%g), got %g, want %g", x, x, got, want)
+		}
+		want, got = Tan(x), Tan(xred)
+		if !close(want, got) {
+			t.Errorf(" Tan(trigReduce(%g)) != Tan(%g), got %g, want %g", x, x, got, want)
+		}
+		wantSin, wantCos := Sincos(x)
+		gotSin, gotCos := Sincos(xred)
+		if !close(wantSin, gotSin) || !close(wantCos, gotCos) {
+			t.Errorf(" Sincos(trigReduce(%g)) != Sincos(%g), got %g, %g, want %g, %g", x, x, gotSin, gotCos, wantSin, wantCos)
+		}
+	}
+}
+
+// Check that math constants are accepted by compiler
+// and have right value (assumes strconv.ParseFloat works).
+// https://golang.org/issue/201
+
+// floatTest is one entry of floatTests: a constant converted to a concrete
+// float type, its name, and the text fmt.Sprint is expected to produce.
+type floatTest struct {
+	val  any    // the constant, already converted to float32 or float64
+	name string // constant name, used in failure messages
+	str  string // expected fmt.Sprint output for val
+}
+
+// floatTests lists the extreme float32/float64 constants together with
+// the string TestFloatMinMax expects fmt.Sprint to print for each.
+var floatTests = []floatTest{
+	{val: float64(MaxFloat64), name: "MaxFloat64", str: "1.7976931348623157e+308"},
+	{val: float64(SmallestNonzeroFloat64), name: "SmallestNonzeroFloat64", str: "5e-324"},
+	{val: float32(MaxFloat32), name: "MaxFloat32", str: "3.4028235e+38"},
+	{val: float32(SmallestNonzeroFloat32), name: "SmallestNonzeroFloat32", str: "1e-45"},
+}
+
+// TestFloatMinMax formats each floatTests value with fmt.Sprint and
+// compares the text with the expected string.
+func TestFloatMinMax(t *testing.T) {
+	for _, tc := range floatTests {
+		if got := fmt.Sprint(tc.val); got != tc.str {
+			t.Errorf("Sprint(%v) = %s, want %s", tc.name, got, tc.str)
+		}
+	}
+}
+
+// TestFloatMinima checks that half of the smallest nonzero float32 and
+// float64 constants converts to zero in the respective type.
+func TestFloatMinima(t *testing.T) {
+	half32 := float32(SmallestNonzeroFloat32 / 2)
+	if half32 != 0 {
+		t.Errorf("float32(SmallestNonzeroFloat32 / 2) = %g, want 0", half32)
+	}
+	half64 := float64(SmallestNonzeroFloat64 / 2)
+	if half64 != 0 {
+		t.Errorf("float64(SmallestNonzeroFloat64 / 2) = %g, want 0", half64)
+	}
+}
+
+// indirectSqrt is Sqrt stored in a package-level variable; TestFloat32Sqrt
+// calls through it to obtain its reference result.
+// NOTE(review): presumably the indirection keeps the compiler's float32
+// sqrt optimization from applying to the reference call — confirm.
+var indirectSqrt = Sqrt
+
+// TestFloat32Sqrt checks the correctness of the float32 square root optimization result.
+// Each value in sqrt32 is evaluated through a direct Sqrt call and through
+// indirectSqrt; the two must agree, with NaN matching NaN.
+func TestFloat32Sqrt(t *testing.T) {
+	for _, v := range sqrt32 {
+		want := float32(indirectSqrt(float64(v)))
+		got := float32(Sqrt(float64(v)))
+		switch {
+		case IsNaN(float64(want)):
+			// NaN never compares equal, so only require that got is NaN too.
+			if !IsNaN(float64(got)) {
+				t.Errorf("got=%#v want=NaN, v=%#v", got, v)
+			}
+		case got != want:
+			t.Errorf("got=%#v want=%#v, v=%#v", got, want, v)
+		}
+	}
+}
+
+// Benchmarks
+
+// Global exported variables are used to store the
+// return values of functions measured in the benchmarks.
+// Storing the results in these variables prevents the compiler
+// from completely optimizing the benchmarked functions away.
+var (
+	GlobalI int     // sink for int-valued benchmark results
+	GlobalB bool    // sink for bool-valued benchmark results
+	GlobalF float64 // sink for float64-valued benchmark results
+)
+
+func BenchmarkAcos(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Acos(.5)
+	}
+	GlobalF = x
+}
+
+func BenchmarkAcosh(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Acosh(1.5)
+	}
+	GlobalF = x
+}
+
+func BenchmarkAsin(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Asin(.5)
+	}
+	GlobalF = x
+}
+
+func BenchmarkAsinh(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Asinh(.5)
+	}
+	GlobalF = x
+}
+
+func BenchmarkAtan(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Atan(.5)
+	}
+	GlobalF = x
+}
+
+func BenchmarkAtanh(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Atanh(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkAtan2 calls Atan2(.5, 1) b.N times and stores the last result in GlobalF.
+func BenchmarkAtan2(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Atan2(.5, 1)
+	}
+	GlobalF = x
+}
+
+// BenchmarkCbrt calls Cbrt(10) b.N times and stores the last result in GlobalF.
+func BenchmarkCbrt(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Cbrt(10)
+	}
+	GlobalF = x
+}
+
+// BenchmarkCeil calls Ceil(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkCeil(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Ceil(.5)
+	}
+	GlobalF = x
+}
+
+// copysignNeg is the sign argument used by BenchmarkCopysign.
+// NOTE(review): presumably a package variable rather than a literal so the
+// call cannot be constant-folded — confirm.
+var copysignNeg = -1.0
+
+// BenchmarkCopysign calls Copysign(.5, copysignNeg) b.N times and stores the
+// last result in GlobalF.
+func BenchmarkCopysign(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Copysign(.5, copysignNeg)
+	}
+	GlobalF = x
+}
+
+// BenchmarkCos calls Cos(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkCos(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Cos(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkCosh calls Cosh(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkCosh(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Cosh(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkErf calls Erf(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkErf(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Erf(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkErfc calls Erfc(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkErfc(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Erfc(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkErfinv calls Erfinv(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkErfinv(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Erfinv(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkErfcinv calls Erfcinv(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkErfcinv(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Erfcinv(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkExp calls Exp(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkExp(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Exp(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkExpGo calls ExpGo(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkExpGo(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = ExpGo(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkExpm1 calls Expm1(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkExpm1(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Expm1(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkExp2 calls Exp2(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkExp2(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Exp2(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkExp2Go calls Exp2Go(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkExp2Go(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Exp2Go(.5)
+	}
+	GlobalF = x
+}
+
+// absPos is the argument used by BenchmarkAbs.
+var absPos = .5
+
+// BenchmarkAbs calls Abs(absPos) b.N times and stores the last result in GlobalF.
+func BenchmarkAbs(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Abs(absPos)
+	}
+	GlobalF = x
+
+}
+
+// BenchmarkDim calls Dim(GlobalF, x) b.N times, feeding each result back in
+// as the next call's second argument, and stores the last result in GlobalF.
+func BenchmarkDim(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Dim(GlobalF, x)
+	}
+	GlobalF = x
+}
+
+// BenchmarkFloor calls Floor(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkFloor(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Floor(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkMax calls Max(10, 3) b.N times and stores the last result in GlobalF.
+func BenchmarkMax(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Max(10, 3)
+	}
+	GlobalF = x
+}
+
+// BenchmarkMin calls Min(10, 3) b.N times and stores the last result in GlobalF.
+func BenchmarkMin(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Min(10, 3)
+	}
+	GlobalF = x
+}
+
+// BenchmarkMod calls Mod(10, 3) b.N times and stores the last result in GlobalF.
+func BenchmarkMod(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Mod(10, 3)
+	}
+	GlobalF = x
+}
+
+// BenchmarkFrexp calls Frexp(8) b.N times; the last fraction is stored in
+// GlobalF and the last exponent in GlobalI.
+func BenchmarkFrexp(b *testing.B) {
+	x := 0.0
+	y := 0
+	for i := 0; i < b.N; i++ {
+		x, y = Frexp(8)
+	}
+	GlobalF = x
+	GlobalI = y
+}
+
+// BenchmarkGamma calls Gamma(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkGamma(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Gamma(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkHypot calls Hypot(3, 4) b.N times and stores the last result in GlobalF.
+func BenchmarkHypot(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Hypot(3, 4)
+	}
+	GlobalF = x
+}
+
+// BenchmarkHypotGo calls HypotGo(3, 4) b.N times and stores the last result in GlobalF.
+func BenchmarkHypotGo(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = HypotGo(3, 4)
+	}
+	GlobalF = x
+}
+
+// BenchmarkIlogb calls Ilogb(.5) b.N times and stores the last result in GlobalI.
+func BenchmarkIlogb(b *testing.B) {
+	x := 0
+	for i := 0; i < b.N; i++ {
+		x = Ilogb(.5)
+	}
+	GlobalI = x
+}
+
+// BenchmarkJ0 calls J0(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkJ0(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = J0(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkJ1 calls J1(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkJ1(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = J1(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkJn calls Jn(2, 2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkJn(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Jn(2, 2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkLdexp calls Ldexp(.5, 2) b.N times and stores the last result in GlobalF.
+func BenchmarkLdexp(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Ldexp(.5, 2)
+	}
+	GlobalF = x
+}
+
+// BenchmarkLgamma calls Lgamma(2.5) b.N times; the last float result is
+// stored in GlobalF and the last int result in GlobalI.
+func BenchmarkLgamma(b *testing.B) {
+	x := 0.0
+	y := 0
+	for i := 0; i < b.N; i++ {
+		x, y = Lgamma(2.5)
+	}
+	GlobalF = x
+	GlobalI = y
+}
+
+// BenchmarkLog calls Log(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkLog(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Log(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkLogb calls Logb(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkLogb(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Logb(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkLog1p calls Log1p(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkLog1p(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Log1p(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkLog10 calls Log10(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkLog10(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Log10(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkLog2 calls Log2(.5) b.N times. Unlike most sibling benchmarks,
+// which assign to GlobalF, it adds the last result to GlobalF.
+func BenchmarkLog2(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Log2(.5)
+	}
+	GlobalF += x
+}
+
+// BenchmarkModf calls Modf(1.5) b.N times and adds both of the last results
+// to GlobalF so that each return value is consumed.
+func BenchmarkModf(b *testing.B) {
+	x := 0.0
+	y := 0.0
+	for i := 0; i < b.N; i++ {
+		x, y = Modf(1.5)
+	}
+	GlobalF += x
+	GlobalF += y
+}
+
+// BenchmarkNextafter32 calls Nextafter32(.5, 1) b.N times and stores the
+// last result, widened to float64, in GlobalF.
+func BenchmarkNextafter32(b *testing.B) {
+	x := float32(0.0)
+	for i := 0; i < b.N; i++ {
+		x = Nextafter32(.5, 1)
+	}
+	GlobalF = float64(x)
+}
+
+// BenchmarkNextafter64 calls Nextafter(.5, 1) b.N times and stores the last
+// result in GlobalF.
+func BenchmarkNextafter64(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Nextafter(.5, 1)
+	}
+	GlobalF = x
+}
+
+// BenchmarkPowInt calls Pow with integral arguments, Pow(2, 2), b.N times
+// and stores the last result in GlobalF.
+func BenchmarkPowInt(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Pow(2, 2)
+	}
+	GlobalF = x
+}
+
+// BenchmarkPowFrac calls Pow with fractional arguments, Pow(2.5, 1.5), b.N
+// times and stores the last result in GlobalF.
+func BenchmarkPowFrac(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Pow(2.5, 1.5)
+	}
+	GlobalF = x
+}
+
+// pow10pos is the positive exponent used by BenchmarkPow10Pos.
+var pow10pos = int(300)
+
+// BenchmarkPow10Pos calls Pow10(pow10pos) b.N times and stores the last
+// result in GlobalF.
+func BenchmarkPow10Pos(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Pow10(pow10pos)
+	}
+	GlobalF = x
+}
+
+// pow10neg is the negative exponent used by BenchmarkPow10Neg.
+var pow10neg = int(-300)
+
+// BenchmarkPow10Neg calls Pow10(pow10neg) b.N times and stores the last
+// result in GlobalF.
+func BenchmarkPow10Neg(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Pow10(pow10neg)
+	}
+	GlobalF = x
+}
+
+// roundNeg is the negative half-way input shared by BenchmarkRound,
+// BenchmarkRoundToEven and BenchmarkFloat64bits.
+var roundNeg = float64(-2.5)
+
+// BenchmarkRound calls Round(roundNeg) b.N times and stores the last result
+// in GlobalF.
+func BenchmarkRound(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Round(roundNeg)
+	}
+	GlobalF = x
+}
+
+// BenchmarkRoundToEven calls RoundToEven(roundNeg) b.N times and stores the
+// last result in GlobalF.
+func BenchmarkRoundToEven(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = RoundToEven(roundNeg)
+	}
+	GlobalF = x
+}
+
+// BenchmarkRemainder calls Remainder(10, 3) b.N times and stores the last
+// result in GlobalF.
+func BenchmarkRemainder(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Remainder(10, 3)
+	}
+	GlobalF = x
+}
+
+// signbitPos is the positive input used by BenchmarkSignbit.
+var signbitPos = 2.5
+
+// BenchmarkSignbit calls Signbit(signbitPos) b.N times and stores the last
+// result in GlobalB.
+func BenchmarkSignbit(b *testing.B) {
+	x := false
+	for i := 0; i < b.N; i++ {
+		x = Signbit(signbitPos)
+	}
+	GlobalB = x
+}
+
+// BenchmarkSin calls Sin(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkSin(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Sin(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkSincos calls Sincos(.5) b.N times and adds both of the last
+// results to GlobalF so that each return value is consumed.
+func BenchmarkSincos(b *testing.B) {
+	x := 0.0
+	y := 0.0
+	for i := 0; i < b.N; i++ {
+		x, y = Sincos(.5)
+	}
+	GlobalF += x
+	GlobalF += y
+}
+
+// BenchmarkSinh calls Sinh(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkSinh(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Sinh(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkSqrtIndirect calls Sqrt through the local function value f with a
+// fixed argument, accumulating the results into x, and stores the sum in
+// GlobalF.
+func BenchmarkSqrtIndirect(b *testing.B) {
+	x, y := 0.0, 10.0
+	f := Sqrt
+	for i := 0; i < b.N; i++ {
+		x += f(y)
+	}
+	GlobalF = x
+}
+
+// BenchmarkSqrtLatency calls Sqrt by name in a dependent chain: each call's
+// argument is the previous call's result. The final value is stored in
+// GlobalF.
+func BenchmarkSqrtLatency(b *testing.B) {
+	x := 10.0
+	for i := 0; i < b.N; i++ {
+		x = Sqrt(x)
+	}
+	GlobalF = x
+}
+
+// BenchmarkSqrtIndirectLatency is the dependent-chain benchmark of
+// BenchmarkSqrtLatency, but calls Sqrt through the local function value f.
+func BenchmarkSqrtIndirectLatency(b *testing.B) {
+	x := 10.0
+	f := Sqrt
+	for i := 0; i < b.N; i++ {
+		x = f(x)
+	}
+	GlobalF = x
+}
+
+// BenchmarkSqrtGoLatency calls SqrtGo in a dependent chain, each call taking
+// the previous result as its argument, and stores the final value in GlobalF.
+func BenchmarkSqrtGoLatency(b *testing.B) {
+	x := 10.0
+	for i := 0; i < b.N; i++ {
+		x = SqrtGo(x)
+	}
+	GlobalF = x
+}
+
+// isPrime reports whether no j in [2, Sqrt(i)] divides i. It is a helper for
+// BenchmarkSqrtPrime; the loop condition deliberately re-evaluates Sqrt on
+// every iteration (see the comment in the body).
+func isPrime(i int) bool {
+	// Yes, this is a dumb way to write this code,
+	// but calling Sqrt repeatedly in this way demonstrates
+	// the benefit of using a direct SQRT instruction on systems
+	// that have one, whereas the obvious loop seems not to
+	// demonstrate such a benefit.
+	for j := 2; float64(j) <= Sqrt(float64(i)); j++ {
+		if i%j == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// BenchmarkSqrtPrime calls isPrime(100003) b.N times and stores the last
+// result in GlobalB.
+func BenchmarkSqrtPrime(b *testing.B) {
+	x := false
+	for i := 0; i < b.N; i++ {
+		x = isPrime(100003)
+	}
+	GlobalB = x
+}
+
+// BenchmarkTan calls Tan(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkTan(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Tan(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkTanh calls Tanh(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkTanh(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Tanh(2.5)
+	}
+	GlobalF = x
+}
+// BenchmarkTrunc calls Trunc(.5) b.N times and stores the last result in GlobalF.
+func BenchmarkTrunc(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Trunc(.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkY0 calls Y0(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkY0(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Y0(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkY1 calls Y1(2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkY1(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Y1(2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkYn calls Yn(2, 2.5) b.N times and stores the last result in GlobalF.
+func BenchmarkYn(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Yn(2, 2.5)
+	}
+	GlobalF = x
+}
+
+// BenchmarkFloat64bits calls Float64bits(roundNeg) b.N times and stores the
+// last result, converted to int, in GlobalI.
+func BenchmarkFloat64bits(b *testing.B) {
+	y := uint64(0)
+	for i := 0; i < b.N; i++ {
+		y = Float64bits(roundNeg)
+	}
+	GlobalI = int(y)
+}
+
+// roundUint64 is the bit pattern used by BenchmarkFloat64frombits.
+var roundUint64 = uint64(5)
+
+// BenchmarkFloat64frombits calls Float64frombits(roundUint64) b.N times and
+// stores the last result in GlobalF.
+func BenchmarkFloat64frombits(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Float64frombits(roundUint64)
+	}
+	GlobalF = x
+}
+
+// roundFloat32 is the float32 input used by BenchmarkFloat32bits.
+var roundFloat32 = float32(-2.5)
+
+// BenchmarkFloat32bits calls Float32bits(roundFloat32) b.N times and stores
+// the last result, converted to int, in GlobalI.
+func BenchmarkFloat32bits(b *testing.B) {
+	y := uint32(0)
+	for i := 0; i < b.N; i++ {
+		y = Float32bits(roundFloat32)
+	}
+	GlobalI = int(y)
+}
+
+// roundUint32 is the bit pattern used by BenchmarkFloat32frombits.
+var roundUint32 = uint32(5)
+
+// BenchmarkFloat32frombits calls Float32frombits(roundUint32) b.N times and
+// stores the last result, widened to float64, in GlobalF.
+func BenchmarkFloat32frombits(b *testing.B) {
+	x := float32(0.0)
+	for i := 0; i < b.N; i++ {
+		x = Float32frombits(roundUint32)
+	}
+	GlobalF = float64(x)
+}
+
+// BenchmarkFMA calls FMA(E, Pi, x) b.N times, feeding each result back in as
+// the addend of the next call, and stores the final value in GlobalF.
+func BenchmarkFMA(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = FMA(E, Pi, x)
+	}
+	GlobalF = x
+}
diff --git a/src/math/arith_s390x.go b/src/math/arith_s390x.go
new file mode 100644
index 0000000..129156a
--- /dev/null
+++ b/src/math/arith_s390x.go
@@ -0,0 +1,170 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import "internal/cpu"
+
+// expTrampolineSetup and expAsm are declared without bodies, so their
+// implementations are supplied outside this Go file.
+func expTrampolineSetup(x float64) float64
+func expAsm(x float64) float64
+
+// logTrampolineSetup and logAsm are likewise bodiless declarations.
+func logTrampolineSetup(x float64) float64
+func logAsm(x float64) float64
+
+// Below here all functions are grouped in stubs.go for other
+// architectures.
+//
+// Each haveArchX constant below is true and is paired with an archX entry
+// point plus two helpers, xTrampolineSetup and xAsm. All three functions are
+// declared without bodies, so their implementations are supplied outside
+// this Go file.
+// NOTE(review): presumably archX first dispatches through xTrampolineSetup,
+// which selects xAsm or the pure Go routine depending on hasVX — confirm
+// against the assembly sources.
+
+const haveArchLog10 = true
+
+func archLog10(x float64) float64
+func log10TrampolineSetup(x float64) float64
+func log10Asm(x float64) float64
+
+const haveArchCos = true
+
+func archCos(x float64) float64
+func cosTrampolineSetup(x float64) float64
+func cosAsm(x float64) float64
+
+const haveArchCosh = true
+
+func archCosh(x float64) float64
+func coshTrampolineSetup(x float64) float64
+func coshAsm(x float64) float64
+
+const haveArchSin = true
+
+func archSin(x float64) float64
+func sinTrampolineSetup(x float64) float64
+func sinAsm(x float64) float64
+
+const haveArchSinh = true
+
+func archSinh(x float64) float64
+func sinhTrampolineSetup(x float64) float64
+func sinhAsm(x float64) float64
+
+const haveArchTanh = true
+
+func archTanh(x float64) float64
+func tanhTrampolineSetup(x float64) float64
+func tanhAsm(x float64) float64
+
+const haveArchLog1p = true
+
+func archLog1p(x float64) float64
+func log1pTrampolineSetup(x float64) float64
+func log1pAsm(x float64) float64
+
+const haveArchAtanh = true
+
+func archAtanh(x float64) float64
+func atanhTrampolineSetup(x float64) float64
+func atanhAsm(x float64) float64
+
+const haveArchAcos = true
+
+func archAcos(x float64) float64
+func acosTrampolineSetup(x float64) float64
+func acosAsm(x float64) float64
+
+const haveArchAcosh = true
+
+func archAcosh(x float64) float64
+func acoshTrampolineSetup(x float64) float64
+func acoshAsm(x float64) float64
+
+const haveArchAsin = true
+
+func archAsin(x float64) float64
+func asinTrampolineSetup(x float64) float64
+func asinAsm(x float64) float64
+
+const haveArchAsinh = true
+
+func archAsinh(x float64) float64
+func asinhTrampolineSetup(x float64) float64
+func asinhAsm(x float64) float64
+
+const haveArchErf = true
+
+func archErf(x float64) float64
+func erfTrampolineSetup(x float64) float64
+func erfAsm(x float64) float64
+
+const haveArchErfc = true
+
+func archErfc(x float64) float64
+func erfcTrampolineSetup(x float64) float64
+func erfcAsm(x float64) float64
+
+const haveArchAtan = true
+
+func archAtan(x float64) float64
+func atanTrampolineSetup(x float64) float64
+func atanAsm(x float64) float64
+
+const haveArchAtan2 = true
+
+// NOTE(review): archAtan2 names its parameters (y, x) while the two helpers
+// below name theirs (x, y). The positions are what matter to callers, but
+// confirm the names agree with the FP references in the assembly.
+func archAtan2(y, x float64) float64
+func atan2TrampolineSetup(x, y float64) float64
+func atan2Asm(x, y float64) float64
+
+const haveArchCbrt = true
+
+func archCbrt(x float64) float64
+func cbrtTrampolineSetup(x float64) float64
+func cbrtAsm(x float64) float64
+
+const haveArchTan = true
+
+func archTan(x float64) float64
+func tanTrampolineSetup(x float64) float64
+func tanAsm(x float64) float64
+
+const haveArchExpm1 = true
+
+func archExpm1(x float64) float64
+func expm1TrampolineSetup(x float64) float64
+func expm1Asm(x float64) float64
+
+const haveArchPow = true
+
+func archPow(x, y float64) float64
+func powTrampolineSetup(x, y float64) float64
+func powAsm(x, y float64) float64
+
+// haveArchFrexp is false: this file provides no architecture-specific Frexp.
+const haveArchFrexp = false
+
+// archFrexp is a placeholder that panics if it is ever called.
+func archFrexp(x float64) (float64, int) {
+	panic("not implemented")
+}
+
+// haveArchLdexp is false: this file provides no architecture-specific Ldexp.
+const haveArchLdexp = false
+
+// archLdexp is a placeholder that panics if it is ever called.
+func archLdexp(frac float64, exp int) float64 {
+	panic("not implemented")
+}
+
+// haveArchLog2 is false: this file provides no architecture-specific Log2.
+const haveArchLog2 = false
+
+// archLog2 is a placeholder that panics if it is ever called.
+func archLog2(x float64) float64 {
+	panic("not implemented")
+}
+
+// haveArchMod is false: this file provides no architecture-specific Mod.
+const haveArchMod = false
+
+// archMod is a placeholder that panics if it is ever called.
+func archMod(x, y float64) float64 {
+	panic("not implemented")
+}
+
+// haveArchRemainder is false: this file provides no architecture-specific
+// Remainder.
+const haveArchRemainder = false
+
+// archRemainder is a placeholder that panics if it is ever called.
+func archRemainder(x, y float64) float64 {
+	panic("not implemented")
+}
+
+// hasVX reports whether the machine has the z/Architecture
+// vector facility installed and enabled.
+var hasVX = cpu.S390X.HasVX
diff --git a/src/math/arith_s390x_test.go b/src/math/arith_s390x_test.go
new file mode 100644
index 0000000..cfbc7b7
--- /dev/null
+++ b/src/math/arith_s390x_test.go
@@ -0,0 +1,442 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests whether the non vector routines are working, even when the tests are run on a
+// vector-capable machine.
+package math_test
+
+import (
+	. "math"
+	"testing"
+)
+
+// TestCosNovec checks CosNoVec against the cos reference values for vf and
+// against cosSC for the special cases in vfcosSC. It is skipped when HasVX
+// is false.
+func TestCosNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := CosNoVec(v), cos[i]
+		if !veryclose(want, got) {
+			t.Errorf("Cos(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfcosSC {
+		got, want := CosNoVec(v), cosSC[i]
+		if !alike(want, got) {
+			t.Errorf("Cos(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestCoshNovec checks CoshNoVec against the cosh reference values for vf
+// and against coshSC for the special cases in vfcoshSC. It is skipped when
+// HasVX is false.
+func TestCoshNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := CoshNoVec(v), cosh[i]
+		if !close(want, got) {
+			t.Errorf("Cosh(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfcoshSC {
+		got, want := CoshNoVec(v), coshSC[i]
+		if !alike(want, got) {
+			t.Errorf("Cosh(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+// TestSinNovec checks SinNoVec against the sin reference values for vf and
+// against sinSC for the special cases in vfsinSC. It is skipped when HasVX
+// is false.
+func TestSinNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := SinNoVec(v), sin[i]
+		if !veryclose(want, got) {
+			t.Errorf("Sin(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfsinSC {
+		got, want := SinNoVec(v), sinSC[i]
+		if !alike(want, got) {
+			t.Errorf("Sin(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestSinhNovec checks SinhNoVec against the sinh reference values for vf
+// and against sinhSC for the special cases in vfsinhSC. It is skipped when
+// HasVX is false.
+func TestSinhNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := SinhNoVec(v), sinh[i]
+		if !close(want, got) {
+			t.Errorf("Sinh(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfsinhSC {
+		got, want := SinhNoVec(v), sinhSC[i]
+		if !alike(want, got) {
+			t.Errorf("Sinh(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// Check that math functions of high angle values
+// return accurate results. [Since (vf[i] + large) - large != vf[i],
+// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
+// a multiple of 2*Pi, is misleading.]
+//
+// TestLargeCosNovec compares CosNoVec(vf[i] + large) with the precomputed
+// cosLarge[i]. It is skipped when HasVX is false.
+func TestLargeCosNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		arg := v + large
+		want, got := cosLarge[i], CosNoVec(arg)
+		if !close(want, got) {
+			t.Errorf("Cos(%g) = %g, want %g", arg, got, want)
+		}
+	}
+}
+
+// TestLargeSinNovec compares SinNoVec(vf[i] + large), with large equal to
+// 100000*Pi, against the precomputed sinLarge[i]. It is skipped when HasVX
+// is false.
+func TestLargeSinNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		arg := v + large
+		want, got := sinLarge[i], SinNoVec(arg)
+		if !close(want, got) {
+			t.Errorf("Sin(%g) = %g, want %g", arg, got, want)
+		}
+	}
+}
+
+// TestLargeTanNovec compares TanNovec(vf[i] + large), with large equal to
+// 100000*Pi, against the precomputed tanLarge[i]. It is skipped when HasVX
+// is false.
+func TestLargeTanNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	large := float64(100000 * Pi)
+	for i, v := range vf {
+		arg := v + large
+		want, got := tanLarge[i], TanNovec(arg)
+		if !close(want, got) {
+			t.Errorf("Tan(%g) = %g, want %g", arg, got, want)
+		}
+	}
+}
+
+// TestTanNovec checks TanNovec against the tan reference values for vf and
+// reuses the sine special-case tables (vfsinSC / sinSC) for its special
+// cases. It is skipped when HasVX is false.
+func TestTanNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := TanNovec(v), tan[i]
+		if !veryclose(want, got) {
+			t.Errorf("Tan(%g) = %g, want %g", v, got, want)
+		}
+	}
+	// same special cases as Sin
+	for i, v := range vfsinSC {
+		got, want := TanNovec(v), sinSC[i]
+		if !alike(want, got) {
+			t.Errorf("Tan(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestTanhNovec checks TanhNoVec against the tanh reference values for vf
+// and against tanhSC for the special cases in vftanhSC. It is skipped when
+// HasVX is false.
+func TestTanhNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := TanhNoVec(v), tanh[i]
+		if !veryclose(want, got) {
+			t.Errorf("Tanh(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vftanhSC {
+		got, want := TanhNoVec(v), tanhSC[i]
+		if !alike(want, got) {
+			t.Errorf("Tanh(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestLog10Novec checks Log10NoVec on |vf[i]| against log10, on E against
+// Log10E (exact equality), and on the special cases in vflogSC against
+// logSC. It is skipped when HasVX is false.
+func TestLog10Novec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := Abs(v)
+		got, want := Log10NoVec(a), log10[i]
+		if !veryclose(want, got) {
+			t.Errorf("Log10(%g) = %g, want %g", a, got, want)
+		}
+	}
+	if got := Log10NoVec(E); got != Log10E {
+		t.Errorf("Log10(%g) = %g, want %g", E, got, Log10E)
+	}
+	for i, v := range vflogSC {
+		got, want := Log10NoVec(v), logSC[i]
+		if !alike(want, got) {
+			t.Errorf("Log10(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestLog1pNovec checks Log1pNovec on vf[i]/100 against log1p, on 9 against
+// Ln10 (exact equality), and on the special cases in vflog1pSC against
+// log1pSC. It is skipped when HasVX is false.
+func TestLog1pNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 100
+		if f := Log1pNovec(a); !veryclose(log1p[i], f) {
+			t.Errorf("Log1p(%g) = %g, want %g", a, f, log1p[i])
+		}
+	}
+	a := 9.0
+	if f := Log1pNovec(a); f != Ln10 {
+		t.Errorf("Log1p(%g) = %g, want %g", a, f, Ln10)
+	}
+	// Range over vflog1pSC itself. The loop used to be bounded by
+	// len(vflogSC), a different table, which either skips trailing Log1p
+	// special cases or indexes out of range when the lengths differ.
+	for i, v := range vflog1pSC {
+		if f := Log1pNovec(v); !alike(log1pSC[i], f) {
+			t.Errorf("Log1p(%g) = %g, want %g", v, f, log1pSC[i])
+		}
+	}
+}
+
+// TestAtanhNovec checks AtanhNovec on vf[i]/10 against atanh and on the
+// special cases in vfatanhSC against atanhSC. It is skipped when HasVX is
+// false.
+func TestAtanhNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 10
+		got, want := AtanhNovec(a), atanh[i]
+		if !veryclose(want, got) {
+			t.Errorf("Atanh(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vfatanhSC {
+		got, want := AtanhNovec(v), atanhSC[i]
+		if !alike(want, got) {
+			t.Errorf("Atanh(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestAcosNovec checks AcosNovec on vf[i]/10 against acos and on the special
+// cases in vfacosSC against acosSC. It is skipped when HasVX is false.
+func TestAcosNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 10
+		got, want := AcosNovec(a), acos[i]
+		if !close(want, got) {
+			t.Errorf("Acos(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vfacosSC {
+		got, want := AcosNovec(v), acosSC[i]
+		if !alike(want, got) {
+			t.Errorf("Acos(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestAsinNovec checks AsinNovec on vf[i]/10 against asin and on the special
+// cases in vfasinSC against asinSC. It is skipped when HasVX is false.
+func TestAsinNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 10
+		got, want := AsinNovec(a), asin[i]
+		if !veryclose(want, got) {
+			t.Errorf("Asin(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vfasinSC {
+		got, want := AsinNovec(v), asinSC[i]
+		if !alike(want, got) {
+			t.Errorf("Asin(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestAcoshNovec checks AcoshNovec on 1+|vf[i]| against acosh and on the
+// special cases in vfacoshSC against acoshSC. It is skipped when HasVX is
+// false.
+func TestAcoshNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := 1 + Abs(v)
+		got, want := AcoshNovec(a), acosh[i]
+		if !veryclose(want, got) {
+			t.Errorf("Acosh(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vfacoshSC {
+		got, want := AcoshNovec(v), acoshSC[i]
+		if !alike(want, got) {
+			t.Errorf("Acosh(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestAsinhNovec checks AsinhNovec against the asinh reference values for vf
+// and against asinhSC for the special cases in vfasinhSC. It is skipped when
+// HasVX is false.
+func TestAsinhNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := AsinhNovec(v), asinh[i]
+		if !veryclose(want, got) {
+			t.Errorf("Asinh(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfasinhSC {
+		got, want := AsinhNovec(v), asinhSC[i]
+		if !alike(want, got) {
+			t.Errorf("Asinh(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestErfNovec checks ErfNovec on vf[i]/10 against erf and on the special
+// cases in vferfSC against erfSC. It is skipped when HasVX is false.
+func TestErfNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 10
+		got, want := ErfNovec(a), erf[i]
+		if !veryclose(want, got) {
+			t.Errorf("Erf(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vferfSC {
+		got, want := ErfNovec(v), erfSC[i]
+		if !alike(want, got) {
+			t.Errorf("Erf(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestErfcNovec checks ErfcNovec on vf[i]/10 against erfc and on the special
+// cases in vferfcSC against erfcSC. It is skipped when HasVX is false.
+func TestErfcNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 10
+		got, want := ErfcNovec(a), erfc[i]
+		if !veryclose(want, got) {
+			t.Errorf("Erfc(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vferfcSC {
+		got, want := ErfcNovec(v), erfcSC[i]
+		if !alike(want, got) {
+			t.Errorf("Erfc(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestAtanNovec checks AtanNovec against the atan reference values for vf
+// and against atanSC for the special cases in vfatanSC. It is skipped when
+// HasVX is false.
+func TestAtanNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := AtanNovec(v), atan[i]
+		if !veryclose(want, got) {
+			t.Errorf("Atan(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfatanSC {
+		got, want := AtanNovec(v), atanSC[i]
+		if !alike(want, got) {
+			t.Errorf("Atan(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestAtan2Novec checks Atan2Novec(10, vf[i]) against atan2 and each
+// argument pair in vfatan2SC against atan2SC. It is skipped when HasVX is
+// false.
+func TestAtan2Novec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := Atan2Novec(10, v), atan2[i]
+		if !veryclose(want, got) {
+			t.Errorf("Atan2(10, %g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, pair := range vfatan2SC {
+		first, second := pair[0], pair[1]
+		got, want := Atan2Novec(first, second), atan2SC[i]
+		if !alike(want, got) {
+			t.Errorf("Atan2(%g, %g) = %g, want %g", first, second, got, want)
+		}
+	}
+}
+
+// TestCbrtNovec checks CbrtNovec against the cbrt reference values for vf
+// and against cbrtSC for the special cases in vfcbrtSC. It is skipped when
+// HasVX is false.
+func TestCbrtNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := CbrtNovec(v), cbrt[i]
+		if !veryclose(want, got) {
+			t.Errorf("Cbrt(%g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, v := range vfcbrtSC {
+		got, want := CbrtNovec(v), cbrtSC[i]
+		if !alike(want, got) {
+			t.Errorf("Cbrt(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestLogNovec checks LogNovec on |vf[i]| against log (exact equality), on
+// 10 against Ln10 (exact equality), and on the special cases in vflogSC
+// against logSC. It is skipped when HasVX is false.
+func TestLogNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := Abs(v)
+		got, want := LogNovec(a), log[i]
+		if want != got {
+			t.Errorf("Log(%g) = %g, want %g", a, got, want)
+		}
+	}
+	if got := LogNovec(10); got != Ln10 {
+		t.Errorf("Log(%g) = %g, want %g", 10.0, got, Ln10)
+	}
+	for i, v := range vflogSC {
+		got, want := LogNovec(v), logSC[i]
+		if !alike(want, got) {
+			t.Errorf("Log(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestExpNovec checks ExpNovec against the exp and expSC reference tables.
+// It is skipped when HasVX is false.
+//
+// The helper used to be invoked twice, labelled "Exp" and "ExpGo", but it
+// ignored the function it was given and called ExpNovec both times. It is
+// now invoked once, with the function this file is meant to test.
+func TestExpNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	testExpNovec(t, ExpNovec, "Exp")
+}
+
+// testExpNovec runs expFn over vf and vfexpSC, comparing against exp (with
+// veryclose) and expSC (with alike). name is used only in failure messages.
+// The parameter no longer shadows the dot-imported math.Exp, and it is
+// actually called.
+func testExpNovec(t *testing.T, expFn func(float64) float64, name string) {
+	for i := 0; i < len(vf); i++ {
+		if f := expFn(vf[i]); !veryclose(exp[i], f) {
+			t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i])
+		}
+	}
+	for i := 0; i < len(vfexpSC); i++ {
+		if f := expFn(vfexpSC[i]); !alike(expSC[i], f) {
+			t.Errorf("%s(%g) = %g, want %g", name, vfexpSC[i], f, expSC[i])
+		}
+	}
+}
+
+// TestExpm1Novec checks Expm1Novec on vf[i]/100 against expm1, on vf[i]*10
+// against expm1Large, and on the special cases in vfexpm1SC against
+// expm1SC. It is skipped when HasVX is false.
+func TestExpm1Novec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		a := v / 100
+		got, want := Expm1Novec(a), expm1[i]
+		if !veryclose(want, got) {
+			t.Errorf("Expm1(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vf {
+		a := v * 10
+		got, want := Expm1Novec(a), expm1Large[i]
+		if !close(want, got) {
+			t.Errorf("Expm1(%g) = %g, want %g", a, got, want)
+		}
+	}
+	for i, v := range vfexpm1SC {
+		got, want := Expm1Novec(v), expm1SC[i]
+		if !alike(want, got) {
+			t.Errorf("Expm1(%g) = %g, want %g", v, got, want)
+		}
+	}
+}
+
+// TestPowNovec checks PowNovec(10, vf[i]) against pow and each argument pair
+// in vfpowSC against powSC. It is skipped when HasVX is false.
+func TestPowNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	for i, v := range vf {
+		got, want := PowNovec(10, v), pow[i]
+		if !close(want, got) {
+			t.Errorf("Pow(10, %g) = %g, want %g", v, got, want)
+		}
+	}
+	for i, pair := range vfpowSC {
+		base, exponent := pair[0], pair[1]
+		got, want := PowNovec(base, exponent), powSC[i]
+		if !alike(want, got) {
+			t.Errorf("Pow(%g, %g) = %g, want %g", base, exponent, got, want)
+		}
+	}
+}
diff --git a/src/math/asin.go b/src/math/asin.go
new file mode 100644
index 0000000..8e1b2ab
--- /dev/null
+++ b/src/math/asin.go
@@ -0,0 +1,67 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point arcsine and arccosine.
+
+	They are implemented by computing the arctangent
+	after appropriate range reduction.
+*/
+
+// Asin returns the arcsine, in radians, of x.
+//
+// Special cases are:
+//
+//	Asin(±0) = ±0
+//	Asin(x) = NaN if x < -1 or x > 1
+func Asin(x float64) float64 {
+	// Fall back to the portable routine unless an architecture-specific
+	// implementation is available.
+	if !haveArchAsin {
+		return asin(x)
+	}
+	return archAsin(x)
+}
+
+func asin(x float64) float64 {
+	if x == 0 {
+		return x // special case
+	}
+	sign := false
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if x > 1 {
+		return NaN() // special case
+	}
+
+	temp := Sqrt(1 - x*x)
+	if x > 0.7 {
+		temp = Pi/2 - satan(temp/x)
+	} else {
+		temp = satan(x / temp)
+	}
+
+	if sign {
+		temp = -temp
+	}
+	return temp
+}
+
+// Acos returns the arccosine, in radians, of x.
+//
+// Special case is:
+//
+//	Acos(x) = NaN if x < -1 or x > 1
+func Acos(x float64) float64 {
+	// Fall back to the portable routine unless an architecture-specific
+	// implementation is available.
+	if !haveArchAcos {
+		return acos(x)
+	}
+	return archAcos(x)
+}
+
+// acos is the portable implementation behind Acos: Pi/2 minus the arcsine
+// of x, obtained through the exported Asin.
+func acos(x float64) float64 {
+	arcsine := Asin(x)
+	return Pi/2 - arcsine
+}
diff --git a/src/math/asin_s390x.s b/src/math/asin_s390x.s
new file mode 100644
index 0000000..dc54d05
--- /dev/null
+++ b/src/math/asin_s390x.s
@@ -0,0 +1,162 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+// The table holds 28 eight-byte entries (offsets 0 through 216), matching the
+// 224-byte size given to GLOBL.
+DATA ·asinrodataL15<> + 0(SB)/8, $-1.309611320495605469
+DATA ·asinrodataL15<> + 8(SB)/8, $0x3ff921fb54442d18	// pi/2
+DATA ·asinrodataL15<> + 16(SB)/8, $0xbff921fb54442d18	// -pi/2
+DATA ·asinrodataL15<> + 24(SB)/8, $1.309611320495605469
+DATA ·asinrodataL15<> + 32(SB)/8, $-0.0
+DATA ·asinrodataL15<> + 40(SB)/8, $1.199437040755305217
+DATA ·asinrodataL15<> + 48(SB)/8, $0.166666666666651626E+00
+DATA ·asinrodataL15<> + 56(SB)/8, $0.750000000042621169E-01
+DATA ·asinrodataL15<> + 64(SB)/8, $0.446428567178116477E-01
+DATA ·asinrodataL15<> + 72(SB)/8, $0.303819660378071894E-01
+DATA ·asinrodataL15<> + 80(SB)/8, $0.223715011892010405E-01
+DATA ·asinrodataL15<> + 88(SB)/8, $0.173659424522364952E-01
+DATA ·asinrodataL15<> + 96(SB)/8, $0.137810186504372266E-01
+DATA ·asinrodataL15<> + 104(SB)/8, $0.134066870961173521E-01
+DATA ·asinrodataL15<> + 112(SB)/8, $-.412335502831898721E-02
+DATA ·asinrodataL15<> + 120(SB)/8, $0.867383739532082719E-01
+DATA ·asinrodataL15<> + 128(SB)/8, $-.328765950607171649E+00
+DATA ·asinrodataL15<> + 136(SB)/8, $0.110401073869414626E+01
+DATA ·asinrodataL15<> + 144(SB)/8, $-.270694366992537307E+01
+DATA ·asinrodataL15<> + 152(SB)/8, $0.500196500770928669E+01
+DATA ·asinrodataL15<> + 160(SB)/8, $-.665866959108585165E+01
+DATA ·asinrodataL15<> + 168(SB)/8, $-.344895269334086578E+01
+DATA ·asinrodataL15<> + 176(SB)/8, $0.927437952918301659E+00
+DATA ·asinrodataL15<> + 184(SB)/8, $0.610487478874645653E+01
+DATA ·asinrodataL15<> + 192(SB)/8, $0x7ff8000000000000			//NaN
+DATA ·asinrodataL15<> + 200(SB)/8, $-1.0
+DATA ·asinrodataL15<> + 208(SB)/8, $1.0
+DATA ·asinrodataL15<> + 216(SB)/8, $1.00000000000000000e-20
+GLOBL ·asinrodataL15<> + 0(SB), RODATA, $224
+
+// Asin returns the arcsine, in radians, of the argument.
+//
+// Special cases are:
+//      Asin(±0) = ±0
+//      Asin(x) = NaN if x < -1 or x > 1
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+// asinAsm reads its float64 argument from x+0(FP) and writes the result to
+// ret+8(FP). R9 is loaded with the address of the asinrodataL15 table and is
+// the base register for every constant load below. R7 receives the raw bits
+// of x shifted right by 32, and R12 is derived from R7 by the llgtr-encoded
+// WORD.
+// NOTE(review): R12 is presumably the high word of |x|, compared against the
+// iilf immediates to select a range — confirm against the ISA manual.
+TEXT	·asinAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0
+	MOVD	$·asinrodataL15<>+0(SB), R9
+	LGDR	F0, R7
+	FMOVD	F0, F8
+	SRAD	$32, R7
+	WORD	$0xC0193FE6 //iilf  %r1,1072079005
+	BYTE	$0xA0
+	BYTE	$0x9D
+	WORD	$0xB91700C7 //llgtr %r12,%r7
+	MOVW	R12, R8
+	MOVW	R1, R6
+	CMPBGT	R8, R6, L2
+	WORD	$0xC0193BFF //iilf  %r1,1006632959
+	BYTE	$0xFF
+	BYTE	$0xFF
+	MOVW	R1, R6
+	CMPBGT	R8, R6, L13
+L3:
+	// Reached when R8 exceeds neither threshold; uses the 1e-20 table entry.
+	FMOVD	216(R9), F0
+	FMADD	F0, F8, F8
+L1:
+	// Common exit: the result is in F8.
+	FMOVD	F8, ret+8(FP)
+	RET
+L2:
+	WORD	$0xC0193FEF	//iilf	%r1,1072693247
+	BYTE	$0xFF
+	BYTE	$0xFF
+	CMPW	R12, R1
+	BLE	L14
+L5:
+	// The two cdb-encoded compares use displacements 0xD0 (208) and 0xC8 (200),
+	// i.e. the 1.0 and -1.0 table entries; equality branches to L9 / L10.
+	WORD	$0xED0090D0	//cdb	%f0,.L17-.L15(%r9)
+	BYTE	$0x00
+	BYTE	$0x19
+	BEQ		L9
+	WORD	$0xED0090C8	//cdb	%f0,.L18-.L15(%r9)
+	BYTE	$0x00
+	BYTE	$0x19
+	BEQ	L10
+	WFCEDBS	V8, V8, V0
+	BVS	L1
+	// Otherwise load the 0x7ff8000000000000 (NaN) table entry as the result.
+	FMOVD	192(R9), F8
+	BR	L1
+L13:
+	WFMDB	V0, V0, V10
+L4:
+	// Polynomial evaluation: a chain of fused multiply-adds over the
+	// coefficients stored at offsets 184 down to 48 of the table.
+	WFMDB	V10, V10, V0
+	FMOVD	184(R9), F6
+	FMOVD	176(R9), F2
+	FMOVD	168(R9), F4
+	WFMADB	V0, V2, V6, V2
+	FMOVD	160(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	152(R9), F6
+	WFMADB	V0, V2, V6, V2
+	FMOVD	144(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	136(R9), F6
+	WFMADB	V0, V2, V6, V2
+	WORD	$0xC0193FE6	//iilf	%r1,1072079005
+	BYTE	$0xA0
+	BYTE	$0x9D
+	FMOVD	128(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	120(R9), F6
+	WFMADB	V0, V2, V6, V2
+	FMOVD	112(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	104(R9), F6
+	WFMADB	V0, V2, V6, V2
+	FMOVD	96(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	88(R9), F6
+	WFMADB	V0, V2, V6, V2
+	FMOVD	80(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	72(R9), F6
+	WFMADB	V0, V2, V6, V2
+	FMOVD	64(R9), F6
+	WFMADB	V0, V4, V6, V4
+	FMOVD	56(R9), F6
+	WFMADB	V0, V2, V6, V2
+	FMOVD	48(R9), F6
+	WFMADB	V0, V4, V6, V0
+	WFMDB	V8, V10, V4
+	FMADD	F2, F10, F0
+	FMADD	F0, F4, F8
+	CMPW	R12, R1
+	BLE	L1
+	FMOVD	40(R9), F0
+	FMADD	F0, F1, F8
+	FMOVD	F8, ret+8(FP)
+	RET
+L14:
+	FMOVD	200(R9), F0
+	FMADD	F8, F8, F0
+	WORD	$0xB31300A0	//lcdbr	%f10,%f0
+	WORD	$0xED009020	//cdb	%f0,.L39-.L15(%r9)
+	BYTE	$0x00
+	BYTE	$0x19
+	FSQRT	F10, F8
+L6:
+	// Choose the F1 constant by the sign of R7 (the shifted bits of x):
+	// 24(R9) after negating F8, or 0(R9) at L8.
+	MOVW	R7, R6
+	CMPBLE	R6, $0, L8
+	WORD	$0xB3130088	//lcdbr	%f8,%f8
+	FMOVD	24(R9), F1
+	BR	L4
+L10:
+	// x == -1.0: return the -pi/2 table entry.
+	FMOVD	16(R9), F8
+	BR	L1
+L9:
+	// x == 1.0: return the pi/2 table entry.
+	FMOVD	8(R9), F8
+	FMOVD	F8, ret+8(FP)
+	RET
+L8:
+	FMOVD	0(R9), F1
+	BR	L4
diff --git a/src/math/asinh.go b/src/math/asinh.go
new file mode 100644
index 0000000..d913239
--- /dev/null
+++ b/src/math/asinh.go
@@ -0,0 +1,77 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c
+// and came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// asinh(x)
+// Method :
+//	Based on
+//	        asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
+//	we have
+//	asinh(x) := x  if  1+x*x=1,
+//	         := sign(x)*(log(x)+ln2) for large |x|, else
+//	         := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
+//	         := sign(x)*log1p(|x| + x**2/(1 + sqrt(1+x**2)))
+//
+
+// Asinh returns the inverse hyperbolic sine of x.
+//
+// Special cases are:
+//
+//	Asinh(±0) = ±0
+//	Asinh(±Inf) = ±Inf
+//	Asinh(NaN) = NaN
+func Asinh(x float64) float64 {
+	// Fall back to the portable routine unless an architecture-specific
+	// implementation is available.
+	if !haveArchAsinh {
+		return asinh(x)
+	}
+	return archAsinh(x)
+}
+
+// asinh is the portable implementation behind Asinh. NaN and ±Inf are
+// returned unchanged; otherwise it works on |x|, picks one of four formulas
+// by magnitude, and restores the sign at the end.
+func asinh(x float64) float64 {
+	const (
+		Ln2      = 6.93147180559945286227e-01 // 0x3FE62E42FEFA39EF
+		NearZero = 1.0 / (1 << 28)            // 2**-28
+		Large    = 1 << 28                    // 2**28
+	)
+	// special cases
+	if IsNaN(x) || IsInf(x, 0) {
+		return x
+	}
+	negative := x < 0
+	if negative {
+		x = -x
+	}
+	var result float64
+	if x > Large {
+		// |x| > 2**28
+		result = Log(x) + Ln2
+	} else if x > 2 {
+		// 2**28 > |x| > 2.0
+		result = Log(2*x + 1/(Sqrt(x*x+1)+x))
+	} else if x < NearZero {
+		// |x| < 2**-28
+		result = x
+	} else {
+		// 2.0 > |x| > 2**-28
+		result = Log1p(x + x*x/(1+Sqrt(1+x*x)))
+	}
+	if negative {
+		return -result
+	}
+	return result
+}
diff --git a/src/math/asinh_s390x.s b/src/math/asinh_s390x.s
new file mode 100644
index 0000000..1bcf295
--- /dev/null
+++ b/src/math/asinh_s390x.s
@@ -0,0 +1,213 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants; asinhAsm addresses this table through R9
+DATA ·asinhrodataL18<> + 0(SB)/8, $0.749999999977387502E-01	// offsets 0-80 are loaded by the code at label L16
+DATA ·asinhrodataL18<> + 8(SB)/8, $-.166666666666657082E+00
+DATA ·asinhrodataL18<> + 16(SB)/8, $0.303819368237360639E-01
+DATA ·asinhrodataL18<> + 24(SB)/8, $-.446428569571752982E-01
+DATA ·asinhrodataL18<> + 32(SB)/8, $0.173500047922695924E-01
+DATA ·asinhrodataL18<> + 40(SB)/8, $-.223719767210027185E-01
+DATA ·asinhrodataL18<> + 48(SB)/8, $0.113655037946822130E-01
+DATA ·asinhrodataL18<> + 56(SB)/8, $0.579747490622448943E-02
+DATA ·asinhrodataL18<> + 64(SB)/8, $-.139372433914359122E-01
+DATA ·asinhrodataL18<> + 72(SB)/8, $-.218674325255800840E-02
+DATA ·asinhrodataL18<> + 80(SB)/8, $-.891074277756961157E-02
+DATA ·asinhrodataL18<> + 88(SB)/8, $.41375273347623353626	// offsets 88-184 are loaded by the code at label L6
+DATA ·asinhrodataL18<> + 96(SB)/8, $.51487302528619766235E+04
+DATA ·asinhrodataL18<> + 104(SB)/8, $-1.67526912689208984375
+DATA ·asinhrodataL18<> + 112(SB)/8, $0.181818181818181826E+00
+DATA ·asinhrodataL18<> + 120(SB)/8, $-.165289256198351540E-01
+DATA ·asinhrodataL18<> + 128(SB)/8, $0.200350613573012186E-02
+DATA ·asinhrodataL18<> + 136(SB)/8, $-.273205381970859341E-03
+DATA ·asinhrodataL18<> + 144(SB)/8, $0.397389654305194527E-04
+DATA ·asinhrodataL18<> + 152(SB)/8, $0.938370938292558173E-06
+DATA ·asinhrodataL18<> + 160(SB)/8, $0.212881813645679599E-07
+DATA ·asinhrodataL18<> + 168(SB)/8, $-.602107458843052029E-05
+DATA ·asinhrodataL18<> + 176(SB)/8, $-.148682720127920854E-06
+DATA ·asinhrodataL18<> + 184(SB)/8, $-5.5
+DATA ·asinhrodataL18<> + 192(SB)/8, $1.0	// loaded at label L4 for the x*x+1 multiply-add
+DATA ·asinhrodataL18<> + 200(SB)/8, $1.0E-20	// loaded at label L2 on the small-argument path
+GLOBL ·asinhrodataL18<> + 0(SB), RODATA, $208	// 26 entries of 8 bytes
+
+// Table of log correction terms; 16 entries, loaded by the ld %f3,0(%r3,%r1) in asinhAsm
+DATA ·asinhtab2080<> + 0(SB)/8, $0.585235384085551248E-01
+DATA ·asinhtab2080<> + 8(SB)/8, $0.412206153771168640E-01
+DATA ·asinhtab2080<> + 16(SB)/8, $0.273839003221648339E-01
+DATA ·asinhtab2080<> + 24(SB)/8, $0.166383778368856480E-01
+DATA ·asinhtab2080<> + 32(SB)/8, $0.866678223433169637E-02
+DATA ·asinhtab2080<> + 40(SB)/8, $0.319831684989627514E-02
+DATA ·asinhtab2080<> + 48(SB)/8, $0.0
+DATA ·asinhtab2080<> + 56(SB)/8, $-.113006378583725549E-02
+DATA ·asinhtab2080<> + 64(SB)/8, $-.367979419636602491E-03
+DATA ·asinhtab2080<> + 72(SB)/8, $0.213172484510484979E-02
+DATA ·asinhtab2080<> + 80(SB)/8, $0.623271047682013536E-02
+DATA ·asinhtab2080<> + 88(SB)/8, $0.118140812789696885E-01
+DATA ·asinhtab2080<> + 96(SB)/8, $0.187681358930914206E-01
+DATA ·asinhtab2080<> + 104(SB)/8, $0.269985148668178992E-01
+DATA ·asinhtab2080<> + 112(SB)/8, $0.364186619761331328E-01
+DATA ·asinhtab2080<> + 120(SB)/8, $0.469505379381388441E-01
+GLOBL ·asinhtab2080<> + 0(SB), RODATA, $128
+
+// asinhAsm returns the inverse hyperbolic sine of the argument.
+// The argument is read from x+0(FP) and the result is stored to ret+8(FP).
+// Special cases are:
+//      Asinh(±0) = ±0
+//      Asinh(±Inf) = ±Inf
+//      Asinh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·asinhAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	MOVD	$·asinhrodataL18<>+0(SB), R9	// R9 = base address of the constant table
+	LGDR	F0, R12	// R12 = bit pattern of x
+	WORD	$0xC0293FDF	//iilf	%r2,1071644671
+	BYTE	$0xFF
+	BYTE	$0xFF
+	SRAD	$32, R12	// R12 = high word of x, sign-extended
+	WORD	$0xB917001C	//llgtr	%r1,%r12
+	MOVW	R1, R6
+	MOVW	R2, R7
+	CMPBLE	R6, R7, L2	// taken when R1 <= 1071644671 (0x3FDFFFFF)
+	WORD	$0xC0295FEF	//iilf	%r2,1609564159
+	BYTE	$0xFF
+	BYTE	$0xFF
+	MOVW	R2, R7
+	CMPBLE	R6, R7, L14	// taken when R1 <= 1609564159 (0x5FEFFFFF)
+L3:
+	WORD	$0xC0297FEF	//iilf	%r2,2146435071
+	BYTE	$0xFF
+	BYTE	$0xFF
+	CMPW	R1, R2
+	BGT	L1	// R1 > 2146435071 (0x7FEFFFFF): F0 is returned unchanged
+	LTDBR	F0, F0
+	FMOVD	F0, F10
+	BLTU	L15	// L15 replaces F10 with -F0
+L9:
+	FMOVD	$0, F0
+	WFADB	V0, V10, V0
+	WORD	$0xC0398006	//iilf	%r3,2147909631
+	BYTE	$0x7F
+	BYTE	$0xFF
+	LGDR	F0, R5
+	SRAD	$32, R5
+	MOVH	$0x0, R2
+	SUBW	R5, R3
+	FMOVD	$0, F8
+	RISBGZ	$32, $47, $0, R3, R4
+	BYTE	$0x18	//lr	%r1,%r4
+	BYTE	$0x14
+	RISBGN	$0, $31, $32, R4, R2
+	SUBW	$0x100000, R1
+	SRAW	$8, R1, R1
+	ORW	$0x45000000, R1
+	BR	L6
+L2:
+	MOVD	$0x30000000, R2
+	CMPW	R1, R2
+	BGT	L16	// R1 > 0x30000000: use the polynomial at L16
+	FMOVD	200(R9), F2	// F2 = 1.0E-20
+	FMADD	F2, F0, F0
+L1:
+	FMOVD	F0, ret+8(FP)	// store the result
+	RET
+L14:
+	LTDBR	F0, F0
+	BLTU	L17	// L17 sets F10 = -F0 and rejoins at L4
+	FMOVD	F0, F10
+L4:
+	FMOVD	192(R9), F2	// F2 = 1.0
+	WFMADB	V0, V0, V2, V0
+	LTDBR	F0, F0
+	FSQRT	F0, F8
+L5:
+	WFADB	V8, V10, V0
+	WORD	$0xC0398006	//iilf	%r3,2147909631
+	BYTE	$0x7F
+	BYTE	$0xFF
+	LGDR	F0, R5
+	SRAD	$32, R5
+	MOVH	$0x0, R2
+	SUBW	R5, R3
+	RISBGZ	$32, $47, $0, R3, R4
+	SRAW	$8, R4, R1
+	RISBGN	$0, $31, $32, R4, R2
+	ORW	$0x45000000, R1
+L6:
+	LDGR	R2, F2
+	FMOVD	184(R9), F0	// F0 = -5.5
+	WFMADB	V8, V2, V0, V8
+	FMOVD	176(R9), F4
+	WFMADB	V10, V2, V8, V2
+	FMOVD	168(R9), F0
+	FMOVD	160(R9), F6
+	FMOVD	152(R9), F1
+	WFMADB	V2, V6, V4, V6
+	WFMADB	V2, V1, V0, V1
+	WFMDB	V2, V2, V4
+	FMOVD	144(R9), F0
+	WFMADB	V6, V4, V1, V6
+	FMOVD	136(R9), F1
+	RISBGZ	$57, $60, $51, R3, R3	// R3 = byte offset into asinhtab2080
+	WFMADB	V2, V0, V1, V0
+	FMOVD	128(R9), F1
+	WFMADB	V4, V6, V0, V6
+	FMOVD	120(R9), F0
+	WFMADB	V2, V1, V0, V1
+	VLVGF	$0, R1, V0
+	WFMADB	V4, V6, V1, V4
+	LDEBR	F0, F0
+	FMOVD	112(R9), F6
+	WFMADB	V2, V4, V6, V4
+	MOVD	$·asinhtab2080<>+0(SB), R1	// R1 = base of the log correction table
+	FMOVD	104(R9), F1
+	WORD	$0x68331000	//ld	%f3,0(%r3,%r1)
+	FMOVD	96(R9), F6
+	WFMADB	V2, V4, V3, V2
+	WFMADB	V0, V1, V6, V0
+	FMOVD	88(R9), F4
+	WFMADB	V0, V4, V2, V0
+	MOVD	R12, R6	// R12 still holds the sign-extended high word of x
+	CMPBGT	R6, $0, L1	// positive high word: return F0 as computed
+
+	WORD	$0xB3130000	//lcdbr	%f0,%f0
+	FMOVD	F0, ret+8(FP)	// store the negated result
+	RET
+L16:
+	WFMDB	V0, V0, V1
+	FMOVD	80(R9), F6
+	WFMDB	V1, V1, V4
+	FMOVD	72(R9), F2
+	WFMADB	V4, V2, V6, V2
+	FMOVD	64(R9), F3
+	FMOVD	56(R9), F6
+	WFMADB	V4, V2, V3, V2
+	FMOVD	48(R9), F3
+	WFMADB	V4, V6, V3, V6
+	FMOVD	40(R9), F5
+	FMOVD	32(R9), F3
+	WFMADB	V4, V2, V5, V2
+	WFMADB	V4, V6, V3, V6
+	FMOVD	24(R9), F5
+	FMOVD	16(R9), F3
+	WFMADB	V4, V2, V5, V2
+	WFMADB	V4, V6, V3, V6
+	FMOVD	8(R9), F5
+	FMOVD	0(R9), F3
+	WFMADB	V4, V2, V5, V2
+	WFMADB	V4, V6, V3, V4
+	WFMDB	V0, V1, V6
+	WFMADB	V1, V4, V2, V4
+	FMADD	F4, F6, F0
+	FMOVD	F0, ret+8(FP)	// store the polynomial result
+	RET
+L17:
+	WORD	$0xB31300A0	//lcdbr	%f10,%f0
+	BR	L4
+L15:
+	WORD	$0xB31300A0	//lcdbr	%f10,%f0
+	BR	L9
diff --git a/src/math/atan.go b/src/math/atan.go
new file mode 100644
index 0000000..e722e99
--- /dev/null
+++ b/src/math/atan.go
@@ -0,0 +1,111 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point arctangent.
+*/
+
+// The original C code, the long comment, and the constants below were
+// from http://netlib.sandia.gov/cephes/cmath/atan.c, available from
+// http://www.netlib.org/cephes/cmath.tgz.
+// The go code is a version of the original C.
+//
+// atan.c
+// Inverse circular tangent (arctangent)
+//
+// SYNOPSIS:
+// double x, y, atan();
+// y = atan( x );
+//
+// DESCRIPTION:
+// Returns radian angle between -pi/2 and +pi/2 whose tangent is x.
+//
+// Range reduction is from three intervals into the interval from zero to 0.66.
+// The approximant uses a rational function of degree 4/5 of the form
+// x + x**3 P(x)/Q(x).
+//
+// ACCURACY:
+//                      Relative error:
+// arithmetic   domain    # trials  peak     rms
+//    DEC       -10, 10   50000     2.4e-17  8.3e-18
+//    IEEE      -10, 10   10^6      1.8e-16  5.0e-17
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// xatan evaluates a series valid in the range [0, 0.66].
+func xatan(x float64) float64 {
+	const ( // coefficients of the numerator (P) and denominator (Q) polynomials
+		P0 = -8.750608600031904122785e-01
+		P1 = -1.615753718733365076637e+01
+		P2 = -7.500855792314704667340e+01
+		P3 = -1.228866684490136173410e+02
+		P4 = -6.485021904942025371773e+01
+		Q0 = +2.485846490142306297962e+01
+		Q1 = +1.650270098316988542046e+02
+		Q2 = +4.328810604912902668951e+02
+		Q3 = +4.853903996359136964868e+02
+		Q4 = +1.945506571482613964425e+02
+	)
+	z := x * x
+	z = z * ((((P0*z+P1)*z+P2)*z+P3)*z + P4) / (((((z+Q0)*z+Q1)*z+Q2)*z+Q3)*z + Q4) // x**2 * P(x**2)/Q(x**2), nested (Horner) form
+	z = x*z + x // x + x**3 * P/Q
+	return z
+}
+
+// satan reduces its argument (known to be positive)
+// to the range [0, 0.66] and calls xatan.
+func satan(x float64) float64 {
+	const (
+		Morebits = 6.123233995736765886130e-17 // pi/2 = PIO2 + Morebits
+		Tan3pio8 = 2.41421356237309504880      // tan(3*pi/8)
+	)
+	if x <= 0.66 { // already inside the range accepted by xatan
+		return xatan(x)
+	}
+	if x > Tan3pio8 { // large x: pi/2 - xatan(1/x), plus the low-order bits of pi/2
+		return Pi/2 - xatan(1/x) + Morebits
+	}
+	return Pi/4 + xatan((x-1)/(x+1)) + 0.5*Morebits // middle interval: pi/4 + xatan((x-1)/(x+1))
+}
+
+// Atan returns the arctangent, in radians, of x.
+//
+// Special cases are:
+//
+//	Atan(±0) = ±0
+//	Atan(±Inf) = ±Pi/2
+func Atan(x float64) float64 {
+	if haveArchAtan { // haveArchAtan and archAtan are declared outside this file
+		return archAtan(x)
+	}
+	return atan(x) // otherwise use the pure Go atan defined below
+}
+
+func atan(x float64) float64 { // pure Go implementation, called by Atan
+	if x == 0 { // ±0 is returned as is, so the sign of zero is kept
+		return x
+	}
+	if x > 0 {
+		return satan(x)
+	}
+	return -satan(-x) // satan expects a positive argument, so negate on the way in and out
+}
diff --git a/src/math/atan2.go b/src/math/atan2.go
new file mode 100644
index 0000000..c324ed0
--- /dev/null
+++ b/src/math/atan2.go
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Atan2 returns the arc tangent of y/x, using
+// the signs of the two to determine the quadrant
+// of the return value.
+//
+// Special cases are (in order):
+//
+//	Atan2(y, NaN) = NaN
+//	Atan2(NaN, x) = NaN
+//	Atan2(+0, x>=0) = +0
+//	Atan2(-0, x>=0) = -0
+//	Atan2(+0, x<=-0) = +Pi
+//	Atan2(-0, x<=-0) = -Pi
+//	Atan2(y>0, 0) = +Pi/2
+//	Atan2(y<0, 0) = -Pi/2
+//	Atan2(+Inf, +Inf) = +Pi/4
+//	Atan2(-Inf, +Inf) = -Pi/4
+//	Atan2(+Inf, -Inf) = 3Pi/4
+//	Atan2(-Inf, -Inf) = -3Pi/4
+//	Atan2(y, +Inf) = 0
+//	Atan2(y>0, -Inf) = +Pi
+//	Atan2(y<0, -Inf) = -Pi
+//	Atan2(+Inf, x) = +Pi/2
+//	Atan2(-Inf, x) = -Pi/2
+func Atan2(y, x float64) float64 {
+	if haveArchAtan2 { // haveArchAtan2 and archAtan2 are declared outside this file
+		return archAtan2(y, x)
+	}
+	return atan2(y, x) // otherwise use the pure Go atan2 defined below
+}
+
+func atan2(y, x float64) float64 { // pure Go implementation, called by Atan2
+	// special cases
+	switch {
+	case IsNaN(y) || IsNaN(x):
+		return NaN()
+	case y == 0:
+		if x >= 0 && !Signbit(x) { // x is +0 or positive; the Signbit test excludes -0
+			return Copysign(0, y)
+		}
+		return Copysign(Pi, y) // x is -0 or negative
+	case x == 0:
+		return Copysign(Pi/2, y) // y is non-zero here
+	case IsInf(x, 0):
+		if IsInf(x, 1) { // x is +Inf
+			switch {
+			case IsInf(y, 0):
+				return Copysign(Pi/4, y)
+			default:
+				return Copysign(0, y)
+			}
+		}
+		switch { // x is -Inf
+		case IsInf(y, 0):
+			return Copysign(3*Pi/4, y)
+		default:
+			return Copysign(Pi, y)
+		}
+	case IsInf(y, 0):
+		return Copysign(Pi/2, y) // x is finite here
+	}
+
+	// Call atan and determine the quadrant.
+	q := Atan(y / x)
+	if x < 0 { // negative x: shift the Atan result by Pi, choosing the sign from q
+		if q <= 0 {
+			return q + Pi
+		}
+		return q - Pi
+	}
+	return q
+}
diff --git a/src/math/atan2_s390x.s b/src/math/atan2_s390x.s
new file mode 100644
index 0000000..587b89e
--- /dev/null
+++ b/src/math/atan2_s390x.s
@@ -0,0 +1,297 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf		0x7FF0000000000000	// these bit patterns are compared and stored as integers by atan2Asm
+#define NegInf		0xFFF0000000000000
+#define NegZero		0x8000000000000000
+#define Pi		0x400921FB54442D18
+#define NegPi		0xC00921FB54442D18
+#define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
+#define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
+#define PiDiv4		0x3FE921FB54442D18	// Pi/4
+#define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4
+
+// Minimax polynomial coefficients and other constants; atan2Asm addresses this table through R9
+DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
+DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
+DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
+DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
+DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
+DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
+DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
+DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
+DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
+DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
+DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
+DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
+DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
+DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
+DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
+DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
+DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
+DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
+DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
+DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
+GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160	// 20 entries of 8 bytes
+
+DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// four entries; one is selected by R2 for the madb at label L18
+DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
+DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
+DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b
+GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32
+DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000	// loaded into F0 at label L18 as the multiplier for the madb
+GLOBL ·atan2xpim<> + 0(SB), RODATA, $8
+
+// Atan2 returns the arc tangent of y/x, using
+// the signs of the two to determine the quadrant
+// of the return value.
+//
+// Special cases are (in order):
+//      Atan2(y, NaN) = NaN
+//      Atan2(NaN, x) = NaN
+//      Atan2(+0, x>=0) = +0
+//      Atan2(-0, x>=0) = -0
+//      Atan2(+0, x<=-0) = +Pi
+//      Atan2(-0, x<=-0) = -Pi
+//      Atan2(y>0, 0) = +Pi/2
+//      Atan2(y<0, 0) = -Pi/2
+//      Atan2(+Inf, +Inf) = +Pi/4
+//      Atan2(-Inf, +Inf) = -Pi/4
+//      Atan2(+Inf, -Inf) = 3Pi/4
+//      Atan2(-Inf, -Inf) = -3Pi/4
+//      Atan2(y, +Inf) = 0
+//      Atan2(y>0, -Inf) = +Pi
+//      Atan2(y<0, -Inf) = -Pi
+//      Atan2(+Inf, x) = +Pi/2
+//      Atan2(-Inf, x) = -Pi/2
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·atan2Asm(SB), NOSPLIT, $0-24
+	// special case
+	MOVD	x+0(FP), R1	// R1 = bits of the first argument (named x in this file)
+	MOVD	y+8(FP), R2	// R2 = bits of the second argument (named y in this file)
+
+	// special case Atan2(NaN, y) = NaN
+	MOVD	$~(1<<63), R5
+	AND	R1, R5		// x = |x|
+	MOVD	$PosInf, R3
+	CMPUBLT	R3, R5, returnX
+
+	// special case Atan2(x, NaN) = NaN
+	MOVD	$~(1<<63), R5
+	AND	R2, R5
+	CMPUBLT R3, R5, returnY
+
+	MOVD	$NegZero, R3
+	CMPUBEQ	R3, R1, xIsNegZero
+
+	MOVD	$0, R3
+	CMPUBEQ	R3, R1, xIsPosZero
+
+	MOVD	$PosInf, R4
+	CMPUBEQ	R4, R2, yIsPosInf
+
+	MOVD	$NegInf, R4
+	CMPUBEQ	R4, R2, yIsNegInf
+	BR	Normal
+xIsNegZero:
+	// special case Atan2(-0, y>=0) = -0
+	MOVD	$0, R4
+	CMPBLE	R4, R2, returnX
+
+	//special case Atan2(-0, y<=-0) = -Pi
+	MOVD	$NegZero, R4
+	CMPBGE	R4, R2, returnNegPi
+	BR	Normal
+xIsPosZero:
+	//special case Atan2(0, 0) = 0
+	MOVD	$0, R4
+	CMPUBEQ	R4, R2, returnX
+
+	//special case Atan2(0, y<=-0) = Pi
+	MOVD	$NegZero, R4
+	CMPBGE	R4, R2, returnPi
+	BR Normal
+yIsNegInf:
+	//special case Atan2(+Inf, -Inf) = 3Pi/4
+	MOVD	$PosInf, R3
+	CMPUBEQ	R3, R1, posInfNegInf
+
+	//special case Atan2(-Inf, -Inf) = -3Pi/4
+	MOVD	$NegInf, R3
+	CMPUBEQ	R3, R1, negInfNegInf
+	BR Normal
+yIsPosInf:
+	//special case Atan2(+Inf, +Inf) = Pi/4
+	MOVD	$PosInf, R3
+	CMPUBEQ	R3, R1, posInfPosInf
+
+	//special case Atan2(-Inf, +Inf) = -Pi/4
+	MOVD	$NegInf, R3
+	CMPUBEQ	R3, R1, negInfPosInf
+
+	//special case Atan2(x, +Inf) = Copysign(0, x)
+	CMPBLT	R1, $0, returnNegZero
+	BR returnPosZero
+
+Normal:
+	FMOVD	x+0(FP), F0	// F0 = first argument
+	FMOVD	y+8(FP), F2	// F2 = second argument
+	MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = base address of the coefficient table
+	LGDR	F0, R2
+	LGDR	F2, R1
+	RISBGNZ	$32, $63, $32, R2, R2
+	RISBGNZ	$32, $63, $32, R1, R1
+	WORD	$0xB9170032	//llgtr	%r3,%r2
+	RISBGZ	$63, $63, $33, R2, R5
+	WORD	$0xB9170041	//llgtr	%r4,%r1
+	WFLCDB	V0, V20
+	MOVW	R4, R6
+	MOVW	R3, R7
+	CMPUBLT	R6, R7, L17
+	WFDDB	V2, V0, V3
+	ADDW	$2, R5, R2
+	MOVW	R4, R6
+	MOVW	R3, R7
+	CMPUBLE	R6, R7, L20
+L3:
+	WFMDB	V3, V3, V4
+	VLEG	$0, 152(R9), V18
+	VLEG	$0, 144(R9), V16
+	FMOVD	136(R9), F1
+	FMOVD	128(R9), F5
+	FMOVD	120(R9), F6
+	WFMADB	V4, V16, V5, V16
+	WFMADB	V4, V6, V1, V6
+	FMOVD	112(R9), F7
+	WFMDB	V4, V4, V1
+	WFMADB	V4, V7, V18, V7
+	VLEG	$0, 104(R9), V18
+	WFMADB	V1, V6, V16, V6
+	CMPWU	R4, R3
+	FMOVD	96(R9), F5
+	VLEG	$0, 88(R9), V16
+	WFMADB	V4, V5, V18, V5
+	VLEG	$0, 80(R9), V18
+	VLEG	$0, 72(R9), V22
+	WFMADB	V4, V16, V18, V16
+	VLEG	$0, 64(R9), V18
+	WFMADB	V1, V7, V5, V7
+	WFMADB	V4, V18, V22, V18
+	WFMDB	V1, V1, V5
+	WFMADB	V1, V16, V18, V16
+	VLEG	$0, 56(R9), V18
+	WFMADB	V5, V6, V7, V6
+	VLEG	$0, 48(R9), V22
+	FMOVD	40(R9), F7
+	WFMADB	V4, V7, V18, V7
+	VLEG	$0, 32(R9), V18
+	WFMADB	V5, V6, V16, V6
+	WFMADB	V4, V18, V22, V18
+	VLEG	$0, 24(R9), V16
+	WFMADB	V1, V7, V18, V7
+	VLEG	$0, 16(R9), V18
+	VLEG	$0, 8(R9), V22
+	WFMADB	V4, V18, V16, V18
+	VLEG	$0, 0(R9), V16
+	WFMADB	V5, V6, V7, V6
+	WFMADB	V4, V16, V22, V16
+	FMUL	F3, F4
+	WFMADB	V1, V18, V16, V1
+	FMADD	F6, F5, F1
+	WFMADB	V4, V1, V3, V4
+	BLT	L18
+	BGT	L7
+	LTDBR	F2, F2
+	BLTU	L21
+L8:
+	LTDBR	F0, F0
+	BLTU	L22
+L9:
+	WFCHDBS	V2, V0, V0
+	BNE	L18
+L7:
+	MOVW	R1, R6
+	CMPBGE	R6, $0, L1
+L18:
+	RISBGZ	$58, $60, $3, R2, R2	// R2 = byte offset into atan2xpi2h
+	MOVD	$·atan2xpi2h<>+0(SB), R1
+	MOVD	·atan2xpim<>+0(SB), R3
+	LDGR	R3, F0	// F0 = the atan2xpim constant
+	WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1)
+	BYTE	$0x40
+	BYTE	$0x1E
+L1:
+	FMOVD	F4, ret+16(FP)	// store the result
+	RET
+
+L20:
+	LTDBR	F2, F2
+	BLTU	L23
+	FMOVD	F2, F6
+L4:
+	LTDBR	F0, F0
+	BLTU	L24
+	FMOVD	F0, F4
+L5:
+	WFCHDBS	V6, V4, V4
+	BEQ	L3
+L17:
+	WFDDB	V0, V2, V4
+	BYTE	$0x18	//lr	%r2,%r5
+	BYTE	$0x25
+	WORD	$0xB3130034	//lcdbr	%f3,%f4
+	BR	L3
+L23:
+	WORD	$0xB3130062	//lcdbr	%f6,%f2
+	BR	L4
+L22:
+	VLR	V20, V0
+	BR	L9
+L21:
+	WORD	$0xB3130022	//lcdbr	%f2,%f2
+	BR	L8
+L24:
+	VLR	V20, V4
+	BR	L5
+returnX:	//the result is same as the first argument
+	MOVD	R1, ret+16(FP)
+	RET
+returnY:	//the result is same as the second argument
+	MOVD	R2, ret+16(FP)
+	RET
+returnPi:
+	MOVD	$Pi, R1
+	MOVD	R1, ret+16(FP)
+	RET
+returnNegPi:
+	MOVD	$NegPi, R1
+	MOVD	R1, ret+16(FP)
+	RET
+posInfNegInf:
+	MOVD	$Pi3Div4, R1
+	MOVD	R1, ret+16(FP)
+	RET
+negInfNegInf:
+	MOVD	$NegPi3Div4, R1
+	MOVD	R1, ret+16(FP)
+	RET
+posInfPosInf:
+	MOVD	$PiDiv4, R1
+	MOVD	R1, ret+16(FP)
+	RET
+negInfPosInf:
+	MOVD	$NegPiDiv4, R1
+	MOVD	R1, ret+16(FP)
+	RET
+returnNegZero:
+	MOVD	$NegZero, R1
+	MOVD	R1, ret+16(FP)
+	RET
+returnPosZero:
+	MOVD	$0, ret+16(FP)
+	RET
diff --git a/src/math/atan_s390x.s b/src/math/atan_s390x.s
new file mode 100644
index 0000000..3a7e59b
--- /dev/null
+++ b/src/math/atan_s390x.s
@@ -0,0 +1,128 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants; atanAsm addresses this table through R5
+DATA ·atanrodataL8<> + 0(SB)/8, $0.199999999999554423E+00
+DATA ·atanrodataL8<> + 8(SB)/8, $0.111111110136634272E+00
+DATA ·atanrodataL8<> + 16(SB)/8, $-.142857142828026806E+00
+DATA ·atanrodataL8<> + 24(SB)/8, $-.333333333333330928E+00
+DATA ·atanrodataL8<> + 32(SB)/8, $0.769228118888682505E-01
+DATA ·atanrodataL8<> + 40(SB)/8, $0.588059263575587687E-01
+DATA ·atanrodataL8<> + 48(SB)/8, $-.666641501287528609E-01
+DATA ·atanrodataL8<> + 56(SB)/8, $-.909090711945939878E-01
+DATA ·atanrodataL8<> + 64(SB)/8, $0.472329433805024762E-01
+DATA ·atanrodataL8<> + 72(SB)/8, $0.366935664549587481E-01
+DATA ·atanrodataL8<> + 80(SB)/8, $-.422172007412067035E-01
+DATA ·atanrodataL8<> + 88(SB)/8, $-.299856214685512712E-01
+DATA ·atanrodataL8<> + 96(SB)/8, $0.220852012160300086E-01
+DATA ·atanrodataL8<> + 104(SB)/8, $0.726338160757602439E-02
+DATA ·atanrodataL8<> + 112(SB)/8, $0.843488472994227321E-03
+DATA ·atanrodataL8<> + 120(SB)/8, $0.134893651284712515E-04
+DATA ·atanrodataL8<> + 128(SB)/8, $-.525380587584426406E-01
+DATA ·atanrodataL8<> + 136(SB)/8, $-.139950258898989925E-01
+DATA ·atanrodataL8<> + 144(SB)/8, $-.291935324869629616E-02
+DATA ·atanrodataL8<> + 152(SB)/8, $-.154797890856877418E-03
+GLOBL ·atanrodataL8<> + 0(SB), RODATA, $160	// 20 entries of 8 bytes
+
+DATA ·atanxpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// four 8-byte entries; atanAsm loads one into F8 with ld %f8,0(%r1,%r3)
+DATA ·atanxpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
+DATA ·atanxpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
+DATA ·atanxpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b	// width 8 like the other entries, so the full 64-bit constant is stored
+GLOBL ·atanxpi2h<> + 0(SB), RODATA, $32
+DATA ·atanxpim<> + 0(SB)/8, $0x3ff4f42b00000000	// memory operand of the madb at the end of atanAsm
+GLOBL ·atanxpim<> + 0(SB), RODATA, $8
+DATA ·atanxmone<> + 0(SB)/8, $-1.0	// loaded into F2 ahead of the WFDDB in atanAsm
+GLOBL ·atanxmone<> + 0(SB), RODATA, $8
+
+// Atan returns the arctangent, in radians, of the argument.
+//
+// Special cases are:
+//      Atan(±0) = ±0
+//      Atan(±Inf) = ±Pi/2
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·atanAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	//special case Atan(±0) = ±0
+	FMOVD   $(0.0), F1
+	FCMPU   F0, F1
+	BEQ     atanIsZero
+
+	MOVD	$·atanrodataL8<>+0(SB), R5	// R5 = base address of the coefficient table
+	MOVH	$0x3FE0, R3
+	LGDR	F0, R1	// R1 = bit pattern of x
+	RISBGNZ	$32, $63, $32, R1, R1
+	RLL	$16, R1, R2
+	ANDW	$0x7FF0, R2
+	MOVW	R2, R6
+	MOVW	R3, R7
+	CMPUBLE	R6, R7, L6	// R2 <= 0x3FE0: go straight to the polynomial
+	MOVD	$·atanxmone<>+0(SB), R3
+	FMOVD	0(R3), F2	// F2 = -1.0
+	WFDDB	V0, V2, V0
+	RISBGZ	$63, $63, $33, R1, R1
+	MOVD	$·atanxpi2h<>+0(SB), R3
+	MOVWZ	R1, R1
+	SLD	$3, R1, R1	// scale the index to a byte offset (8 bytes per entry)
+	WORD	$0x68813000	//ld	%f8,0(%r1,%r3)
+L6:
+	WFMDB	V0, V0, V2
+	FMOVD	152(R5), F6
+	FMOVD	144(R5), F1
+	FMOVD	136(R5), F7
+	VLEG	$0, 128(R5), V16
+	FMOVD	120(R5), F4
+	FMOVD	112(R5), F5
+	WFMADB	V2, V4, V6, V4
+	WFMADB	V2, V5, V1, V5
+	WFMDB	V2, V2, V6
+	FMOVD	104(R5), F3
+	FMOVD	96(R5), F1
+	WFMADB	V2, V3, V7, V3
+	MOVH	$0x3FE0, R1
+	FMOVD	88(R5), F7
+	WFMADB	V2, V1, V7, V1
+	FMOVD	80(R5), F7
+	WFMADB	V6, V3, V1, V3
+	WFMADB	V6, V4, V5, V4
+	WFMDB	V6, V6, V1
+	FMOVD	72(R5), F5
+	WFMADB	V2, V5, V7, V5
+	FMOVD	64(R5), F7
+	WFMADB	V2, V7, V16, V7
+	VLEG	$0, 56(R5), V16
+	WFMADB	V6, V5, V7, V5
+	WFMADB	V1, V4, V3, V4
+	FMOVD	48(R5), F7
+	FMOVD	40(R5), F3
+	WFMADB	V2, V3, V7, V3
+	FMOVD	32(R5), F7
+	WFMADB	V2, V7, V16, V7
+	VLEG	$0, 24(R5), V16
+	WFMADB	V1, V4, V5, V4
+	FMOVD	16(R5), F5
+	WFMADB	V6, V3, V7, V3
+	FMOVD	8(R5), F7
+	WFMADB	V2, V7, V5, V7
+	FMOVD	0(R5), F5
+	WFMADB	V2, V5, V16, V5
+	WFMADB	V1, V4, V3, V4
+	WFMADB	V6, V7, V5, V6
+	FMUL	F0, F2
+	FMADD	F4, F1, F6
+	FMADD	F6, F2, F0
+	MOVW	R2, R6
+	MOVW	R1, R7
+	CMPUBLE	R6, R7, L1	// same R2 <= 0x3FE0 test as above: no correction term needed
+	MOVD	$·atanxpim<>+0(SB), R1
+	WORD	$0xED801000	//madb	%f0,%f8,0(%r1)
+	BYTE	$0x00
+	BYTE	$0x1E
+L1:
+atanIsZero:
+	FMOVD	F0, ret+8(FP)	// store the result; on the zero path F0 still holds x
+	RET
diff --git a/src/math/atanh.go b/src/math/atanh.go
new file mode 100644
index 0000000..9d59462
--- /dev/null
+++ b/src/math/atanh.go
@@ -0,0 +1,85 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/e_atanh.c
+// and came with this notice. The go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// __ieee754_atanh(x)
+// Method :
+//	1. Reduce x to positive by atanh(-x) = -atanh(x)
+//	2. For x>=0.5
+//	            1              2x                          x
+//	atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
+//	            2             1 - x                      1 - x
+//
+//	For x<0.5
+//	atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
+//
+// Special cases:
+//	atanh(x) is NaN if |x| > 1 with signal;
+//	atanh(NaN) is that NaN with no signal;
+//	atanh(+-1) is +-INF with signal.
+//
+
+// Atanh returns the inverse hyperbolic tangent of x.
+//
+// Special cases are:
+//
+//	Atanh(1) = +Inf
+//	Atanh(±0) = ±0
+//	Atanh(-1) = -Inf
+//	Atanh(x) = NaN if x < -1 or x > 1
+//	Atanh(NaN) = NaN
+func Atanh(x float64) float64 {
+	if haveArchAtanh { // haveArchAtanh and archAtanh are declared outside this file
+		return archAtanh(x)
+	}
+	return atanh(x) // otherwise use the pure Go atanh defined below
+}
+
+func atanh(x float64) float64 { // pure Go implementation, called by Atanh
+	const NearZero = 1.0 / (1 << 28) // 2**-28
+	// special cases
+	switch {
+	case x < -1 || x > 1 || IsNaN(x):
+		return NaN()
+	case x == 1:
+		return Inf(1)
+	case x == -1:
+		return Inf(-1)
+	}
+	sign := false // set when the argument is negative
+	if x < 0 {
+		x = -x // reduce to x >= 0 using atanh(-x) = -atanh(x)
+		sign = true
+	}
+	var temp float64 // result for |x|
+	switch {
+	case x < NearZero:
+		temp = x // |x| < 2**-28: the argument itself is the result
+	case x < 0.5:
+		temp = x + x
+		temp = 0.5 * Log1p(temp+temp*x/(1-x)) // 0.5*log1p(2x+2x*x/(1-x))
+	default:
+		temp = 0.5 * Log1p((x+x)/(1-x)) // 0.5*log1p(2x/(1-x))
+	}
+	if sign {
+		temp = -temp // restore the sign removed above
+	}
+	return temp
+}
diff --git a/src/math/atanh_s390x.s b/src/math/atanh_s390x.s
new file mode 100644
index 0000000..ba0e926
--- /dev/null
+++ b/src/math/atanh_s390x.s
@@ -0,0 +1,174 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants; atanhAsm addresses this table through R5
+DATA ·atanhrodataL10<> + 0(SB)/8, $.41375273347623353626
+DATA ·atanhrodataL10<> + 8(SB)/8, $.51487302528619766235E+04
+DATA ·atanhrodataL10<> + 16(SB)/8, $-1.67526912689208984375
+DATA ·atanhrodataL10<> + 24(SB)/8, $0.181818181818181826E+00
+DATA ·atanhrodataL10<> + 32(SB)/8, $-.165289256198351540E-01
+DATA ·atanhrodataL10<> + 40(SB)/8, $0.200350613573012186E-02
+DATA ·atanhrodataL10<> + 48(SB)/8, $0.397389654305194527E-04
+DATA ·atanhrodataL10<> + 56(SB)/8, $-.273205381970859341E-03
+DATA ·atanhrodataL10<> + 64(SB)/8, $0.938370938292558173E-06
+DATA ·atanhrodataL10<> + 72(SB)/8, $-.148682720127920854E-06
+DATA ·atanhrodataL10<> + 80(SB)/8, $ 0.212881813645679599E-07
+DATA ·atanhrodataL10<> + 88(SB)/8, $-.602107458843052029E-05
+DATA ·atanhrodataL10<> + 96(SB)/8, $-5.5
+DATA ·atanhrodataL10<> + 104(SB)/8, $-0.5
+DATA ·atanhrodataL10<> + 112(SB)/8, $0.0	// divisor used by the ddb at label L5
+DATA ·atanhrodataL10<> + 120(SB)/8, $0x7ff8000000000000      //Nan
+DATA ·atanhrodataL10<> + 128(SB)/8, $-1.0	// compared against x at label L2
+DATA ·atanhrodataL10<> + 136(SB)/8, $1.0	// compared against x at label L2
+DATA ·atanhrodataL10<> + 144(SB)/8, $1.0E-20	// loaded at label L3
+GLOBL ·atanhrodataL10<> + 0(SB), RODATA, $152	// 19 entries of 8 bytes
+
+// Table of log correction terms; loaded by the ld %f5,0(%r2,%r1) in atanhAsm
+DATA ·atanhtab2076<> + 0(SB)/8, $0.585235384085551248E-01
+DATA ·atanhtab2076<> + 8(SB)/8, $0.412206153771168640E-01
+DATA ·atanhtab2076<> + 16(SB)/8, $0.273839003221648339E-01
+DATA ·atanhtab2076<> + 24(SB)/8, $0.166383778368856480E-01
+DATA ·atanhtab2076<> + 32(SB)/8, $0.866678223433169637E-02
+DATA ·atanhtab2076<> + 40(SB)/8, $0.319831684989627514E-02
+DATA ·atanhtab2076<> + 48(SB)/8, $0.000000000000000000E+00
+DATA ·atanhtab2076<> + 56(SB)/8, $-.113006378583725549E-02
+DATA ·atanhtab2076<> + 64(SB)/8, $-.367979419636602491E-03
+DATA ·atanhtab2076<> + 72(SB)/8, $0.213172484510484979E-02
+DATA ·atanhtab2076<> + 80(SB)/8, $0.623271047682013536E-02
+DATA ·atanhtab2076<> + 88(SB)/8, $0.118140812789696885E-01
+DATA ·atanhtab2076<> + 96(SB)/8, $0.187681358930914206E-01
+DATA ·atanhtab2076<> + 104(SB)/8, $0.269985148668178992E-01
+DATA ·atanhtab2076<> + 112(SB)/8, $0.364186619761331328E-01
+DATA ·atanhtab2076<> + 120(SB)/8, $0.469505379381388441E-01
+GLOBL ·atanhtab2076<> + 0(SB), RODATA, $128
+
+// Table of +/- .5; atanhAsm picks the entry from the sign bit at label L9
+DATA ·atanhtabh2075<> + 0(SB)/8, $0.5
+DATA ·atanhtabh2075<> + 8(SB)/8, $-.5
+GLOBL ·atanhtabh2075<> + 0(SB), RODATA, $16
+
+// atanhAsm returns the inverse hyperbolic tangent of the argument.
+// The argument is read from x+0(FP) and the result is stored to ret+8(FP).
+// Special cases are:
+//      Atanh(1) = +Inf
+//      Atanh(±0) = ±0
+//      Atanh(-1) = -Inf
+//      Atanh(x) = NaN if x < -1 or x > 1
+//      Atanh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT    ·atanhAsm(SB), NOSPLIT, $0-16
+	FMOVD   x+0(FP), F0	// F0 = x
+	MOVD    $·atanhrodataL10<>+0(SB), R5	// R5 = base address of the constant table
+	LGDR    F0, R1	// R1 = bit pattern of x
+	WORD    $0xC0393FEF //iilf  %r3,1072693247
+	BYTE    $0xFF
+	BYTE    $0xFF
+	SRAD    $32, R1	// R1 = high word of x, sign-extended
+	WORD    $0xB9170021 //llgtr %r2,%r1
+	MOVW    R2, R6
+	MOVW    R3, R7
+	CMPBGT  R6, R7, L2	// taken when R2 > 1072693247 (0x3FEFFFFF)
+	WORD    $0xC0392FFF //iilf  %r3,805306367
+	BYTE    $0xFF
+	BYTE    $0xFF
+	MOVW    R2, R6
+	MOVW    R3, R7
+	CMPBGT  R6, R7, L9	// taken when R2 > 805306367 (0x2FFFFFFF)
+L3:
+	FMOVD   144(R5), F2	// F2 = 1.0E-20
+	FMADD   F2, F0, F0
+L1:
+	FMOVD   F0, ret+8(FP)	// store the result
+	RET
+
+L2:
+	WORD    $0xED005088 //cdb   %f0,.L12-.L10(%r5)  (displacement 0x88 = 136: the 1.0 entry)
+	BYTE    $0x00
+	BYTE    $0x19
+	BEQ L5
+	WORD    $0xED005080 //cdb   %f0,.L13-.L10(%r5)  (displacement 0x80 = 128: the -1.0 entry)
+	BYTE    $0x00
+	BYTE    $0x19
+	BEQ L5
+	WFCEDBS V0, V0, V2
+	BVS L1
+	FMOVD   120(R5), F0	// F0 = the NaN entry of the table
+	BR  L1
+L5:
+	WORD    $0xED005070 //ddb   %f0,.L15-.L10(%r5)  (displacement 0x70 = 112: the 0.0 entry)
+	BYTE    $0x00
+	BYTE    $0x1D
+	FMOVD   F0, ret+8(FP)	// store the quotient
+	RET
+
+L9:
+	FMOVD   F0, F2
+	MOVD    $·atanhtabh2075<>+0(SB), R2	// R2 = base of the +/- .5 table
+	SRW $31, R1, R1	// R1 = sign bit of the high word
+	FMOVD   104(R5), F4	// F4 = -0.5
+	MOVW    R1, R1
+	SLD $3, R1, R1	// scale the index to a byte offset (8 bytes per entry)
+	WORD    $0x68012000 //ld    %f0,0(%r1,%r2)
+	WFMADB  V2, V4, V0, V4
+	VLEG    $0, 96(R5), V16	// V16 = -5.5
+	FDIV    F4, F2
+	WORD    $0xC0298006 //iilf  %r2,2147909631
+	BYTE    $0x7F
+	BYTE    $0xFF
+	FMOVD   88(R5), F6
+	FMOVD   80(R5), F1
+	FMOVD   72(R5), F7
+	FMOVD   64(R5), F5
+	FMOVD   F2, F4
+	WORD    $0xED405088 //adb   %f4,.L12-.L10(%r5)
+	BYTE    $0x00
+	BYTE    $0x1A
+	LGDR    F4, R4
+	SRAD    $32, R4
+	FMOVD   F4, F3
+	WORD    $0xED305088 //sdb   %f3,.L12-.L10(%r5)
+	BYTE    $0x00
+	BYTE    $0x1B
+	SUBW    R4, R2
+	WFSDB   V3, V2, V3
+	RISBGZ  $32, $47, $0, R2, R1
+	SLD $32, R1, R1
+	LDGR    R1, F2
+	WFMADB  V4, V2, V16, V4
+	SRAW    $8, R2, R1
+	WFMADB  V4, V5, V6, V5
+	WFMDB   V4, V4, V6
+	WFMADB  V4, V1, V7, V1
+	WFMADB  V2, V3, V4, V2
+	WFMADB  V1, V6, V5, V1
+	FMOVD   56(R5), F3
+	FMOVD   48(R5), F5
+	WFMADB  V4, V5, V3, V4
+	FMOVD   40(R5), F3
+	FMADD   F1, F6, F4
+	FMOVD   32(R5), F1
+	FMADD   F3, F2, F1
+	ANDW    $0xFFFFFF00, R1
+	WFMADB  V6, V4, V1, V6
+	FMOVD   24(R5), F3
+	ORW $0x45000000, R1
+	WFMADB  V2, V6, V3, V6
+	VLVGF   $0, R1, V4
+	LDEBR   F4, F4
+	RISBGZ  $57, $60, $51, R2, R2	// R2 = byte offset into atanhtab2076
+	MOVD    $·atanhtab2076<>+0(SB), R1	// R1 = base of the log correction table
+	FMOVD   16(R5), F3
+	WORD    $0x68521000 //ld    %f5,0(%r2,%r1)
+	FMOVD   8(R5), F1
+	WFMADB  V2, V6, V5, V2
+	WFMADB  V4, V3, V1, V4
+	FMOVD   0(R5), F6
+	FMADD   F6, F4, F2
+	FMUL    F2, F0
+	FMOVD   F0, ret+8(FP)	// store the result
+	RET
diff --git a/src/math/big/accuracy_string.go b/src/math/big/accuracy_string.go
new file mode 100644
index 0000000..aae9238
--- /dev/null
+++ b/src/math/big/accuracy_string.go
@@ -0,0 +1,26 @@
+// Code generated by "stringer -type=Accuracy"; DO NOT EDIT.
+
+package big
+
+import "strconv"
+
+func _() { // compile-time guard only: a function named _ cannot be called
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{} // only index 0 is valid, so each expression below must evaluate to 0
+	_ = x[Below - -1]
+	_ = x[Exact-0]
+	_ = x[Above-1]
+}
+
+const _Accuracy_name = "BelowExactAbove"
+
+var _Accuracy_index = [...]uint8{0, 5, 10, 15}
+
+func (i Accuracy) String() string { // returns the constant's name, or "Accuracy(n)" for an unnamed value
+	i -= -1 // shift by one so that Below (-1) maps to index 0
+	if i < 0 || i >= Accuracy(len(_Accuracy_index)-1) {
+		return "Accuracy(" + strconv.FormatInt(int64(i+-1), 10) + ")" // i+-1 undoes the shift
+	}
+	return _Accuracy_name[_Accuracy_index[i]:_Accuracy_index[i+1]] // slice the name out of the packed string
+}
diff --git a/src/math/big/alias_test.go b/src/math/big/alias_test.go
new file mode 100644
index 0000000..36c37fb
--- /dev/null
+++ b/src/math/big/alias_test.go
@@ -0,0 +1,312 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big_test
+
+import (
+	cryptorand "crypto/rand"
+	"math/big"
+	"math/rand"
+	"reflect"
+	"testing"
+	"testing/quick"
+)
+
+func equal(z, x *big.Int) bool { // equal reports whether z and x compare as numerically equal
+	return z.Cmp(x) == 0
+}
+
+type bigInt struct { // bigInt wraps *big.Int; its Generate method lets testing/quick build random values
+	*big.Int
+}
+
+func generatePositiveInt(rand *rand.Rand, size int) *big.Int { // returns a random n in [0, 2^k) with k drawn from [0, size*8); despite the name, 0 is possible
+	n := big.NewInt(1)
+	n.Lsh(n, uint(rand.Intn(size*8))) // n = 2^k, the exclusive upper bound for the draw
+	n.Rand(rand, n)                   // overwrite n with a pseudo-random value in [0, n)
+	return n
+}
+
+func (bigInt) Generate(rand *rand.Rand, size int) reflect.Value { // Generate produces a random bigInt; the rand parameter shadows package math/rand here
+	n := generatePositiveInt(rand, size)
+	if rand.Intn(4) == 0 { // negate about one value in four
+		n.Neg(n)
+	}
+	return reflect.ValueOf(bigInt{n})
+}
+
+type notZeroInt struct { // notZeroInt wraps *big.Int; its generated values are never zero
+	*big.Int
+}
+
+func (notZeroInt) Generate(rand *rand.Rand, size int) reflect.Value { // Generate produces a random non-zero value of either sign
+	n := generatePositiveInt(rand, size)
+	if rand.Intn(4) == 0 { // negate about one value in four
+		n.Neg(n)
+	}
+	if n.Sign() == 0 { // generatePositiveInt may yield 0; substitute 1
+		n.SetInt64(1)
+	}
+	return reflect.ValueOf(notZeroInt{n})
+}
+
+type positiveInt struct { // positiveInt wraps *big.Int; its generated values are never negative (zero is possible)
+	*big.Int
+}
+
+func (positiveInt) Generate(rand *rand.Rand, size int) reflect.Value { // Generate returns the result of generatePositiveInt unchanged
+	n := generatePositiveInt(rand, size)
+	return reflect.ValueOf(positiveInt{n})
+}
+
+type prime struct { // prime wraps *big.Int; its generated values come from crypto/rand.Prime
+	*big.Int
+}
+
+func (prime) Generate(r *rand.Rand, size int) reflect.Value { // Generate produces a random prime, using r as the randomness source
+	n, err := cryptorand.Prime(r, r.Intn(size*8-2)+2) // bit length drawn from [2, size*8)
+	if err != nil {
+		panic(err) // a generator has no error return, so failure aborts the test
+	}
+	return reflect.ValueOf(prime{n})
+}
+
+type zeroOrOne struct { // zeroOrOne wraps a uint whose generated value is 0 or 1
+	uint
+}
+
+func (zeroOrOne) Generate(rand *rand.Rand, size int) reflect.Value { // size is ignored
+	return reflect.ValueOf(zeroOrOne{uint(rand.Intn(2))})
+}
+
+type smallUint struct { // smallUint wraps a uint whose generated value lies in [0, 1024)
+	uint
+}
+
+func (smallUint) Generate(rand *rand.Rand, size int) reflect.Value { // size is ignored
+	return reflect.ValueOf(smallUint{uint(rand.Intn(1024))})
+}
+
+// checkAliasingOneArg checks if f returns a correct result when v and x alias.
+//
+// f is a function that takes x as an argument, doesn't modify it, sets v to the
+// result, and returns v. It is the function signature of unbound methods like
+//
+//	func (v *big.Int) m(x *big.Int) *big.Int
+//
+// v and x are two random Int values. v is randomized even if it will be
+// overwritten to test for improper buffer reuse.
+func checkAliasingOneArg(t *testing.T, f func(v, x *big.Int) *big.Int, v, x *big.Int) bool {
+	x1, v1 := new(big.Int).Set(x), new(big.Int).Set(x) // x1: pristine copy of x; v1: second copy of x, used as receiver and argument at once
+
+	// Calculate a reference f(x) without aliasing.
+	if out := f(v, x); out != v { // f must return its receiver
+		return false
+	}
+
+	// Test aliasing the argument and the receiver.
+	if out := f(v1, v1); out != v1 || !equal(v1, v) { // aliased result must match the reference in v
+		t.Logf("f(v, x) != f(x, x)")
+		return false
+	}
+
+	// Ensure the argument was not modified.
+	return equal(x, x1)
+}
+
+// checkAliasingTwoArgs checks if f returns a correct result when any
+// combination of v, x and y alias.
+//
+// f is a function that takes x and y as arguments, doesn't modify them, sets v
+// to the result, and returns v. It is the function signature of unbound methods
+// like
+//
+//	func (v *big.Int) m(x, y *big.Int) *big.Int
+//
+// v, x and y are random Int values. v is randomized even if it will be
+// overwritten to test for improper buffer reuse.
+func checkAliasingTwoArgs(t *testing.T, f func(v, x, y *big.Int) *big.Int, v, x, y *big.Int) bool {
+	x1, y1, v1 := new(big.Int).Set(x), new(big.Int).Set(y), new(big.Int).Set(v) // pristine copies; v1 is later reused as the aliased receiver
+
+	// Calculate a reference f(x, y) without aliasing.
+	if out := f(v, x, y); out == nil {
+		// Certain functions like ModInverse return nil for certain inputs.
+		// Check that receiver and arguments were unchanged and move on.
+		return equal(x, x1) && equal(y, y1) && equal(v, v1)
+	} else if out != v { // a non-nil result must be the receiver itself
+		return false
+	}
+
+	// Test aliasing the first argument and the receiver.
+	v1.Set(x)
+	if out := f(v1, v1, y); out != v1 || !equal(v1, v) {
+		t.Logf("f(v, x, y) != f(x, x, y)")
+		return false
+	}
+	// Test aliasing the second argument and the receiver.
+	v1.Set(y)
+	if out := f(v1, x, v1); out != v1 || !equal(v1, v) {
+		t.Logf("f(v, x, y) != f(y, x, y)")
+		return false
+	}
+
+	// Calculate a reference f(y, y) without aliasing.
+	// We use y because it's the one that commonly has restrictions
+	// like being prime or non-zero.
+	v1.Set(v) // remember the receiver's value from before the call below
+	y2 := new(big.Int).Set(y) // distinct Int with the same value as y
+	if out := f(v, y, y2); out == nil {
+		return equal(y, y1) && equal(y2, y1) && equal(v, v1)
+	} else if out != v {
+		return false
+	}
+
+	// Test aliasing the two arguments.
+	if out := f(v1, y, y); out != v1 || !equal(v1, v) {
+		t.Logf("f(v, y1, y2) != f(v, y, y)")
+		return false
+	}
+	// Test aliasing the two arguments and the receiver.
+	v1.Set(y)
+	if out := f(v1, v1, v1); out != v1 || !equal(v1, v) {
+		t.Logf("f(v, y1, y2) != f(y, y, y)")
+		return false
+	}
+
+	// Ensure the arguments were not modified.
+	return equal(x, x1) && equal(y, y1)
+}
+
+func TestAliasing(t *testing.T) { // TestAliasing runs each listed big.Int operation through testing/quick with aliased receivers and arguments
+	for name, f := range map[string]interface{}{ // each value is a property function handed to quick.Check
+		"Abs": func(v, x bigInt) bool {
+			return checkAliasingOneArg(t, (*big.Int).Abs, v.Int, x.Int)
+		},
+		"Add": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Add, v.Int, x.Int, y.Int)
+		},
+		"And": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).And, v.Int, x.Int, y.Int)
+		},
+		"AndNot": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).AndNot, v.Int, x.Int, y.Int)
+		},
+		"Div": func(v, x bigInt, y notZeroInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Div, v.Int, x.Int, y.Int)
+		},
+		"Exp-XY": func(v, x, y bigInt, z notZeroInt) bool { // modulus z stays fixed; base and exponent are the aliased pair
+			return checkAliasingTwoArgs(t, func(v, x, y *big.Int) *big.Int {
+				return v.Exp(x, y, z.Int)
+			}, v.Int, x.Int, y.Int)
+		},
+		"Exp-XZ": func(v, x, y bigInt, z notZeroInt) bool { // exponent y stays fixed; base and modulus are the aliased pair
+			return checkAliasingTwoArgs(t, func(v, x, z *big.Int) *big.Int {
+				return v.Exp(x, y.Int, z)
+			}, v.Int, x.Int, z.Int)
+		},
+		"Exp-YZ": func(v, x, y bigInt, z notZeroInt) bool { // base x stays fixed; exponent and modulus are the aliased pair
+			return checkAliasingTwoArgs(t, func(v, y, z *big.Int) *big.Int {
+				return v.Exp(x.Int, y, z)
+			}, v.Int, y.Int, z.Int)
+		},
+		"GCD": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, func(v, x, y *big.Int) *big.Int {
+				return v.GCD(nil, nil, x, y)
+			}, v.Int, x.Int, y.Int)
+		},
+		"GCD-X": func(v, x, y bigInt) bool { // v is passed as GCD's first output argument instead of as the receiver
+			a, b := new(big.Int), new(big.Int)
+			return checkAliasingTwoArgs(t, func(v, x, y *big.Int) *big.Int {
+				a.GCD(v, b, x, y)
+				return v
+			}, v.Int, x.Int, y.Int)
+		},
+		"GCD-Y": func(v, x, y bigInt) bool { // v is passed as GCD's second output argument instead of as the receiver
+			a, b := new(big.Int), new(big.Int)
+			return checkAliasingTwoArgs(t, func(v, x, y *big.Int) *big.Int {
+				a.GCD(b, v, x, y)
+				return v
+			}, v.Int, x.Int, y.Int)
+		},
+		"Lsh": func(v, x bigInt, n smallUint) bool {
+			return checkAliasingOneArg(t, func(v, x *big.Int) *big.Int {
+				return v.Lsh(x, n.uint)
+			}, v.Int, x.Int)
+		},
+		"Mod": func(v, x bigInt, y notZeroInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Mod, v.Int, x.Int, y.Int)
+		},
+		"ModInverse": func(v, x bigInt, y notZeroInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).ModInverse, v.Int, x.Int, y.Int)
+		},
+		"ModSqrt": func(v, x bigInt, p prime) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).ModSqrt, v.Int, x.Int, p.Int)
+		},
+		"Mul": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Mul, v.Int, x.Int, y.Int)
+		},
+		"Neg": func(v, x bigInt) bool {
+			return checkAliasingOneArg(t, (*big.Int).Neg, v.Int, x.Int)
+		},
+		"Not": func(v, x bigInt) bool {
+			return checkAliasingOneArg(t, (*big.Int).Not, v.Int, x.Int)
+		},
+		"Or": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Or, v.Int, x.Int, y.Int)
+		},
+		"Quo": func(v, x bigInt, y notZeroInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Quo, v.Int, x.Int, y.Int)
+		},
+		"Rand": func(v, x bigInt, seed int64) bool {
+			return checkAliasingOneArg(t, func(v, x *big.Int) *big.Int {
+				rnd := rand.New(rand.NewSource(seed)) // fresh source per call so reference and aliased runs see the same random stream
+				return v.Rand(rnd, x)
+			}, v.Int, x.Int)
+		},
+		"Rem": func(v, x bigInt, y notZeroInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Rem, v.Int, x.Int, y.Int)
+		},
+		"Rsh": func(v, x bigInt, n smallUint) bool {
+			return checkAliasingOneArg(t, func(v, x *big.Int) *big.Int {
+				return v.Rsh(x, n.uint)
+			}, v.Int, x.Int)
+		},
+		"Set": func(v, x bigInt) bool {
+			return checkAliasingOneArg(t, (*big.Int).Set, v.Int, x.Int)
+		},
+		"SetBit": func(v, x bigInt, i smallUint, b zeroOrOne) bool {
+			return checkAliasingOneArg(t, func(v, x *big.Int) *big.Int {
+				return v.SetBit(x, int(i.uint), b.uint)
+			}, v.Int, x.Int)
+		},
+		"Sqrt": func(v bigInt, x positiveInt) bool {
+			return checkAliasingOneArg(t, (*big.Int).Sqrt, v.Int, x.Int)
+		},
+		"Sub": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Sub, v.Int, x.Int, y.Int)
+		},
+		"Xor": func(v, x, y bigInt) bool {
+			return checkAliasingTwoArgs(t, (*big.Int).Xor, v.Int, x.Int, y.Int)
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			scale := 1.0 // multiplier applied to quick's default iteration count
+			switch name { // presumably the listed operations are slower, so they get fewer iterations
+			case "ModInverse", "GCD-Y", "GCD-X":
+				scale /= 5
+			case "Rand":
+				scale /= 10
+			case "Exp-XZ", "Exp-XY", "Exp-YZ":
+				scale /= 50
+			case "ModSqrt":
+				scale /= 500
+			}
+			if err := quick.Check(f, &quick.Config{
+				MaxCountScale: scale,
+			}); err != nil {
+				t.Error(err)
+			}
+		})
+	}
+}
diff --git a/src/math/big/arith.go b/src/math/big/arith.go
new file mode 100644
index 0000000..06e63e2
--- /dev/null
+++ b/src/math/big/arith.go
@@ -0,0 +1,277 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file provides Go implementations of elementary multi-precision
+// arithmetic operations on word vectors. These have the suffix _g.
+// These are needed for platforms without assembly implementations of these routines.
+// This file also contains elementary operations that can be implemented
+// sufficiently efficiently in Go.
+
+package big
+
+import "math/bits"
+
+// A Word represents a single digit of a multi-precision unsigned integer.
+type Word uint // same width as uint, i.e. _W (bits.UintSize) bits
+
+const (
+	_S = _W / 8 // word size in bytes
+
+	_W = bits.UintSize // word size in bits
+	_B = 1 << _W       // digit base (untyped constant; one more than the largest Word)
+	_M = _B - 1        // digit mask (all _W bits set)
+)
+
+// Many of the loops in this file are of the form
+//   for i := 0; i < len(z) && i < len(x) && i < len(y); i++
+// i < len(z) is the real condition.
+// However, checking i < len(x) && i < len(y) as well is faster than
+// having the compiler do a bounds check in the body of the loop;
+// remarkably it is even faster than hoisting the bounds check
+// out of the loop, by doing something like
+//   _, _ = x[len(z)-1], y[len(z)-1]
+// There are other ways to hoist the bounds check out of the loop,
+// but the compiler's BCE isn't powerful enough for them (yet?).
+// See the discussion in CL 164966.
+
+// ----------------------------------------------------------------------------
+// Elementary operations on words
+//
+// These operations are used by the vector operations below.
+
+// mulWW returns the double-word product of x and y: z1<<_W + z0 = x*y.
+func mulWW(x, y Word) (z1, z0 Word) {
+	hi, lo := bits.Mul(uint(x), uint(y))
+	return Word(hi), Word(lo)
+}
+
+// mulAddWWW_g returns the double-word result of x*y + c: z1<<_W + z0 = x*y + c.
+func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
+	hi, lo := bits.Mul(uint(x), uint(y))
+	var cc uint
+	lo, cc = bits.Add(lo, uint(c), 0) // add c into the low word; cc is the carry out
+	return Word(hi + cc), Word(lo)    // hi+cc cannot wrap because x*y + c < _B*_B
+}
+
+// nlz returns the number of leading zeros in x.
+// Wraps bits.LeadingZeros call for convenience.
+func nlz(x Word) uint {
+	return uint(bits.LeadingZeros(uint(x))) // convert the int result to the uint used for shift counts
+}
+
+// addVV_g sets z[i] = x[i] + y[i] + carry for each index and returns the final carry c, which is either 0 or 1.
+func addVV_g(z, x, y []Word) (c Word) {
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
+		zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c)) // c is the carry in, cc the carry out
+		z[i] = Word(zi)
+		c = Word(cc)
+	}
+	return
+}
+
+// subVV_g sets z[i] = x[i] - y[i] - borrow for each index and returns the final borrow c, which is either 0 or 1.
+func subVV_g(z, x, y []Word) (c Word) {
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
+		zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) // c is the borrow in, cc the borrow out
+		z[i] = Word(zi)
+		c = Word(cc)
+	}
+	return
+}
+
+// addVW_g sets z = x + y for a single word y. The resulting carry c is either 0 or 1 once the loop has run; for empty z it is y itself.
+func addVW_g(z, x []Word, y Word) (c Word) {
+	c = y // y enters as the initial carry
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x); i++ {
+		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
+		z[i] = Word(zi)
+		c = Word(cc)
+	}
+	return
+}
+
+// addVWlarge is addVW, but intended for large z.
+// The only difference is that we check on every iteration
+// whether we are done with carries,
+// and if so, switch to a much faster copy instead.
+// This is only a good idea for large z,
+// because the overhead of the check and the function call
+// outweigh the benefits when z is small.
+func addVWlarge(z, x []Word, y Word) (c Word) {
+	c = y // y enters as the initial carry
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x); i++ {
+		if c == 0 { // no carry left: the remaining words of z equal those of x
+			copy(z[i:], x[i:])
+			return
+		}
+		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
+		z[i] = Word(zi)
+		c = Word(cc)
+	}
+	return
+}
+
+func subVW_g(z, x []Word, y Word) (c Word) { // subVW_g sets z = x - y for a single word y and returns the final borrow
+	c = y // y enters as the initial borrow
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x); i++ {
+		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
+		z[i] = Word(zi)
+		c = Word(cc)
+	}
+	return
+}
+
+// subVWlarge is to subVW as addVWlarge is to addVW.
+func subVWlarge(z, x []Word, y Word) (c Word) {
+	c = y // y enters as the initial borrow
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x); i++ {
+		if c == 0 { // no borrow left: the remaining words of z equal those of x
+			copy(z[i:], x[i:])
+			return
+		}
+		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
+		z[i] = Word(zi)
+		c = Word(cc)
+	}
+	return
+}
+
+func shlVU_g(z, x []Word, s uint) (c Word) { // shlVU_g sets z = x << s and returns in c the bits shifted out of the top word
+	if s == 0 {
+		copy(z, x) // nothing to shift; c stays 0
+		return
+	}
+	if len(z) == 0 {
+		return
+	}
+	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
+	ŝ := _W - s
+	ŝ &= _W - 1 // ditto
+	c = x[len(z)-1] >> ŝ // high bits of the top word become the carry out
+	for i := len(z) - 1; i > 0; i-- { // walk from the top word down; each z[i] reads only x[i] and x[i-1]
+		z[i] = x[i]<<s | x[i-1]>>ŝ
+	}
+	z[0] = x[0] << s
+	return
+}
+
+func shrVU_g(z, x []Word, s uint) (c Word) { // shrVU_g sets z = x >> s and returns in c the bits shifted out of the bottom word, left-aligned
+	if s == 0 {
+		copy(z, x) // nothing to shift; c stays 0
+		return
+	}
+	if len(z) == 0 {
+		return
+	}
+	if len(x) != len(z) {
+		// This is an invariant guaranteed by the caller.
+		panic("len(x) != len(z)")
+	}
+	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
+	ŝ := _W - s
+	ŝ &= _W - 1 // ditto
+	c = x[0] << ŝ // low bits of the bottom word become the carry out
+	for i := 1; i < len(z); i++ { // walk from the bottom word up; each z[i-1] reads only x[i-1] and x[i]
+		z[i-1] = x[i-1]>>s | x[i]<<ŝ
+	}
+	z[len(z)-1] = x[len(z)-1] >> s
+	return
+}
+
+func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { // mulAddVWW_g sets z = x*y + r and returns the final carry word
+	c = r // r enters as the initial carry
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x); i++ {
+		c, z[i] = mulAddWWW_g(x[i], y, c) // high word becomes the next carry, low word is stored
+	}
+	return
+}
+
+func addMulVVW_g(z, x []Word, y Word) (c Word) { // addMulVVW_g sets z += x*y and returns the final carry word
+	// The comment near the top of this file discusses this for loop condition.
+	for i := 0; i < len(z) && i < len(x); i++ {
+		z1, z0 := mulAddWWW_g(x[i], y, z[i])     // z1:z0 = x[i]*y + z[i]
+		lo, cc := bits.Add(uint(z0), uint(c), 0) // add the incoming carry to the low word
+		c, z[i] = Word(cc), Word(lo)
+		c += z1 // outgoing carry = high word plus the carry out of the low-word addition
+	}
+	return
+}
+
+// divWW returns q and r with q = (x1<<_W + x0 - r)/y, given m = floor((_B^2 - 1)/d - _B) where d is y shifted left by nlz(y). Requires x1 < y.
+// m is an approximate reciprocal as described in "Improved Division by Invariant Integers
+// (IEEE Transactions on Computers, 11 Jun. 2010)"
+func divWW(x1, x0, y, m Word) (q, r Word) {
+	s := nlz(y)
+	if s != 0 { // normalize: shift dividend and divisor left by s so the divisor's top bit is set
+		x1 = x1<<s | x0>>(_W-s)
+		x0 <<= s
+		y <<= s
+	}
+	d := uint(y)
+	// We know that
+	//   m = ⎣(B^2-1)/d⎦-B
+	//   ⎣(B^2-1)/d⎦ = m+B
+	//   (B^2-1)/d = m+B+delta1    0 <= delta1 <= (d-1)/d
+	//   B^2/d = m+B+delta2        0 <= delta2 <= 1
+	// The quotient we're trying to compute is
+	//   quotient = ⎣(x1*B+x0)/d⎦
+	//            = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
+	//            = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
+	//            = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
+	// The latter two terms of this three-term sum are between 0 and 1.
+	// So we can compute just the first term, and we will be low by at most 2.
+	t1, t0 := bits.Mul(uint(m), uint(x1))
+	_, c := bits.Add(t0, uint(x0), 0) // only the carry out of the low word is needed
+	t1, _ = bits.Add(t1, uint(x1), c)
+	// The quotient is either t1, t1+1, or t1+2.
+	// We'll try t1 and adjust if needed.
+	qq := t1
+	// compute remainder r=x-d*q.
+	dq1, dq0 := bits.Mul(d, qq)
+	r0, b := bits.Sub(uint(x0), dq0, 0)
+	r1, _ := bits.Sub(uint(x1), dq1, b)
+	// The remainder we just computed is bounded above by B+d:
+	// r = x1*B + x0 - d*q.
+	//   = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
+	//   = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha)                                   0 <= alpha < 1
+	//   = x1*B + x0 - x1*d/B*m                         - x1*d - x0*d/B + d*alpha
+	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
+	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
+	//   = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta)        - x1*d - x0*d/B + d*alpha   0 <= beta < 1
+	//   = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
+	//   =        x0        + x1/B        + x1*d/B*beta        - x0*d/B + d*alpha
+	//   = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
+	//   <  B*(1-d/B) +  d*B/B          + d          because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
+	//   =  B - d     +  d              + d
+	//   = B+d
+	// So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
+	// Add 1 to q and subtract d from r. That guarantees that r is <B, so
+	// we no longer need to keep track of r1.
+	if r1 != 0 {
+		qq++
+		r0 -= d
+	}
+	// If the remainder is still too large, increment q one more time.
+	if r0 >= d {
+		qq++
+		r0 -= d
+	}
+	return Word(qq), Word(r0 >> s) // shift the remainder back to undo the normalization
+}
+
+// reciprocalWord returns the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
+func reciprocalWord(d1 Word) Word {
+	u := uint(d1 << nlz(d1)) // normalize: shift d1 left by its leading-zero count
+	x1 := ^u // high word of the dividend: _M - u
+	x0 := uint(_M) // low word of the dividend: all ones
+	rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
+	return Word(rec)
+}
diff --git a/src/math/big/arith_386.s b/src/math/big/arith_386.s
new file mode 100644
index 0000000..8cf4665
--- /dev/null
+++ b/src/math/big/arith_386.s
@@ -0,0 +1,236 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// func addVV(z, x, y []Word) (c Word)
+TEXT ·addVV(SB),NOSPLIT,$0	// z[i] = x[i] + y[i] + carry for i < len(z); returns the final carry
+	MOVL z+0(FP), DI	// DI = &z[0]
+	MOVL x+12(FP), SI	// SI = &x[0]
+	MOVL y+24(FP), CX	// CX = &y[0]
+	MOVL z_len+4(FP), BP	// BP = n = len(z)
+	MOVL $0, BX		// i = 0
+	MOVL $0, DX		// c = 0
+	JMP E1
+
+L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
+	ADDL DX, DX		// restore CF
+	ADCL (CX)(BX*4), AX	// AX += y[i] + CF
+	SBBL DX, DX		// save CF
+	MOVL AX, (DI)(BX*4)	// z[i] = AX
+	ADDL $1, BX		// i++
+
+E1:	CMPL BX, BP		// i < n
+	JL L1
+
+	NEGL DX			// DX is 0 or -1 after SBBL; negate to get c = 0 or 1
+	MOVL DX, c+36(FP)
+	RET
+
+
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SBBL instead of ADCL and label names)
+TEXT ·subVV(SB),NOSPLIT,$0	// z[i] = x[i] - y[i] - borrow for i < len(z); returns the final borrow
+	MOVL z+0(FP), DI	// DI = &z[0]
+	MOVL x+12(FP), SI	// SI = &x[0]
+	MOVL y+24(FP), CX	// CX = &y[0]
+	MOVL z_len+4(FP), BP	// BP = n = len(z)
+	MOVL $0, BX		// i = 0
+	MOVL $0, DX		// c = 0
+	JMP E2
+
+L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
+	ADDL DX, DX		// restore CF
+	SBBL (CX)(BX*4), AX	// AX -= y[i] + CF
+	SBBL DX, DX		// save CF
+	MOVL AX, (DI)(BX*4)	// z[i] = AX
+	ADDL $1, BX		// i++
+
+E2:	CMPL BX, BP		// i < n
+	JL L2
+
+	NEGL DX			// DX is 0 or -1 after SBBL; negate to get c = 0 or 1
+	MOVL DX, c+36(FP)
+	RET
+
+
+// func addVW(z, x []Word, y Word) (c Word)
+TEXT ·addVW(SB),NOSPLIT,$0	// z = x + y for a single word y; returns the final carry
+	MOVL z+0(FP), DI	// DI = &z[0]
+	MOVL x+12(FP), SI	// SI = &x[0]
+	MOVL y+24(FP), AX	// c = y
+	MOVL z_len+4(FP), BP	// BP = n = len(z)
+	MOVL $0, BX		// i = 0
+	JMP E3
+
+L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c
+	MOVL AX, (DI)(BX*4)	// z[i] = AX
+	SBBL AX, AX		// save CF
+	NEGL AX			// c = 0 or 1
+	ADDL $1, BX		// i++
+
+E3:	CMPL BX, BP		// i < n
+	JL L3
+
+	MOVL AX, c+28(FP)
+	RET
+
+
+// func subVW(z, x []Word, y Word) (c Word)
+TEXT ·subVW(SB),NOSPLIT,$0	// z = x - y for a single word y; returns the final borrow
+	MOVL z+0(FP), DI	// DI = &z[0]
+	MOVL x+12(FP), SI	// SI = &x[0]
+	MOVL y+24(FP), AX	// c = y
+	MOVL z_len+4(FP), BP	// BP = n = len(z)
+	MOVL $0, BX		// i = 0
+	JMP E4
+
+L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
+	SUBL AX, DX		// DX -= c
+	MOVL DX, (DI)(BX*4)	// z[i] = DX
+	SBBL AX, AX		// save CF
+	NEGL AX			// c = 0 or 1
+	ADDL $1, BX		// i++
+
+E4:	CMPL BX, BP		// i < n
+	JL L4
+
+	MOVL AX, c+28(FP)
+	RET
+
+
+// func shlVU(z, x []Word, s uint) (c Word)
+TEXT ·shlVU(SB),NOSPLIT,$0	// z = x << s, processed from the top word down; c receives the bits shifted out
+	MOVL z_len+4(FP), BX	// i = len(z)
+	SUBL $1, BX		// i--
+	JL X8b			// i < 0	(n <= 0)
+
+	// n > 0
+	MOVL z+0(FP), DI	// DI = &z[0]
+	MOVL x+12(FP), SI	// SI = &x[0]
+	MOVL s+24(FP), CX	// CX = shift count s
+	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
+	MOVL $0, DX
+	SHLL CX, AX, DX		// w1>>ŝ
+	MOVL DX, c+28(FP)
+
+	CMPL BX, $0
+	JLE X8a			// i <= 0
+
+	// i > 0
+L8:	MOVL AX, DX		// w = w1
+	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
+	SHLL CX, AX, DX		// w<<s | w1>>ŝ
+	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
+	SUBL $1, BX		// i--
+	JG L8			// i > 0
+
+	// i <= 0
+X8a:	SHLL CX, AX		// w1<<s
+	MOVL AX, (DI)		// z[0] = w1<<s
+	RET
+
+X8b:	MOVL $0, c+28(FP)	// empty z: c = 0
+	RET
+
+
+// func shrVU(z, x []Word, s uint) (c Word)
+TEXT ·shrVU(SB),NOSPLIT,$0	// z = x >> s, processed from the bottom word up; c receives the bits shifted out
+	MOVL z_len+4(FP), BP	// BP = n = len(z)
+	SUBL $1, BP		// n--
+	JL X9b			// n < 0	(n <= 0)
+
+	// n > 0
+	MOVL z+0(FP), DI	// DI = &z[0]
+	MOVL x+12(FP), SI	// SI = &x[0]
+	MOVL s+24(FP), CX	// CX = shift count s
+	MOVL (SI), AX		// w1 = x[0]
+	MOVL $0, DX
+	SHRL CX, AX, DX		// w1<<ŝ
+	MOVL DX, c+28(FP)
+
+	MOVL $0, BX		// i = 0
+	JMP E9
+
+	// i < n-1
+L9:	MOVL AX, DX		// w = w1
+	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
+	SHRL CX, AX, DX		// w>>s | w1<<ŝ
+	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
+	ADDL $1, BX		// i++
+
+E9:	CMPL BX, BP
+	JL L9			// i < n-1
+
+	// i >= n-1
+X9a:	SHRL CX, AX		// w1>>s
+	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
+	RET
+
+X9b:	MOVL $0, c+28(FP)	// empty z: c = 0
+	RET
+
+
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB),NOSPLIT,$0	// z = x*y + r; returns the final carry word
+	MOVL z+0(FP), DI
+	MOVL x+12(FP), SI
+	MOVL y+24(FP), BP	// BP = multiplier y
+	MOVL r+28(FP), CX	// c = r
+	MOVL z_len+4(FP), BX
+	LEAL (DI)(BX*4), DI	// DI = &z[n]; indexed below with negative i
+	LEAL (SI)(BX*4), SI	// SI = &x[n]
+	NEGL BX			// i = -n
+	JMP E5
+
+L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
+	MULL BP			// DX:AX = AX * y
+	ADDL CX, AX		// low word += c
+	ADCL $0, DX		// propagate into the high word
+	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
+	MOVL DX, CX		// c = high word
+	ADDL $1, BX		// i++
+
+E5:	CMPL BX, $0		// i < 0
+	JL L5
+
+	MOVL CX, c+32(FP)
+	RET
+
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+TEXT ·addMulVVW(SB),NOSPLIT,$0	// z += x*y; returns the final carry word
+	MOVL z+0(FP), DI
+	MOVL x+12(FP), SI
+	MOVL y+24(FP), BP	// BP = multiplier y
+	MOVL z_len+4(FP), BX
+	LEAL (DI)(BX*4), DI	// DI = &z[n]; indexed below with negative i
+	LEAL (SI)(BX*4), SI	// SI = &x[n]
+	NEGL BX			// i = -n
+	MOVL $0, CX		// c = 0
+	JMP E6
+
+L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
+	MULL BP			// DX:AX = AX * y
+	ADDL CX, AX		// low word += c
+	ADCL $0, DX		// propagate into the high word
+	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
+	ADCL $0, DX		// propagate that carry too
+	MOVL DX, CX		// c = high word
+	ADDL $1, BX		// i++
+
+E6:	CMPL BX, $0		// i < 0
+	JL L6
+
+	MOVL CX, c+28(FP)
+	RET
+
+
+
diff --git a/src/math/big/arith_amd64.go b/src/math/big/arith_amd64.go
new file mode 100644
index 0000000..89108fe
--- /dev/null
+++ b/src/math/big/arith_amd64.go
@@ -0,0 +1,12 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+package big
+
+import "internal/cpu"
+
+var support_adx = cpu.X86.HasADX && cpu.X86.HasBMI2 // true only when both ADX and BMI2 are reported; tested by the amd64 addMulVVW assembly to pick its adx path
diff --git a/src/math/big/arith_amd64.s b/src/math/big/arith_amd64.s
new file mode 100644
index 0000000..b1e914c
--- /dev/null
+++ b/src/math/big/arith_amd64.s
@@ -0,0 +1,516 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0.
+// It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared.
+// This is faster than using rotate instructions.
+
+// func addVV(z, x, y []Word) (c Word)
+TEXT ·addVV(SB),NOSPLIT,$0	// z[i] = x[i] + y[i] + carry for i < len(z); returns the final carry
+	MOVQ z_len+8(FP), DI	// DI = n = len(z)
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ y+48(FP), R9	// R9 = &y[0]
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+
+	MOVQ $0, CX		// c = 0
+	MOVQ $0, SI		// i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUBQ $4, DI		// n -= 4
+	JL V1			// if n < 0 goto V1
+
+U1:	// n >= 0
+	// regular loop body unrolled 4x
+	ADDQ CX, CX		// restore CF
+	MOVQ 0(R8)(SI*8), R11
+	MOVQ 8(R8)(SI*8), R12
+	MOVQ 16(R8)(SI*8), R13
+	MOVQ 24(R8)(SI*8), R14
+	ADCQ 0(R9)(SI*8), R11
+	ADCQ 8(R9)(SI*8), R12
+	ADCQ 16(R9)(SI*8), R13
+	ADCQ 24(R9)(SI*8), R14
+	MOVQ R11, 0(R10)(SI*8)
+	MOVQ R12, 8(R10)(SI*8)
+	MOVQ R13, 16(R10)(SI*8)
+	MOVQ R14, 24(R10)(SI*8)
+	SBBQ CX, CX		// save CF
+
+	ADDQ $4, SI		// i += 4
+	SUBQ $4, DI		// n -= 4
+	JGE U1			// if n >= 0 goto U1
+
+V1:	ADDQ $4, DI		// n += 4
+	JLE E1			// if n <= 0 goto E1
+
+L1:	// n > 0
+	ADDQ CX, CX		// restore CF
+	MOVQ 0(R8)(SI*8), R11
+	ADCQ 0(R9)(SI*8), R11
+	MOVQ R11, 0(R10)(SI*8)
+	SBBQ CX, CX		// save CF
+
+	ADDQ $1, SI		// i++
+	SUBQ $1, DI		// n--
+	JG L1			// if n > 0 goto L1
+
+E1:	NEGQ CX			// CX is 0 or -1 after SBBQ; negate to get c = 0 or 1
+	MOVQ CX, c+72(FP)	// return c
+	RET
+
+
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SBBQ instead of ADCQ and label names)
+TEXT ·subVV(SB),NOSPLIT,$0	// z[i] = x[i] - y[i] - borrow for i < len(z); returns the final borrow
+	MOVQ z_len+8(FP), DI	// DI = n = len(z)
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ y+48(FP), R9	// R9 = &y[0]
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+
+	MOVQ $0, CX		// c = 0
+	MOVQ $0, SI		// i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUBQ $4, DI		// n -= 4
+	JL V2			// if n < 0 goto V2
+
+U2:	// n >= 0
+	// regular loop body unrolled 4x
+	ADDQ CX, CX		// restore CF
+	MOVQ 0(R8)(SI*8), R11
+	MOVQ 8(R8)(SI*8), R12
+	MOVQ 16(R8)(SI*8), R13
+	MOVQ 24(R8)(SI*8), R14
+	SBBQ 0(R9)(SI*8), R11
+	SBBQ 8(R9)(SI*8), R12
+	SBBQ 16(R9)(SI*8), R13
+	SBBQ 24(R9)(SI*8), R14
+	MOVQ R11, 0(R10)(SI*8)
+	MOVQ R12, 8(R10)(SI*8)
+	MOVQ R13, 16(R10)(SI*8)
+	MOVQ R14, 24(R10)(SI*8)
+	SBBQ CX, CX		// save CF
+
+	ADDQ $4, SI		// i += 4
+	SUBQ $4, DI		// n -= 4
+	JGE U2			// if n >= 0 goto U2
+
+V2:	ADDQ $4, DI		// n += 4
+	JLE E2			// if n <= 0 goto E2
+
+L2:	// n > 0
+	ADDQ CX, CX		// restore CF
+	MOVQ 0(R8)(SI*8), R11
+	SBBQ 0(R9)(SI*8), R11
+	MOVQ R11, 0(R10)(SI*8)
+	SBBQ CX, CX		// save CF
+
+	ADDQ $1, SI		// i++
+	SUBQ $1, DI		// n--
+	JG L2			// if n > 0 goto L2
+
+E2:	NEGQ CX			// CX is 0 or -1 after SBBQ; negate to get c = 0 or 1
+	MOVQ CX, c+72(FP)	// return c
+	RET
+
+
+// func addVW(z, x []Word, y Word) (c Word)
+TEXT ·addVW(SB),NOSPLIT,$0	// z = x + y for a single word y; returns the final carry
+	MOVQ z_len+8(FP), DI	// DI = n = len(z)
+	CMPQ DI, $32
+	JG large		// more than 32 words: hand off to addVWlarge
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ y+48(FP), CX	// c = y
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+
+	MOVQ $0, SI		// i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUBQ $4, DI		// n -= 4
+	JL V3			// if n < 4 goto V3
+
+U3:	// n >= 0
+	// regular loop body unrolled 4x
+	MOVQ 0(R8)(SI*8), R11
+	MOVQ 8(R8)(SI*8), R12
+	MOVQ 16(R8)(SI*8), R13
+	MOVQ 24(R8)(SI*8), R14
+	ADDQ CX, R11		// add c to the lowest of the four words
+	ADCQ $0, R12		// ripple the carry through the other three
+	ADCQ $0, R13
+	ADCQ $0, R14
+	SBBQ CX, CX		// save CF
+	NEGQ CX			// c = 0 or 1
+	MOVQ R11, 0(R10)(SI*8)
+	MOVQ R12, 8(R10)(SI*8)
+	MOVQ R13, 16(R10)(SI*8)
+	MOVQ R14, 24(R10)(SI*8)
+
+	ADDQ $4, SI		// i += 4
+	SUBQ $4, DI		// n -= 4
+	JGE U3			// if n >= 0 goto U3
+
+V3:	ADDQ $4, DI		// n += 4
+	JLE E3			// if n <= 0 goto E3
+
+L3:	// n > 0
+	ADDQ 0(R8)(SI*8), CX	// CX = x[i] + c
+	MOVQ CX, 0(R10)(SI*8)	// z[i] = CX
+	SBBQ CX, CX		// save CF
+	NEGQ CX			// c = 0 or 1
+
+	ADDQ $1, SI		// i++
+	SUBQ $1, DI		// n--
+	JG L3			// if n > 0 goto L3
+
+E3:	MOVQ CX, c+56(FP)	// return c
+	RET
+large:
+	JMP ·addVWlarge(SB)	// tail-jump; the arguments are still in place on the frame
+
+
+// func subVW(z, x []Word, y Word) (c Word)
+// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
+TEXT ·subVW(SB),NOSPLIT,$0	// z = x - y for a single word y; returns the final borrow
+	MOVQ z_len+8(FP), DI	// DI = n = len(z)
+	CMPQ DI, $32
+	JG large		// more than 32 words: hand off to subVWlarge
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ y+48(FP), CX	// c = y
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+
+	MOVQ $0, SI		// i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUBQ $4, DI		// n -= 4
+	JL V4			// if n < 4 goto V4
+
+U4:	// n >= 0
+	// regular loop body unrolled 4x
+	MOVQ 0(R8)(SI*8), R11
+	MOVQ 8(R8)(SI*8), R12
+	MOVQ 16(R8)(SI*8), R13
+	MOVQ 24(R8)(SI*8), R14
+	SUBQ CX, R11		// subtract c from the lowest of the four words
+	SBBQ $0, R12		// ripple the borrow through the other three
+	SBBQ $0, R13
+	SBBQ $0, R14
+	SBBQ CX, CX		// save CF
+	NEGQ CX			// c = 0 or 1
+	MOVQ R11, 0(R10)(SI*8)
+	MOVQ R12, 8(R10)(SI*8)
+	MOVQ R13, 16(R10)(SI*8)
+	MOVQ R14, 24(R10)(SI*8)
+
+	ADDQ $4, SI		// i += 4
+	SUBQ $4, DI		// n -= 4
+	JGE U4			// if n >= 0 goto U4
+
+V4:	ADDQ $4, DI		// n += 4
+	JLE E4			// if n <= 0 goto E4
+
+L4:	// n > 0
+	MOVQ 0(R8)(SI*8), R11	// R11 = x[i]
+	SUBQ CX, R11		// R11 -= c
+	MOVQ R11, 0(R10)(SI*8)	// z[i] = R11
+	SBBQ CX, CX		// save CF
+	NEGQ CX			// c = 0 or 1
+
+	ADDQ $1, SI		// i++
+	SUBQ $1, DI		// n--
+	JG L4			// if n > 0 goto L4
+
+E4:	MOVQ CX, c+56(FP)	// return c
+	RET
+large:
+	JMP ·subVWlarge(SB)	// tail-jump; the arguments are still in place on the frame
+
+
+// func shlVU(z, x []Word, s uint) (c Word)
+TEXT ·shlVU(SB),NOSPLIT,$0	// z = x << s, processed from the top word down; c receives the bits shifted out
+	MOVQ z_len+8(FP), BX	// i = len(z)
+	SUBQ $1, BX		// i--
+	JL X8b			// i < 0	(n <= 0)
+
+	// n > 0
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ s+48(FP), CX	// CX = shift count s
+	MOVQ (R8)(BX*8), AX	// w1 = x[n-1]
+	MOVQ $0, DX
+	SHLQ CX, AX, DX		// w1>>ŝ
+	MOVQ DX, c+56(FP)
+
+	CMPQ BX, $0
+	JLE X8a			// i <= 0
+
+	// i > 0
+L8:	MOVQ AX, DX		// w = w1
+	MOVQ -8(R8)(BX*8), AX	// w1 = x[i-1]
+	SHLQ CX, AX, DX		// w<<s | w1>>ŝ
+	MOVQ DX, (R10)(BX*8)	// z[i] = w<<s | w1>>ŝ
+	SUBQ $1, BX		// i--
+	JG L8			// i > 0
+
+	// i <= 0
+X8a:	SHLQ CX, AX		// w1<<s
+	MOVQ AX, (R10)		// z[0] = w1<<s
+	RET
+
+X8b:	MOVQ $0, c+56(FP)	// empty z: c = 0
+	RET
+
+
+// func shrVU(z, x []Word, s uint) (c Word)
+TEXT ·shrVU(SB),NOSPLIT,$0	// z = x >> s, processed from the bottom word up; c receives the bits shifted out
+	MOVQ z_len+8(FP), R11	// R11 = n = len(z)
+	SUBQ $1, R11		// n--
+	JL X9b			// n < 0	(n <= 0)
+
+	// n > 0
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ s+48(FP), CX	// CX = shift count s
+	MOVQ (R8), AX		// w1 = x[0]
+	MOVQ $0, DX
+	SHRQ CX, AX, DX		// w1<<ŝ
+	MOVQ DX, c+56(FP)
+
+	MOVQ $0, BX		// i = 0
+	JMP E9
+
+	// i < n-1
+L9:	MOVQ AX, DX		// w = w1
+	MOVQ 8(R8)(BX*8), AX	// w1 = x[i+1]
+	SHRQ CX, AX, DX		// w>>s | w1<<ŝ
+	MOVQ DX, (R10)(BX*8)	// z[i] = w>>s | w1<<ŝ
+	ADDQ $1, BX		// i++
+
+E9:	CMPQ BX, R11
+	JL L9			// i < n-1
+
+	// i >= n-1
+X9a:	SHRQ CX, AX		// w1>>s
+	MOVQ AX, (R10)(R11*8)	// z[n-1] = w1>>s
+	RET
+
+X9b:	MOVQ $0, c+56(FP)	// empty z: c = 0
+	RET
+
+
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB),NOSPLIT,$0	// z = x*y + r; returns the final carry word
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ y+48(FP), R9	// R9 = multiplier y
+	MOVQ r+56(FP), CX	// c = r
+	MOVQ z_len+8(FP), R11	// R11 = n = len(z)
+	MOVQ $0, BX		// i = 0
+
+	CMPQ R11, $4
+	JL E5			// fewer than 4 words: skip the unrolled loop
+
+U5:	// i+4 <= n
+	// regular loop body unrolled 4x
+	MOVQ (0*8)(R8)(BX*8), AX
+	MULQ R9			// DX:AX = x[i] * y
+	ADDQ CX, AX		// low word += c
+	ADCQ $0, DX		// propagate into the high word
+	MOVQ AX, (0*8)(R10)(BX*8)
+	MOVQ DX, CX		// c = high word
+	MOVQ (1*8)(R8)(BX*8), AX
+	MULQ R9
+	ADDQ CX, AX
+	ADCQ $0, DX
+	MOVQ AX, (1*8)(R10)(BX*8)
+	MOVQ DX, CX
+	MOVQ (2*8)(R8)(BX*8), AX
+	MULQ R9
+	ADDQ CX, AX
+	ADCQ $0, DX
+	MOVQ AX, (2*8)(R10)(BX*8)
+	MOVQ DX, CX
+	MOVQ (3*8)(R8)(BX*8), AX
+	MULQ R9
+	ADDQ CX, AX
+	ADCQ $0, DX
+	MOVQ AX, (3*8)(R10)(BX*8)
+	MOVQ DX, CX
+	ADDQ $4, BX		// i += 4
+
+	LEAQ 4(BX), DX		// DX = i + 4
+	CMPQ DX, R11
+	JLE U5			// another full group of four fits
+	JMP E5
+
+L5:	MOVQ (R8)(BX*8), AX	// AX = x[i]
+	MULQ R9			// DX:AX = x[i] * y
+	ADDQ CX, AX		// low word += c
+	ADCQ $0, DX		// propagate into the high word
+	MOVQ AX, (R10)(BX*8)	// z[i] = low word
+	MOVQ DX, CX		// c = high word
+	ADDQ $1, BX		// i++
+
+E5:	CMPQ BX, R11		// i < n
+	JL L5
+
+	MOVQ CX, c+64(FP)
+	RET
+
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+TEXT ·addMulVVW(SB),NOSPLIT,$0	// z += x*y; returns the final carry word
+	CMPB ·support_adx(SB), $1
+	JEQ adx			// use the MULX/ADCX/ADOX path when support_adx is set
+	MOVQ z+0(FP), R10	// R10 = &z[0]
+	MOVQ x+24(FP), R8	// R8 = &x[0]
+	MOVQ y+48(FP), R9	// R9 = multiplier y
+	MOVQ z_len+8(FP), R11	// R11 = n = len(z)
+	MOVQ $0, BX		// i = 0
+	MOVQ $0, CX		// c = 0
+	MOVQ R11, R12
+	ANDQ $-2, R12		// R12 = n rounded down to an even count
+	CMPQ R11, $2
+	JAE A6			// at least two words: run the 2x unrolled loop
+	JMP E6
+
+A6:
+	MOVQ (R8)(BX*8), AX
+	MULQ R9			// DX:AX = x[i] * y
+	ADDQ (R10)(BX*8), AX	// low word += z[i]
+	ADCQ $0, DX
+	ADDQ CX, AX		// low word += c
+	ADCQ $0, DX
+	MOVQ DX, CX		// c = high word
+	MOVQ AX, (R10)(BX*8)	// z[i] = low word
+
+	MOVQ (8)(R8)(BX*8), AX
+	MULQ R9
+	ADDQ (8)(R10)(BX*8), AX
+	ADCQ $0, DX
+	ADDQ CX, AX
+	ADCQ $0, DX
+	MOVQ DX, CX
+	MOVQ AX, (8)(R10)(BX*8)
+
+	ADDQ $2, BX		// i += 2
+	CMPQ BX, R12
+	JL A6
+	JMP E6
+
+L6:	MOVQ (R8)(BX*8), AX
+	MULQ R9
+	ADDQ CX, AX
+	ADCQ $0, DX
+	ADDQ AX, (R10)(BX*8)
+	ADCQ $0, DX
+	MOVQ DX, CX
+	ADDQ $1, BX		// i++
+
+E6:	CMPQ BX, R11		// i < n
+	JL L6
+
+	MOVQ CX, c+56(FP)
+	RET
+
+adx:
+	MOVQ z_len+8(FP), R11
+	MOVQ z+0(FP), R10
+	MOVQ x+24(FP), R8
+	MOVQ y+48(FP), DX	// y is loaded into DX, the implicit source operand of MULXQ
+	MOVQ $0, BX   // i = 0
+	MOVQ $0, CX   // carry
+	CMPQ R11, $8
+	JAE  adx_loop_header	// at least eight words: run the 8x unrolled loop
+	CMPQ BX, R11
+	JL adx_short
+	MOVQ CX, c+56(FP)
+	RET
+
+adx_loop_header:
+	MOVQ  R11, R13
+	ANDQ  $-8, R13		// R13 = n rounded down to a multiple of 8
+adx_loop:
+	XORQ  R9, R9  // unset flags
+	MULXQ (R8), SI, DI	// DI:SI = x[i] * y
+	ADCXQ CX,SI		// add the incoming carry word
+	ADOXQ (R10), SI		// add z[i]
+	MOVQ  SI,(R10)
+
+	MULXQ 8(R8), AX, CX
+	ADCXQ DI, AX		// add the previous product's high word
+	ADOXQ 8(R10), AX
+	MOVQ  AX, 8(R10)
+
+	MULXQ 16(R8), SI, DI
+	ADCXQ CX, SI
+	ADOXQ 16(R10), SI
+	MOVQ  SI, 16(R10)
+
+	MULXQ 24(R8), AX, CX
+	ADCXQ DI, AX
+	ADOXQ 24(R10), AX
+	MOVQ  AX, 24(R10)
+
+	MULXQ 32(R8), SI, DI
+	ADCXQ CX, SI
+	ADOXQ 32(R10), SI
+	MOVQ  SI, 32(R10)
+
+	MULXQ 40(R8), AX, CX
+	ADCXQ DI, AX
+	ADOXQ 40(R10), AX
+	MOVQ  AX, 40(R10)
+
+	MULXQ 48(R8), SI, DI
+	ADCXQ CX, SI
+	ADOXQ 48(R10), SI
+	MOVQ  SI, 48(R10)
+
+	MULXQ 56(R8), AX, CX
+	ADCXQ DI, AX
+	ADOXQ 56(R10), AX
+	MOVQ  AX, 56(R10)
+
+	ADCXQ R9, CX		// R9 is zero: fold the pending ADCX carry into CX
+	ADOXQ R9, CX		// likewise for the pending ADOX carry
+
+	ADDQ $64, R8		// advance x by eight words
+	ADDQ $64, R10		// advance z by eight words
+	ADDQ $8, BX
+
+	CMPQ BX, R13
+	JL adx_loop
+	MOVQ z+0(FP), R10	// reload the base pointers; adx_short indexes from them with BX
+	MOVQ x+24(FP), R8
+	CMPQ BX, R11
+	JL adx_short
+	MOVQ CX, c+56(FP)
+	RET
+
+adx_short:
+	MULXQ (R8)(BX*8), SI, DI	// DI:SI = x[i] * y
+	ADDQ CX, SI		// low word += c
+	ADCQ $0, DI
+	ADDQ SI, (R10)(BX*8)	// z[i] += low word
+	ADCQ $0, DI
+	MOVQ DI, CX		// c = high word
+	ADDQ $1, BX		// i++
+
+	CMPQ BX, R11
+	JL adx_short
+
+	MOVQ CX, c+56(FP)
+	RET
+
+
+
diff --git a/src/math/big/arith_arm.s b/src/math/big/arith_arm.s
new file mode 100644
index 0000000..10054bd
--- /dev/null
+++ b/src/math/big/arith_arm.s
@@ -0,0 +1,273 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// func addVV(z, x, y []Word) (c Word)
+TEXT ·addVV(SB),NOSPLIT,$0
+	ADD.S	$0, R0		// clear carry flag
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	z_len+4(FP), R4	// R4 = len(z)
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	MOVW	y+24(FP), R3	// R3 = &y[0]
+	ADD	R4<<2, R1, R4	// R4 = &z[len(z)], the end pointer (4-byte words)
+	B E1			// enter at the loop test so len(z) == 0 does no work
+L1:
+	MOVW.P	4(R2), R5	// R5 = x[i], then advance R2
+	MOVW.P	4(R3), R6	// R6 = y[i], then advance R3
+	ADC.S	R6, R5		// R5 = x[i] + y[i] + carry; updates carry
+	MOVW.P	R5, 4(R1)	// z[i] = R5, then advance R1
+E1:
+	TEQ	R1, R4		// loop test; the ADC.S chain relies on carry surviving it
+	BNE L1
+
+	MOVW	$0, R0
+	MOVW.CS	$1, R0		// c = 1 if the final carry is set, else 0
+	MOVW	R0, c+36(FP)
+	RET
+
+
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SBC instead of ADC and label names)
+TEXT ·subVV(SB),NOSPLIT,$0
+	SUB.S	$0, R0		// clear borrow flag
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	z_len+4(FP), R4	// R4 = len(z)
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	MOVW	y+24(FP), R3	// R3 = &y[0]
+	ADD	R4<<2, R1, R4	// R4 = &z[len(z)], the end pointer (4-byte words)
+	B E2			// enter at the loop test so len(z) == 0 does no work
+L2:
+	MOVW.P	4(R2), R5	// R5 = x[i], then advance R2
+	MOVW.P	4(R3), R6	// R6 = y[i], then advance R3
+	SBC.S	R6, R5		// R5 = x[i] - y[i] - borrow; updates the flag
+	MOVW.P	R5, 4(R1)	// z[i] = R5, then advance R1
+E2:
+	TEQ	R1, R4		// loop test; the SBC.S chain relies on carry surviving it
+	BNE L2
+
+	MOVW	$0, R0
+	MOVW.CC	$1, R0		// c = 1 if carry is clear (a borrow occurred), else 0
+	MOVW	R0, c+36(FP)
+	RET
+
+
+// func addVW(z, x []Word, y Word) (c Word)
+TEXT ·addVW(SB),NOSPLIT,$0
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	z_len+4(FP), R4	// R4 = len(z)
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	MOVW	y+24(FP), R3	// R3 = y
+	ADD	R4<<2, R1, R4	// R4 = &z[len(z)], the end pointer
+	TEQ	R1, R4
+	BNE L3a
+	MOVW	R3, c+28(FP)	// len(z) == 0: y is returned unchanged as c
+	RET
+L3a:
+	MOVW.P	4(R2), R5	// R5 = x[0], then advance R2
+	ADD.S	R3, R5		// first word: x[0] + y, sets carry
+	MOVW.P	R5, 4(R1)	// z[0] = R5, then advance R1
+	B	E3
+L3:
+	MOVW.P	4(R2), R5	// R5 = x[i], then advance R2
+	ADC.S	$0, R5		// remaining words: x[i] + carry
+	MOVW.P	R5, 4(R1)	// z[i] = R5, then advance R1
+E3:
+	TEQ	R1, R4		// loop until R1 reaches the end pointer
+	BNE	L3
+
+	MOVW	$0, R0
+	MOVW.CS	$1, R0		// c = 1 if the final carry is set, else 0
+	MOVW	R0, c+28(FP)
+	RET
+
+
+// func subVW(z, x []Word, y Word) (c Word)
+TEXT ·subVW(SB),NOSPLIT,$0
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	z_len+4(FP), R4	// R4 = len(z)
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	MOVW	y+24(FP), R3	// R3 = y
+	ADD	R4<<2, R1, R4	// R4 = &z[len(z)], the end pointer
+	TEQ	R1, R4
+	BNE L4a
+	MOVW	R3, c+28(FP)	// len(z) == 0: y is returned unchanged as c
+	RET
+L4a:
+	MOVW.P	4(R2), R5	// R5 = x[0], then advance R2
+	SUB.S	R3, R5		// first word: x[0] - y, sets the borrow state
+	MOVW.P	R5, 4(R1)	// z[0] = R5, then advance R1
+	B	E4
+L4:
+	MOVW.P	4(R2), R5	// R5 = x[i], then advance R2
+	SBC.S	$0, R5		// remaining words: x[i] - borrow
+	MOVW.P	R5, 4(R1)	// z[i] = R5, then advance R1
+E4:
+	TEQ	R1, R4		// loop until R1 reaches the end pointer
+	BNE	L4
+
+	MOVW	$0, R0
+	MOVW.CC	$1, R0		// c = 1 if carry is clear (a borrow occurred), else 0
+	MOVW	R0, c+28(FP)
+	RET
+
+
+// func shlVU(z, x []Word, s uint) (c Word)
+TEXT ·shlVU(SB),NOSPLIT,$0
+	MOVW	z_len+4(FP), R5	// R5 = len(z)
+	TEQ	$0, R5
+	BEQ	X7		// empty z: just return c = 0
+
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	ADD	R5<<2, R2, R2	// R2 = &x[len(z)]; words are walked from high to low
+	ADD	R5<<2, R1, R5	// R5 = &z[len(z)]
+	MOVW	s+24(FP), R3	// R3 = s
+	TEQ	$0, R3	// shift 0 is special
+	BEQ	Y7
+	ADD	$4, R1	// stop one word early
+	MOVW	$32, R4
+	SUB	R3, R4		// R4 = 32 - s
+	MOVW	$0, R7
+
+	MOVW.W	-4(R2), R6	// step R2 back one word, then R6 = x[n-1]
+	MOVW	R6<<R3, R7	// R7 = x[n-1] << s, the pending high part of z[n-1]
+	MOVW	R6>>R4, R6	// R6 = bits shifted out of the top word
+	MOVW	R6, c+28(FP)	// those bits are the result c
+	B E7
+
+L7:
+	MOVW.W	-4(R2), R6	// step R2 back one word, then R6 = next lower word of x
+	ORR	R6>>R4, R7	// merge its top s bits into the pending word
+	MOVW.W	R7, -4(R5)	// step R5 back one word and store the finished word
+	MOVW	R6<<R3, R7	// start the next pending word
+E7:
+	TEQ	R1, R5		// done when only z[0] remains to be written
+	BNE	L7
+
+	MOVW	R7, -4(R5)	// z[0] = x[0] << s
+	RET
+
+Y7:	// copy loop, because shift 0 == shift 32
+	MOVW.W	-4(R2), R6	// copy one word, high to low
+	MOVW.W	R6, -4(R5)
+	TEQ	R1, R5
+	BNE Y7
+
+X7:
+	MOVW	$0, R1
+	MOVW	R1, c+28(FP)	// c = 0
+	RET
+
+
+// func shrVU(z, x []Word, s uint) (c Word)
+TEXT ·shrVU(SB),NOSPLIT,$0
+	MOVW	z_len+4(FP), R5	// R5 = len(z)
+	TEQ	$0, R5
+	BEQ	X6		// empty z: just return c = 0
+
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	ADD	R5<<2, R1, R5	// R5 = &z[len(z)]
+	MOVW	s+24(FP), R3	// R3 = s
+	TEQ	$0, R3	// shift 0 is special
+	BEQ Y6
+	SUB	$4, R5	// stop one word early
+	MOVW	$32, R4
+	SUB	R3, R4		// R4 = 32 - s
+	MOVW	$0, R7
+
+	// first word
+	MOVW.P	4(R2), R6	// R6 = x[0], then advance R2
+	MOVW	R6>>R3, R7	// R7 = x[0] >> s, the pending low part of z[0]
+	MOVW	R6<<R4, R6	// R6 = bits shifted out of the bottom word
+	MOVW	R6, c+28(FP)	// those bits are the result c
+	B E6
+
+	// word loop
+L6:
+	MOVW.P	4(R2), R6	// R6 = next higher word of x, then advance R2
+	ORR	R6<<R4, R7	// merge its low s bits into the pending word
+	MOVW.P	R7, 4(R1)	// store the finished word, then advance R1
+	MOVW	R6>>R3, R7	// start the next pending word
+E6:
+	TEQ	R1, R5		// done when only z[n-1] remains to be written
+	BNE	L6
+
+	MOVW	R7, 0(R1)	// z[n-1] = x[n-1] >> s
+	RET
+
+Y6:	// copy loop, because shift 0 == shift 32
+	MOVW.P	4(R2), R6	// copy one word, low to high
+	MOVW.P	R6, 4(R1)
+	TEQ R1, R5
+	BNE Y6
+
+X6:
+	MOVW	$0, R1
+	MOVW	R1, c+28(FP)	// c = 0
+	RET
+
+
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	MOVW	$0, R0		// R0 = 0, used as the zero operand of ADC below
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	z_len+4(FP), R5	// R5 = len(z)
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	MOVW	y+24(FP), R3	// R3 = y
+	MOVW	r+28(FP), R4	// R4 = r, the running carry word
+	ADD	R5<<2, R1, R5	// R5 = &z[len(z)], the end pointer
+	B E8
+
+	// word loop
+L8:
+	MOVW.P	4(R2), R6	// R6 = x[i], then advance R2
+	MULLU	R6, R3, (R7, R6)	// (R7, R6) = x[i] * y as (high, low) words
+	ADD.S	R4, R6		// low += running carry word
+	ADC	R0, R7		// high += carry out of that addition
+	MOVW.P	R6, 4(R1)	// z[i] = low, then advance R1
+	MOVW	R7, R4		// high becomes the next carry word
+E8:
+	TEQ	R1, R5
+	BNE	L8
+
+	MOVW	R4, c+32(FP)	// c = final carry word
+	RET
+
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	MOVW	$0, R0		// R0 = 0, used as the zero operand of ADC below
+	MOVW	z+0(FP), R1	// R1 = &z[0]
+	MOVW	z_len+4(FP), R5	// R5 = len(z)
+	MOVW	x+12(FP), R2	// R2 = &x[0]
+	MOVW	y+24(FP), R3	// R3 = y
+	ADD	R5<<2, R1, R5	// R5 = &z[len(z)], the end pointer
+	MOVW	$0, R4		// R4 = running carry word, initially 0
+	B E9
+
+	// word loop
+L9:
+	MOVW.P	4(R2), R6	// R6 = x[i], then advance R2
+	MULLU	R6, R3, (R7, R6)	// (R7, R6) = x[i] * y as (high, low) words
+	ADD.S	R4, R6		// low += running carry word
+	ADC	R0, R7		// high += carry out of that addition
+	MOVW	0(R1), R4	// R4 = current z[i]
+	ADD.S	R4, R6		// low += z[i]
+	ADC	R0, R7		// high += carry out of that addition
+	MOVW.P	R6, 4(R1)	// z[i] = low, then advance R1
+	MOVW	R7, R4		// high becomes the next carry word
+E9:
+	TEQ	R1, R5
+	BNE	L9
+
+	MOVW	R4, c+28(FP)	// c = final carry word
+	RET
diff --git a/src/math/big/arith_arm64.s b/src/math/big/arith_arm64.s
new file mode 100644
index 0000000..addf2d6
--- /dev/null
+++ b/src/math/big/arith_arm64.s
@@ -0,0 +1,573 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// TODO: Consider re-implementing using Advanced SIMD
+// once the assembler supports those instructions.
+
+// func addVV(z, x, y []Word) (c Word)
+TEXT ·addVV(SB),NOSPLIT,$0
+	MOVD	z_len+8(FP), R0	// R0 = len(z), counts the words still to do
+	MOVD	x+24(FP), R8	// R8 = &x[0]
+	MOVD	y+48(FP), R9	// R9 = &y[0]
+	MOVD	z+0(FP), R10	// R10 = &z[0]
+	ADDS	$0, R0		// clear carry flag
+	TBZ	$0, R0, two	// bit 0 of the count clear: no single leading word
+	MOVD.P	8(R8), R11
+	MOVD.P	8(R9), R15
+	ADCS	R15, R11	// one word: x[i] + y[i] + carry
+	MOVD.P	R11, 8(R10)
+	SUB	$1, R0
+two:
+	TBZ	$1, R0, loop	// bit 1 of the count clear: no leading pair
+	LDP.P	16(R8), (R11, R12)
+	LDP.P	16(R9), (R15, R16)
+	ADCS	R15, R11	// two words, carry chained through both
+	ADCS	R16, R12
+	STP.P	(R11, R12), 16(R10)
+	SUB	$2, R0
+loop:
+	CBZ	R0, done	// careful not to touch the carry flag
+	LDP.P	32(R8), (R11, R12)	// first two words of x; R8 advances past all four
+	LDP	-16(R8), (R13, R14)	// last two words of x, relative to the advanced R8
+	LDP.P	32(R9), (R15, R16)	// same pattern for y
+	LDP	-16(R9), (R17, R19)
+	ADCS	R15, R11
+	ADCS	R16, R12
+	ADCS	R17, R13
+	ADCS	R19, R14
+	STP.P	(R11, R12), 32(R10)	// same pattern for the stores to z
+	STP	(R13, R14), -16(R10)
+	SUB	$4, R0
+	B	loop
+done:
+	CSET	HS, R0		// extract carry flag
+	MOVD	R0, c+72(FP)
+	RET
+
+
+// func subVV(z, x, y []Word) (c Word)
+TEXT ·subVV(SB),NOSPLIT,$0
+	MOVD	z_len+8(FP), R0	// R0 = len(z), counts the words still to do
+	MOVD	x+24(FP), R8	// R8 = &x[0]
+	MOVD	y+48(FP), R9	// R9 = &y[0]
+	MOVD	z+0(FP), R10	// R10 = &z[0]
+	CMP	R0, R0		// set carry flag
+	TBZ	$0, R0, two	// bit 0 of the count clear: no single leading word
+	MOVD.P	8(R8), R11
+	MOVD.P	8(R9), R15
+	SBCS	R15, R11	// one word: x[i] - y[i] - borrow
+	MOVD.P	R11, 8(R10)
+	SUB	$1, R0
+two:
+	TBZ	$1, R0, loop	// bit 1 of the count clear: no leading pair
+	LDP.P	16(R8), (R11, R12)
+	LDP.P	16(R9), (R15, R16)
+	SBCS	R15, R11	// two words, borrow chained through both
+	SBCS	R16, R12
+	STP.P	(R11, R12), 16(R10)
+	SUB	$2, R0
+loop:
+	CBZ	R0, done	// careful not to touch the carry flag
+	LDP.P	32(R8), (R11, R12)	// first two words of x; R8 advances past all four
+	LDP	-16(R8), (R13, R14)	// last two words of x, relative to the advanced R8
+	LDP.P	32(R9), (R15, R16)	// same pattern for y
+	LDP	-16(R9), (R17, R19)
+	SBCS	R15, R11
+	SBCS	R16, R12
+	SBCS	R17, R13
+	SBCS	R19, R14
+	STP.P	(R11, R12), 32(R10)	// same pattern for the stores to z
+	STP	(R13, R14), -16(R10)
+	SUB	$4, R0
+	B	loop
+done:
+	CSET	LO, R0		// extract borrow: 1 if the carry flag is clear
+	MOVD	R0, c+72(FP)
+	RET
+
+#define vwOneOp(instr, op1)				\
+	MOVD.P	8(R1), R4;				\
+	instr	op1, R4;				\
+	MOVD.P	R4, 8(R3);
+
+// vwPreIter handles the first 1~4 elements before the main iteration in addVW/subVW.
+#define vwPreIter(instr1, instr2, counter, target)	\
+	vwOneOp(instr1, R2);				\
+	SUB	$1, counter;				\
+	CBZ	counter, target;			\
+	vwOneOp(instr2, $0);				\
+	SUB	$1, counter;				\
+	CBZ	counter, target;			\
+	vwOneOp(instr2, $0);				\
+	SUB	$1, counter;				\
+	CBZ	counter, target;			\
+	vwOneOp(instr2, $0);
+
+// vwOneIter does one 4-word iteration of add or sub (with carry) in addVW/subVW.
+#define vwOneIter(instr, counter, exit)	\
+	CBZ	counter, exit;		\	// careful not to touch the carry flag
+	LDP.P	32(R1), (R4, R5);	\
+	LDP	-16(R1), (R6, R7);	\
+	instr	$0, R4, R8;		\
+	instr	$0, R5, R9;		\
+	instr	$0, R6, R10;		\
+	instr	$0, R7, R11;		\
+	STP.P	(R8, R9), 32(R3);	\
+	STP	(R10, R11), -16(R3);	\
+	SUB	$4, counter;
+
+// vwOneIterCopy does one 4-word iteration of plain copy (x to z) in addVW/subVW.
+#define vwOneIterCopy(counter, exit)			\
+	CBZ	counter, exit;				\
+	LDP.P	32(R1), (R4, R5);			\
+	LDP	-16(R1), (R6, R7);			\
+	STP.P	(R4, R5), 32(R3);			\
+	STP	(R6, R7), -16(R3);			\
+	SUB	$4, counter;
+
+// func addVW(z, x []Word, y Word) (c Word)
+// The 'large' branch handles large 'z'. It checks the carry flag on every iteration
+// and switches to copy if we are done with carries. The copying is skipped as well
+// if 'x' and 'z' happen to share the same underlying storage.
+// The overhead of the checking and branching is visible when 'z' is small (~5%),
+// so a threshold of 32 is used and the small-sized path is left entirely untouched.
+TEXT ·addVW(SB),NOSPLIT,$0
+	MOVD	z+0(FP), R3	// R3 = &z[0]
+	MOVD	z_len+8(FP), R0	// R0 = len(z), counts the words still to do
+	MOVD	x+24(FP), R1	// R1 = &x[0]
+	MOVD	y+48(FP), R2	// R2 = y
+	CMP	$32, R0
+	BGE	large		// large-sized 'z' and 'x'
+	CBZ	R0, len0	// the length of z is 0
+	MOVD.P	8(R1), R4
+	ADDS	R2, R4		// z[0] = x[0] + y, set carry
+	MOVD.P	R4, 8(R3)
+	SUB	$1, R0
+	CBZ	R0, len1	// the length of z is 1
+	TBZ	$0, R0, two
+	MOVD.P	8(R1), R4	// do it once
+	ADCS	$0, R4
+	MOVD.P	R4, 8(R3)
+	SUB	$1, R0
+two:				// do it twice
+	TBZ	$1, R0, loop
+	LDP.P	16(R1), (R4, R5)
+	ADCS	$0, R4, R8	// c, z[i] = x[i] + c
+	ADCS	$0, R5, R9
+	STP.P	(R8, R9), 16(R3)
+	SUB	$2, R0
+loop:				// do four times per round
+	vwOneIter(ADCS, R0, len1)
+	B	loop
+len1:
+	CSET	HS, R2		// extract carry flag
+len0:
+	MOVD	R2, c+56(FP)	// c = carry, or y itself when len(z) == 0
+done:
+	RET
+large:
+	AND	$0x3, R0, R10	// R10 = len(z) mod 4
+	AND	$~0x3, R0	// R0 = len(z) rounded down to a multiple of 4
+	// unrolling for the first 1~4 elements to avoid saving the carry
+	// flag in each step, adjust $R0 if we unrolled 4 elements
+	vwPreIter(ADDS, ADCS, R10, add4)
+	SUB	$4, R0
+add4:
+	BCC	copy		// carry clear: nothing left to propagate
+	vwOneIter(ADCS, R0, len1)
+	B	add4
+copy:
+	MOVD	ZR, c+56(FP)	// c = 0
+	CMP	R1, R3
+	BEQ	done		// x and z pointers are equal: no copy needed
+copy_4:				// no carry flag, copy the rest
+	vwOneIterCopy(R0, done)
+	B	copy_4
+
+// func subVW(z, x []Word, y Word) (c Word)
+// The 'large' branch handles large 'z'. It checks the carry flag on every iteration
+// and switches to copy if we are done with carries. The copying is skipped as well
+// if 'x' and 'z' happen to share the same underlying storage.
+// The overhead of the checking and branching is visible when 'z' is small (~5%),
+// so a threshold of 32 is used and the small-sized path is left entirely untouched.
+TEXT ·subVW(SB),NOSPLIT,$0
+	MOVD	z+0(FP), R3	// R3 = &z[0]
+	MOVD	z_len+8(FP), R0	// R0 = len(z), counts the words still to do
+	MOVD	x+24(FP), R1	// R1 = &x[0]
+	MOVD	y+48(FP), R2	// R2 = y
+	CMP	$32, R0
+	BGE	large		// large-sized 'z' and 'x'
+	CBZ	R0, len0	// the length of z is 0
+	MOVD.P	8(R1), R4
+	SUBS	R2, R4		// z[0] = x[0] - y, set carry
+	MOVD.P	R4, 8(R3)
+	SUB	$1, R0
+	CBZ	R0, len1	// the length of z is 1
+	TBZ	$0, R0, two	// do it once
+	MOVD.P	8(R1), R4
+	SBCS	$0, R4
+	MOVD.P	R4, 8(R3)
+	SUB	$1, R0
+two:				// do it twice
+	TBZ	$1, R0, loop
+	LDP.P	16(R1), (R4, R5)
+	SBCS	$0, R4, R8	// c, z[i] = x[i] - c
+	SBCS	$0, R5, R9
+	STP.P	(R8, R9), 16(R3)
+	SUB	$2, R0
+loop:				// do four times per round
+	vwOneIter(SBCS, R0, len1)
+	B	loop
+len1:
+	CSET	LO, R2		// extract borrow: 1 if the carry flag is clear
+len0:
+	MOVD	R2, c+56(FP)	// c = borrow, or y itself when len(z) == 0
+done:
+	RET
+large:
+	AND	$0x3, R0, R10	// R10 = len(z) mod 4
+	AND	$~0x3, R0	// R0 = len(z) rounded down to a multiple of 4
+	// unrolling for the first 1~4 elements to avoid saving the carry
+	// flag in each step, adjust $R0 if we unrolled 4 elements
+	vwPreIter(SUBS, SBCS, R10, sub4)
+	SUB	$4, R0
+sub4:
+	BCS	copy		// carry set: no borrow left to propagate
+	vwOneIter(SBCS, R0, len1)
+	B	sub4
+copy:
+	MOVD	ZR, c+56(FP)	// c = 0
+	CMP	R1, R3
+	BEQ	done		// x and z pointers are equal: no copy needed
+copy_4:				// no carry flag, copy the rest
+	vwOneIterCopy(R0, done)
+	B	copy_4
+
+// func shlVU(z, x []Word, s uint) (c Word)
+// This implementation handles the shift operation from the high word to the low word,
+// which may be an error for the case where the low word of x overlaps with the high
+// word of z. When calling this function directly, you need to pay attention to this
+// situation.
+TEXT ·shlVU(SB),NOSPLIT,$0
+	LDP	z+0(FP), (R0, R1)	// R0 = z.ptr, R1 = len(z)
+	MOVD	x+24(FP), R2
+	MOVD	s+48(FP), R3
+	ADD	R1<<3, R0	// R0 = &z[n]
+	ADD	R1<<3, R2	// R2 = &x[n]
+	CBZ	R1, len0
+	CBZ	R3, copy	// if the number of shift is 0, just copy x to z
+	MOVD	$64, R4
+	SUB	R3, R4		// R4 = 64 - s
+	// handling the most significant element x[n-1]
+	MOVD.W	-8(R2), R6
+	LSR	R4, R6, R5	// return value
+	LSL	R3, R6, R8	// x[i] << s
+	SUB	$1, R1
+one:	TBZ	$0, R1, two
+	MOVD.W	-8(R2), R6	// single word: step back and load the next lower x word
+	LSR	R4, R6, R7
+	ORR	R8, R7		// combine with the pending high part in R8
+	LSL	R3, R6, R8	// new pending high part
+	SUB	$1, R1
+	MOVD.W	R7, -8(R0)
+two:
+	TBZ	$1, R1, loop
+	LDP.W	-16(R2), (R6, R7)	// pair: R7 is the higher word, R6 the lower
+	LSR	R4, R7, R10
+	ORR	R8, R10
+	LSL	R3, R7
+	LSR	R4, R6, R9
+	ORR	R7, R9
+	LSL	R3, R6, R8	// new pending high part
+	SUB	$2, R1
+	STP.W	(R9, R10), -16(R0)
+loop:
+	CBZ	R1, done
+	LDP.W	-32(R2), (R10, R11)	// step R2 back four words, load the two lowest
+	LDP	16(R2), (R12, R13)	// load the two highest of the four
+	LSR	R4, R13, R23
+	ORR	R8, R23		// z[i] = (x[i] << s) | (x[i-1] >> (64 - s))
+	LSL	R3, R13
+	LSR	R4, R12, R22
+	ORR	R13, R22
+	LSL	R3, R12
+	LSR	R4, R11, R21
+	ORR	R12, R21
+	LSL	R3, R11
+	LSR	R4, R10, R20
+	ORR	R11, R20
+	LSL	R3, R10, R8	// new pending high part
+	STP.W	(R20, R21), -32(R0)
+	STP	(R22, R23), 16(R0)
+	SUB	$4, R1
+	B	loop
+done:
+	MOVD.W	R8, -8(R0)	// the first element x[0]
+	MOVD	R5, c+56(FP)	// the part moved out from x[n-1]
+	RET
+copy:
+	CMP	R0, R2
+	BEQ	len0		// x and z pointers are equal: no copy needed
+	TBZ	$0, R1, ctwo
+	MOVD.W	-8(R2), R4
+	MOVD.W	R4, -8(R0)
+	SUB	$1, R1
+ctwo:
+	TBZ	$1, R1, cloop
+	LDP.W	-16(R2), (R4, R5)
+	STP.W	(R4, R5), -16(R0)
+	SUB	$2, R1
+cloop:
+	CBZ	R1, len0
+	LDP.W	-32(R2), (R4, R5)
+	LDP	16(R2), (R6, R7)
+	STP.W	(R4, R5), -32(R0)
+	STP	(R6, R7), 16(R0)
+	SUB	$4, R1
+	B	cloop
+len0:
+	MOVD	$0, c+56(FP)	// c = 0
+	RET
+
+// func shrVU(z, x []Word, s uint) (c Word)
+// This implementation handles the shift operation from the low word to the high word,
+// which may be an error for the case where the high word of x overlaps with the low
+// word of z. When calling this function directly, you need to pay attention to this
+// situation.
+TEXT ·shrVU(SB),NOSPLIT,$0
+	MOVD	z+0(FP), R0	// R0 = &z[0]
+	MOVD	z_len+8(FP), R1	// R1 = len(z), counts the words still to do
+	MOVD	x+24(FP), R2	// R2 = &x[0]
+	MOVD	s+48(FP), R3	// R3 = s
+	MOVD	$0, R8
+	MOVD	$64, R4
+	SUB	R3, R4		// R4 = 64 - s
+	CBZ	R1, len0
+	CBZ	R3, copy	// if the number of shift is 0, just copy x to z
+
+	MOVD.P	8(R2), R20
+	LSR	R3, R20, R8	// R8 = x[0] >> s, the pending low part of z[0]
+	LSL	R4, R20		// bits shifted out of x[0]
+	MOVD	R20, c+56(FP)	// deal with the first element
+	SUB	$1, R1
+
+	TBZ	$0, R1, two
+	MOVD.P	8(R2), R6	// single word: load the next higher x word
+	LSL	R4, R6, R20
+	ORR	R8, R20		// combine with the pending low part in R8
+	LSR	R3, R6, R8	// new pending low part
+	MOVD.P	R20, 8(R0)
+	SUB	$1, R1
+two:
+	TBZ	$1, R1, loop
+	LDP.P	16(R2), (R6, R7)
+	LSL	R4, R6, R20
+	LSR	R3, R6
+	ORR	R8, R20
+	LSL	R4, R7, R21
+	LSR	R3, R7, R8	// new pending low part
+	ORR	R6, R21
+	STP.P	(R20, R21), 16(R0)
+	SUB	$2, R1
+loop:
+	CBZ	R1, done
+	LDP.P	32(R2), (R10, R11)	// first two of four words; R2 advances past all four
+	LDP	-16(R2), (R12, R13)	// last two, relative to the advanced R2
+	LSL	R4, R10, R20
+	LSR	R3, R10
+	ORR	R8, R20		// z[i] = (x[i] >> s) | (x[i+1] << (64 - s))
+	LSL	R4, R11, R21
+	LSR	R3, R11
+	ORR	R10, R21
+	LSL	R4, R12, R22
+	LSR	R3, R12
+	ORR	R11, R22
+	LSL	R4, R13, R23
+	LSR	R3, R13, R8	// new pending low part
+	ORR	R12, R23
+	STP.P	(R20, R21), 32(R0)
+	STP	(R22, R23), -16(R0)
+	SUB	$4, R1
+	B	loop
+done:
+	MOVD	R8, (R0)	// deal with the last element
+	RET
+copy:
+	CMP	R0, R2
+	BEQ	len0		// x and z pointers are equal: no copy needed
+	TBZ	$0, R1, ctwo
+	MOVD.P	8(R2), R3
+	MOVD.P	R3, 8(R0)
+	SUB	$1, R1
+ctwo:
+	TBZ	$1, R1, cloop
+	LDP.P	16(R2), (R4, R5)
+	STP.P	(R4, R5), 16(R0)
+	SUB	$2, R1
+cloop:
+	CBZ	R1, len0
+	LDP.P	32(R2), (R4, R5)
+	LDP	-16(R2), (R6, R7)
+	STP.P	(R4, R5), 32(R0)
+	STP	(R6, R7), -16(R0)
+	SUB	$4, R1
+	B	cloop
+len0:
+	MOVD	$0, c+56(FP)	// c = 0
+	RET
+
+
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	MOVD	z+0(FP), R1	// R1 = &z[0]
+	MOVD	z_len+8(FP), R0	// R0 = len(z), counts the words still to do
+	MOVD	x+24(FP), R2	// R2 = &x[0]
+	MOVD	y+48(FP), R3	// R3 = y
+	MOVD	r+56(FP), R4	// R4 = r, the running carry word
+	// c, z = x * y + r
+	TBZ	$0, R0, two
+	MOVD.P	8(R2), R5
+	MUL	R3, R5, R7	// low 64 bits of x[i] * y
+	UMULH	R3, R5, R8	// high 64 bits of x[i] * y
+	ADDS	R4, R7
+	ADC	$0, R8, R4	// c, z[i] = x[i] * y +  r
+	MOVD.P	R7, 8(R1)
+	SUB	$1, R0
+two:
+	TBZ	$1, R0, loop
+	LDP.P	16(R2), (R5, R6)
+	MUL	R3, R5, R10
+	UMULH	R3, R5, R11
+	ADDS	R4, R10		// low word of the first product + carry word
+	MUL	R3, R6, R12
+	UMULH	R3, R6, R13
+	ADCS	R12, R11	// first high word + second low word + carry
+	ADC	$0, R13, R4	// new carry word = second high word + carry
+
+	STP.P	(R10, R11), 16(R1)
+	SUB	$2, R0
+loop:
+	CBZ	R0, done
+	LDP.P	32(R2), (R5, R6)
+	LDP	-16(R2), (R7, R8)
+
+	MUL	R3, R5, R10
+	UMULH	R3, R5, R11
+	ADDS	R4, R10		// low word of the first product + carry word
+	MUL	R3, R6, R12
+	UMULH	R3, R6, R13
+	ADCS	R11, R12	// each low word absorbs the previous high word + carry
+
+	MUL	R3, R7, R14
+	UMULH	R3, R7, R15
+	ADCS	R13, R14
+	MUL	R3, R8, R16
+	UMULH	R3, R8, R17
+	ADCS	R15, R16
+	ADC	$0, R17, R4	// new carry word = last high word + carry
+
+	STP.P	(R10, R12), 32(R1)
+	STP	(R14, R16), -16(R1)
+	SUB	$4, R0
+	B	loop
+done:
+	MOVD	R4, c+64(FP)	// c = final carry word
+	RET
+
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	MOVD	z+0(FP), R1	// R1 = &z[0]
+	MOVD	z_len+8(FP), R0	// R0 = len(z), counts the words still to do
+	MOVD	x+24(FP), R2	// R2 = &x[0]
+	MOVD	y+48(FP), R3	// R3 = y
+	MOVD	$0, R4		// R4 = running carry word, initially 0
+
+	TBZ	$0, R0, two
+
+	MOVD.P	8(R2), R5	// single word: R5 = x[i]
+	MOVD	(R1), R6	// R6 = z[i]
+
+	MUL	R5, R3, R7	// low 64 bits of x[i] * y
+	UMULH	R5, R3, R8	// high 64 bits of x[i] * y
+
+	ADDS	R7, R6		// z[i] += low word
+	ADC	$0, R8, R4	// carry word = high word + carry
+
+	MOVD.P	R6, 8(R1)
+	SUB	$1, R0
+
+two:
+	TBZ	$1, R0, loop
+
+	LDP.P	16(R2), (R5, R10)	// pair: two words of x
+	LDP	(R1), (R6, R11)		// matching two words of z
+
+	MUL	R10, R3, R13
+	UMULH	R10, R3, R12
+
+	MUL	R5, R3, R7
+	UMULH	R5, R3, R8
+
+	ADDS	R4, R6		// first pass: add the carry word and the second low word
+	ADCS	R13, R11
+	ADC	$0, R12
+
+	ADDS	R7, R6		// second pass: add the first product's low and high words
+	ADCS	R8, R11
+	ADC	$0, R12, R4	// new carry word
+
+	STP.P	(R6, R11), 16(R1)
+	SUB	$2, R0
+
+// The main loop of this code operates on a block of 4 words every iteration
+// performing [R4:R12:R11:R10:R9] = R4 + R3 * [R8:R7:R6:R5] + [R12:R11:R10:R9]
+// where R4 is carried from the previous iteration, R8:R7:R6:R5 hold the next
+// 4 words of x, R3 is y and R12:R11:R10:R9 are part of the result z.
+loop:
+	CBZ	R0, done
+
+	LDP.P	16(R2), (R5, R6)
+	LDP.P	16(R2), (R7, R8)
+
+	LDP	(R1), (R9, R10)
+	ADDS	R4, R9		// first carry chain: carry word and the products' low words
+	MUL	R6, R3, R14
+	ADCS	R14, R10
+	MUL	R7, R3, R15
+	LDP	16(R1), (R11, R12)
+	ADCS	R15, R11
+	MUL	R8, R3, R16
+	ADCS	R16, R12
+	UMULH	R8, R3, R20
+	ADC	$0, R20		// top high word absorbs the first chain's carry
+
+	MUL	R5, R3, R13
+	ADDS	R13, R9		// second carry chain: remaining low word and the high words
+	UMULH	R5, R3, R17
+	ADCS	R17, R10
+	UMULH	R6, R3, R21
+	STP.P	(R9, R10), 16(R1)
+	ADCS	R21, R11
+	UMULH	R7, R3, R19
+	ADCS	R19, R12
+	STP.P	(R11, R12), 16(R1)
+	ADC	$0, R20, R4	// new carry word
+
+	SUB	$4, R0
+	B	loop
+
+done:
+	MOVD	R4, c+56(FP)	// c = final carry word
+	RET
+
+
diff --git a/src/math/big/arith_decl.go b/src/math/big/arith_decl.go
new file mode 100644
index 0000000..9b254f2
--- /dev/null
+++ b/src/math/big/arith_decl.go
@@ -0,0 +1,34 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+package big
+
+// The functions below have no Go bodies here; they are implemented in arith_$GOARCH.s.
+
+//go:noescape
+func addVV(z, x, y []Word) (c Word) // z = x + y word by word; c is the carry out
+
+//go:noescape
+func subVV(z, x, y []Word) (c Word) // z = x - y word by word; c is the borrow out
+
+//go:noescape
+func addVW(z, x []Word, y Word) (c Word) // z = x + y for a single word y; c is the carry out
+
+//go:noescape
+func subVW(z, x []Word, y Word) (c Word) // z = x - y for a single word y; c is the borrow out
+
+//go:noescape
+func shlVU(z, x []Word, s uint) (c Word) // z = x << s; c holds the bits shifted out of the top word
+
+//go:noescape
+func shrVU(z, x []Word, s uint) (c Word) // z = x >> s; c holds the bits shifted out of the bottom word
+
+//go:noescape
+func mulAddVWW(z, x []Word, y, r Word) (c Word) // c, z = x*y + r
+
+//go:noescape
+func addMulVVW(z, x []Word, y Word) (c Word) // z += x*y; c is the carry word out
diff --git a/src/math/big/arith_decl_pure.go b/src/math/big/arith_decl_pure.go
new file mode 100644
index 0000000..75f3ed2
--- /dev/null
+++ b/src/math/big/arith_decl_pure.go
@@ -0,0 +1,50 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build math_big_pure_go
+// +build math_big_pure_go
+
+package big
+
+func addVV(z, x, y []Word) (c Word) {
+	return addVV_g(z, x, y) // math_big_pure_go build: forward to the Go implementation
+}
+
+func subVV(z, x, y []Word) (c Word) {
+	return subVV_g(z, x, y) // math_big_pure_go build: forward to the Go implementation
+}
+
+func addVW(z, x []Word, y Word) (c Word) {
+	// TODO: remove indirect function call when golang.org/issue/30548 is fixed
+	fn := addVW_g // default: the plain Go implementation
+	if len(z) > 32 { // longer vectors go to the addVWlarge variant
+		fn = addVWlarge
+	}
+	return fn(z, x, y)
+}
+
+func subVW(z, x []Word, y Word) (c Word) {
+	// TODO: remove indirect function call when golang.org/issue/30548 is fixed
+	fn := subVW_g // default: the plain Go implementation
+	if len(z) > 32 { // longer vectors go to the subVWlarge variant
+		fn = subVWlarge
+	}
+	return fn(z, x, y)
+}
+
+func shlVU(z, x []Word, s uint) (c Word) {
+	return shlVU_g(z, x, s) // math_big_pure_go build: forward to the Go implementation
+}
+
+func shrVU(z, x []Word, s uint) (c Word) {
+	return shrVU_g(z, x, s) // math_big_pure_go build: forward to the Go implementation
+}
+
+func mulAddVWW(z, x []Word, y, r Word) (c Word) {
+	return mulAddVWW_g(z, x, y, r) // math_big_pure_go build: forward to the Go implementation
+}
+
+func addMulVVW(z, x []Word, y Word) (c Word) {
+	return addMulVVW_g(z, x, y) // math_big_pure_go build: forward to the Go implementation
+}
diff --git a/src/math/big/arith_decl_s390x.go b/src/math/big/arith_decl_s390x.go
new file mode 100644
index 0000000..4193f32
--- /dev/null
+++ b/src/math/big/arith_decl_s390x.go
@@ -0,0 +1,19 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+package big
+
+import "internal/cpu"
+
+func addVV_check(z, x, y []Word) (c Word) // bodiless: defined outside this file (presumably s390x assembly — TODO confirm)
+func addVV_vec(z, x, y []Word) (c Word)   // NOTE(review): name suggests the vector-instruction variant — confirm
+func addVV_novec(z, x, y []Word) (c Word) // NOTE(review): name suggests the non-vector variant — confirm
+func subVV_check(z, x, y []Word) (c Word) // subtraction counterparts of the three declarations above
+func subVV_vec(z, x, y []Word) (c Word)
+func subVV_novec(z, x, y []Word) (c Word)
+
+var hasVX = cpu.S390X.HasVX // copy of internal/cpu's S390X.HasVX flag, read at package initialization
diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s
new file mode 100644
index 0000000..0ae3031
--- /dev/null
+++ b/src/math/big/arith_loong64.s
@@ -0,0 +1,34 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,loong64
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·addVV(SB),NOSPLIT,$0
+	JMP ·addVV_g(SB)	// tail-jump to the Go implementation addVV_g
+
+TEXT ·subVV(SB),NOSPLIT,$0
+	JMP ·subVV_g(SB)	// tail-jump to the Go implementation subVV_g
+
+TEXT ·addVW(SB),NOSPLIT,$0
+	JMP ·addVW_g(SB)	// tail-jump to the Go implementation addVW_g
+
+TEXT ·subVW(SB),NOSPLIT,$0
+	JMP ·subVW_g(SB)	// tail-jump to the Go implementation subVW_g
+
+TEXT ·shlVU(SB),NOSPLIT,$0
+	JMP ·shlVU_g(SB)	// tail-jump to the Go implementation shlVU_g
+
+TEXT ·shrVU(SB),NOSPLIT,$0
+	JMP ·shrVU_g(SB)	// tail-jump to the Go implementation shrVU_g
+
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	JMP ·mulAddVWW_g(SB)	// tail-jump to the Go implementation mulAddVWW_g
+
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	JMP ·addMulVVW_g(SB)	// tail-jump to the Go implementation addMulVVW_g
diff --git a/src/math/big/arith_mips64x.s b/src/math/big/arith_mips64x.s
new file mode 100644
index 0000000..3ee6e27
--- /dev/null
+++ b/src/math/big/arith_mips64x.s
@@ -0,0 +1,37 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go && (mips64 || mips64le)
+// +build !math_big_pure_go
+// +build mips64 mips64le
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·addVV(SB),NOSPLIT,$0
+	JMP ·addVV_g(SB)	// tail-jump to the Go implementation addVV_g
+
+TEXT ·subVV(SB),NOSPLIT,$0
+	JMP ·subVV_g(SB)	// tail-jump to the Go implementation subVV_g
+
+TEXT ·addVW(SB),NOSPLIT,$0
+	JMP ·addVW_g(SB)	// tail-jump to the Go implementation addVW_g
+
+TEXT ·subVW(SB),NOSPLIT,$0
+	JMP ·subVW_g(SB)	// tail-jump to the Go implementation subVW_g
+
+TEXT ·shlVU(SB),NOSPLIT,$0
+	JMP ·shlVU_g(SB)	// tail-jump to the Go implementation shlVU_g
+
+TEXT ·shrVU(SB),NOSPLIT,$0
+	JMP ·shrVU_g(SB)	// tail-jump to the Go implementation shrVU_g
+
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	JMP ·mulAddVWW_g(SB)	// tail-jump to the Go implementation mulAddVWW_g
+
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	JMP ·addMulVVW_g(SB)	// tail-jump to the Go implementation addMulVVW_g
+
diff --git a/src/math/big/arith_mipsx.s b/src/math/big/arith_mipsx.s
new file mode 100644
index 0000000..b1d3282
--- /dev/null
+++ b/src/math/big/arith_mipsx.s
@@ -0,0 +1,37 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go && (mips || mipsle)
+// +build !math_big_pure_go
+// +build mips mipsle
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·addVV(SB),NOSPLIT,$0
+	JMP	·addVV_g(SB)	// tail-jump to the Go implementation addVV_g
+
+TEXT ·subVV(SB),NOSPLIT,$0
+	JMP	·subVV_g(SB)	// tail-jump to the Go implementation subVV_g
+
+TEXT ·addVW(SB),NOSPLIT,$0
+	JMP	·addVW_g(SB)	// tail-jump to the Go implementation addVW_g
+
+TEXT ·subVW(SB),NOSPLIT,$0
+	JMP	·subVW_g(SB)	// tail-jump to the Go implementation subVW_g
+
+TEXT ·shlVU(SB),NOSPLIT,$0
+	JMP	·shlVU_g(SB)	// tail-jump to the Go implementation shlVU_g
+
+TEXT ·shrVU(SB),NOSPLIT,$0
+	JMP	·shrVU_g(SB)	// tail-jump to the Go implementation shrVU_g
+
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	JMP	·mulAddVWW_g(SB)	// tail-jump to the Go implementation mulAddVWW_g
+
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	JMP	·addMulVVW_g(SB)	// tail-jump to the Go implementation addMulVVW_g
+
diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s
new file mode 100644
index 0000000..0613f5c
--- /dev/null
+++ b/src/math/big/arith_ppc64x.s
@@ -0,0 +1,633 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go && (ppc64 || ppc64le)
+// +build !math_big_pure_go
+// +build ppc64 ppc64le
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// func addVV(z, x, y []Word) (c Word)
+// z[i] = x[i] + y[i] for all i, carrying
+TEXT ·addVV(SB), NOSPLIT, $0
+	MOVD  z_len+8(FP), R7   // R7 = z_len
+	MOVD  x+24(FP), R8      // R8 = x[]
+	MOVD  y+48(FP), R9      // R9 = y[]
+	MOVD  z+0(FP), R10      // R10 = z[]
+
+	// If z_len = 0, we are done
+	CMP   R0, R7          // R0 is used as the constant zero throughout
+	MOVD  R0, R4          // R4 = c = 0
+	BEQ   done
+
+	// Process the first iteration out of the loop so we can
+	// use MOVDU and avoid 3 index registers updates.
+	MOVD  0(R8), R11      // R11 = x[i]
+	MOVD  0(R9), R12      // R12 = y[i]
+	ADD   $-1, R7         // R7 = z_len - 1
+	ADDC  R12, R11, R15   // R15 = x[i] + y[i], set CA
+	CMP   R0, R7
+	MOVD  R15, 0(R10)     // z[i]
+	BEQ   final          // If z_len was 1, we are done
+
+	SRD   $2, R7, R5      // R5 = (z_len-1)/4, the number of 4-word iterations
+	CMP   R0, R5
+	MOVD  R5, CTR         // Set up loop counter
+	BEQ   tail            // If R5 = 0, we can't use the loop
+
+	// Process 4 elements per iteration. Unrolling this loop
+	// means a performance trade-off: we will lose performance
+	// for small values of z_len (0.90x in the worst case), but
+	// gain significant performance as z_len increases (up to
+	// 1.45x).
+
+	PCALIGN $16
+loop:
+	MOVD  8(R8), R11      // R11 = x[i]
+	MOVD  16(R8), R12     // R12 = x[i+1]
+	MOVD  24(R8), R14     // R14 = x[i+2]
+	MOVDU 32(R8), R15     // R15 = x[i+3]
+	MOVD  8(R9), R16      // R16 = y[i]
+	MOVD  16(R9), R17     // R17 = y[i+1]
+	MOVD  24(R9), R18     // R18 = y[i+2]
+	MOVDU 32(R9), R19     // R19 = y[i+3]
+	ADDE  R11, R16, R20   // R20 = x[i] + y[i] + CA
+	ADDE  R12, R17, R21   // R21 = x[i+1] + y[i+1] + CA
+	ADDE  R14, R18, R22   // R22 = x[i+2] + y[i+2] + CA
+	ADDE  R15, R19, R23   // R23 = x[i+3] + y[i+3] + CA
+	MOVD  R20, 8(R10)     // z[i]
+	MOVD  R21, 16(R10)    // z[i+1]
+	MOVD  R22, 24(R10)    // z[i+2]
+	MOVDU R23, 32(R10)    // z[i+3]
+	ADD   $-4, R7         // R7 -= 4 (words remaining)
+	BC  16, 0, loop       // bdnz
+
+	// We may have more elements to read
+	CMP   R0, R7
+	BEQ   final
+
+	// Process the remaining elements, one at a time
+tail:
+	MOVDU 8(R8), R11      // R11 = x[i]
+	MOVDU 8(R9), R16      // R16 = y[i]
+	ADD   $-1, R7         // R7 -= 1 (words remaining)
+	ADDE  R11, R16, R20   // R20 = x[i] + y[i] + CA
+	CMP   R0, R7
+	MOVDU R20, 8(R10)     // z[i]
+	BEQ   final           // If R7 = 0, we are done
+
+	MOVDU 8(R8), R11      // second leftover word, same steps
+	MOVDU 8(R9), R16
+	ADD   $-1, R7
+	ADDE  R11, R16, R20
+	CMP   R0, R7
+	MOVDU R20, 8(R10)
+	BEQ   final
+
+	MOVD  8(R8), R11      // third and last leftover word; no pointer update needed
+	MOVD  8(R9), R16
+	ADDE  R11, R16, R20
+	MOVD  R20, 8(R10)
+
+final:
+	ADDZE R4              // Capture CA
+
+done:
+	MOVD  R4, c+72(FP)
+	RET
+
+// func subVV(z, x, y []Word) (c Word)
+// z[i] = x[i] - y[i] for all i, carrying
+TEXT ·subVV(SB), NOSPLIT, $0
+	MOVD  z_len+8(FP), R7 // R7 = z_len
+	MOVD  x+24(FP), R8    // R8 = x[]
+	MOVD  y+48(FP), R9    // R9 = y[]
+	MOVD  z+0(FP), R10    // R10 = z[]
+
+	// If z_len = 0, we are done
+	CMP   R0, R7          // R0 is used as the constant zero throughout
+	MOVD  R0, R4          // R4 = c = 0
+	BEQ   done
+
+	// Process the first iteration out of the loop so we can
+	// use MOVDU and avoid 3 index registers updates.
+	MOVD  0(R8), R11      // R11 = x[i]
+	MOVD  0(R9), R12      // R12 = y[i]
+	ADD   $-1, R7         // R7 = z_len - 1
+	SUBC  R12, R11, R15   // R15 = x[i] - y[i], set CA
+	CMP   R0, R7
+	MOVD  R15, 0(R10)     // z[i]
+	BEQ   final           // If z_len was 1, we are done
+
+	SRD   $2, R7, R5      // R5 = (z_len-1)/4, the number of 4-word iterations
+	CMP   R0, R5
+	MOVD  R5, CTR         // Set up loop counter
+	BEQ   tail            // If R5 = 0, we can't use the loop
+
+	// Process 4 elements per iteration. Unrolling this loop
+	// means a performance trade-off: we will lose performance
+	// for small values of z_len (0.92x in the worst case), but
+	// gain significant performance as z_len increases (up to
+	// 1.45x).
+
+	PCALIGN $16
+loop:
+	MOVD  8(R8), R11      // R11 = x[i]
+	MOVD  16(R8), R12     // R12 = x[i+1]
+	MOVD  24(R8), R14     // R14 = x[i+2]
+	MOVDU 32(R8), R15     // R15 = x[i+3]
+	MOVD  8(R9), R16      // R16 = y[i]
+	MOVD  16(R9), R17     // R17 = y[i+1]
+	MOVD  24(R9), R18     // R18 = y[i+2]
+	MOVDU 32(R9), R19     // R19 = y[i+3]
+	SUBE  R16, R11, R20   // R20 = x[i] - y[i] + CA
+	SUBE  R17, R12, R21   // R21 = x[i+1] - y[i+1] + CA
+	SUBE  R18, R14, R22   // R22 = x[i+2] - y[i+2] + CA
+	SUBE  R19, R15, R23   // R23 = x[i+3] - y[i+3] + CA
+	MOVD  R20, 8(R10)     // z[i]
+	MOVD  R21, 16(R10)    // z[i+1]
+	MOVD  R22, 24(R10)    // z[i+2]
+	MOVDU R23, 32(R10)    // z[i+3]
+	ADD   $-4, R7         // R7 -= 4 (words remaining)
+	BC  16, 0, loop       // bdnz
+
+	// We may have more elements to read
+	CMP   R0, R7
+	BEQ   final
+
+	// Process the remaining elements, one at a time
+tail:
+	MOVDU 8(R8), R11      // R11 = x[i]
+	MOVDU 8(R9), R16      // R16 = y[i]
+	ADD   $-1, R7         // R7 -= 1 (words remaining)
+	SUBE  R16, R11, R20   // R20 = x[i] - y[i] + CA
+	CMP   R0, R7
+	MOVDU R20, 8(R10)     // z[i]
+	BEQ   final           // If R7 = 0, we are done
+
+	MOVDU 8(R8), R11      // second leftover word, same steps
+	MOVDU 8(R9), R16
+	ADD   $-1, R7
+	SUBE  R16, R11, R20
+	CMP   R0, R7
+	MOVDU R20, 8(R10)
+	BEQ   final
+
+	MOVD  8(R8), R11      // third and last leftover word; no pointer update needed
+	MOVD  8(R9), R16
+	SUBE  R16, R11, R20
+	MOVD  R20, 8(R10)
+
+final:
+	ADDZE R4              // R4 = 0 + CA
+	XOR   $1, R4          // c = CA inverted: 1 when the subtraction borrowed
+
+done:
+	MOVD  R4, c+72(FP)
+	RET
+
+// func addVW(z, x []Word, y Word) (c Word)
+TEXT ·addVW(SB), NOSPLIT, $0
+	MOVD z+0(FP), R10	// R10 = z[]
+	MOVD x+24(FP), R8	// R8 = x[]
+	MOVD y+48(FP), R4	// R4 = y = c
+	MOVD z_len+8(FP), R11	// R11 = z_len
+
+	CMP   R0, R11		// If z_len is zero, return
+	BEQ   done
+
+	// We will process the first iteration out of the loop so we capture
+	// the value of c. In the subsequent iterations, we will rely on the
+	// value of CA set here.
+	MOVD  0(R8), R20	// R20 = x[i]
+	ADD   $-1, R11		// R11 = z_len - 1
+	ADDC  R20, R4, R6	// R6 = x[i] + c
+	CMP   R0, R11		// If z_len was 1, we are done
+	MOVD  R6, 0(R10)	// z[i]
+	BEQ   final
+
+	// We will read 4 elements per iteration
+	SRD   $2, R11, R9	// R9 = (z_len-1)/4, the number of 4-word iterations
+	DCBT  (R8)		// cache-touch hint for the x data
+	CMP   R0, R9
+	MOVD  R9, CTR		// Set up the loop counter
+	BEQ   tail		// If R9 = 0, we can't use the loop
+	PCALIGN $16
+
+loop:
+	MOVD  8(R8), R20	// R20 = x[i]
+	MOVD  16(R8), R21	// R21 = x[i+1]
+	MOVD  24(R8), R22	// R22 = x[i+2]
+	MOVDU 32(R8), R23	// R23 = x[i+3]
+	ADDZE R20, R24		// R24 = x[i] + CA
+	ADDZE R21, R25		// R25 = x[i+1] + CA
+	ADDZE R22, R26		// R26 = x[i+2] + CA
+	ADDZE R23, R27		// R27 = x[i+3] + CA
+	MOVD  R24, 8(R10)	// z[i]
+	MOVD  R25, 16(R10)	// z[i+1]
+	MOVD  R26, 24(R10)	// z[i+2]
+	MOVDU R27, 32(R10)	// z[i+3]
+	ADD   $-4, R11		// R11 -= 4 (words remaining)
+	BC    16, 0, loop	// bdnz
+
+	// We may have some elements to read
+	CMP R0, R11
+	BEQ final
+
+tail:
+	MOVDU 8(R8), R20	// first leftover word
+	ADDZE R20, R24		// R24 = x[i] + CA
+	ADD $-1, R11
+	MOVDU R24, 8(R10)
+	CMP R0, R11
+	BEQ final
+
+	MOVDU 8(R8), R20	// second leftover word, same steps
+	ADDZE R20, R24
+	ADD $-1, R11
+	MOVDU R24, 8(R10)
+	CMP R0, R11
+	BEQ final
+
+	MOVD 8(R8), R20		// third and last leftover word; no pointer update needed
+	ADDZE R20, R24
+	MOVD R24, 8(R10)
+
+final:
+	ADDZE R0, R4		// c = CA
+done:
+	MOVD  R4, c+56(FP)
+	RET
+
+// func subVW(z, x []Word, y Word) (c Word)
+TEXT ·subVW(SB), NOSPLIT, $0
+	MOVD  z+0(FP), R10	// R10 = z[]
+	MOVD  x+24(FP), R8	// R8 = x[]
+	MOVD  y+48(FP), R4	// R4 = y = c
+	MOVD  z_len+8(FP), R11	// R11 = z_len
+
+	CMP   R0, R11		// If z_len is zero, return
+	BEQ   done
+
+	// We will process the first iteration out of the loop so we capture
+	// the value of c. In the subsequent iterations, we will rely on the
+	// value of CA set here.
+	MOVD  0(R8), R20	// R20 = x[i]
+	ADD   $-1, R11		// R11 = z_len - 1
+	SUBC  R4, R20, R6	// R6 = x[i] - c
+	CMP   R0, R11		// If z_len was 1, we are done
+	MOVD  R6, 0(R10)	// z[i]
+	BEQ   final
+
+	// We will read 4 elements per iteration
+	SRD   $2, R11, R9	// R9 = (z_len-1)/4, the number of 4-word iterations
+	DCBT  (R8)		// cache-touch hint for the x data
+	CMP   R0, R9
+	MOVD  R9, CTR		// Set up the loop counter
+	BEQ   tail		// If R9 = 0, we can't use the loop
+
+	// The loop here is almost the same as the one used in s390x, but
+	// we don't need to capture CA every iteration because we've already
+	// done that above.
+
+	PCALIGN $16
+loop:
+	MOVD  8(R8), R20	// R20..R23 = the next four words of x
+	MOVD  16(R8), R21
+	MOVD  24(R8), R22
+	MOVDU 32(R8), R23
+	SUBE  R0, R20		// subtract zero with CA, i.e. propagate the borrow
+	SUBE  R0, R21
+	SUBE  R0, R22
+	SUBE  R0, R23
+	MOVD  R20, 8(R10)	// store the four result words to z
+	MOVD  R21, 16(R10)
+	MOVD  R22, 24(R10)
+	MOVDU R23, 32(R10)
+	ADD   $-4, R11		// R11 -= 4 (words remaining)
+	BC    16, 0, loop	// bdnz
+
+	// We may have some elements to read
+	CMP   R0, R11
+	BEQ   final
+
+tail:
+	MOVDU 8(R8), R20	// first leftover word
+	SUBE  R0, R20		// propagate the borrow
+	ADD   $-1, R11
+	MOVDU R20, 8(R10)
+	CMP   R0, R11
+	BEQ   final
+
+	MOVDU 8(R8), R20	// second leftover word, same steps
+	SUBE  R0, R20
+	ADD   $-1, R11
+	MOVDU R20, 8(R10)
+	CMP   R0, R11
+	BEQ   final
+
+	MOVD  8(R8), R20	// third and last leftover word; no pointer update needed
+	SUBE  R0, R20
+	MOVD  R20, 8(R10)
+
+final:
+	// Capture CA
+	SUBE  R4, R4		// R4 = CA - 1: 0 when no borrow, -1 when borrowed
+	NEG   R4, R4		// c = 0 or 1
+
+done:
+	MOVD  R4, c+56(FP)
+	RET
+
+//func shlVU(z, x []Word, s uint) (c Word)
+//
+// shlVU shifts the len(z)-word vector x left by s bits into z, working from
+// the most significant word down, and returns in c the bits shifted out of
+// x[len(z)-1]. When s is 0 it instead copies min(len(x), len(z)) words from
+// x to z and returns 0.
+TEXT ·shlVU(SB), NOSPLIT, $0
+	MOVD    z+0(FP), R3
+	MOVD    x+24(FP), R6
+	MOVD    s+48(FP), R9
+	MOVD    z_len+8(FP), R4
+	MOVD    x_len+32(FP), R7
+	CMP     R9, R0          // s==0 copy(z,x)
+	BEQ     zeroshift
+	CMP     R4, R0          // len(z)==0 return
+	BEQ     done
+
+	ADD     $-1, R4, R5     // len(z)-1
+	SUBC    R9, $64, R4     // ŝ=_W-s, we skip & by _W-1 as the caller ensures s < _W(64)
+	SLD     $3, R5, R7      // byte offset of the last word: (len(z)-1)*8
+	ADD     R6, R7, R15     // save starting address &x[len(z)-1]
+	ADD     R3, R7, R16     // save starting address &z[len(z)-1]
+	MOVD    (R6)(R7), R14
+	SRD     R4, R14, R7     // compute x[len(z)-1]>>ŝ into R7
+	CMP     R5, R0          // iterate from i=len(z)-1 to 0
+	BEQ     loopexit        // Already at end?
+	MOVD	0(R15),R10	// x[i]
+	PCALIGN $16
+shloop:
+	SLD     R9, R10, R10    // x[i]<<s
+	MOVDU   -8(R15), R14    // load x[i-1] and step R15 down one word
+	SRD     R4, R14, R11    // x[i-1]>>ŝ
+	OR      R11, R10, R10
+	MOVD    R10, 0(R16)     // z[i]=x[i]<<s | x[i-1]>>ŝ
+	MOVD	R14, R10	// reuse x[i-1] for next iteration
+	ADD     $-8, R16        // i--
+	CMP     R15, R6         // &x[i-1]>&x[0]?
+	BGT     shloop
+loopexit:
+	MOVD    0(R6), R4
+	SLD     R9, R4, R4
+	MOVD    R4, 0(R3)       // z[0]=x[0]<<s
+	MOVD    R7, c+56(FP)    // store pre-computed x[len(z)-1]>>ŝ into c
+	RET
+
+	// NOTE(review): both copy loops below test their bound only after the
+	// first store, so the forward loop appears to copy one word even when
+	// min(len(x), len(z)) is 0 — confirm callers never reach here with an
+	// empty z or x.
+zeroshift:
+	CMP     R6, R0          // x is null, nothing to copy
+	BEQ     done
+	CMP     R6, R3          // if x is same as z, nothing to copy
+	BEQ     done
+	CMP     R7, R4
+	ISEL    $0, R7, R4, R7  // Take the lower bound of lengths of x,z
+	SLD     $3, R7, R7      // length in bytes
+	SUB     R6, R3, R11     // dest - src
+	CMPU    R11, R7, CR2    // < len?
+	BLT     CR2, backward   // there is overlap, copy backwards
+	MOVD    $0, R14
+	// shlVU processes backwards, but a forward copy option was added
+	// since it's faster on POWER
+repeat:
+	MOVD    (R6)(R14), R15  // Copy 8 bytes at a time
+	MOVD    R15, (R3)(R14)
+	ADD     $8, R14
+	CMP     R14, R7         // More 8 bytes left?
+	BLT     repeat
+	BR      done
+backward:
+	ADD     $-8,R7, R14     // R14 = byte offset of the last word to copy
+repeatback:
+	MOVD    (R6)(R14), R15  // copy x into z backwards
+	MOVD    R15, (R3)(R14)  // copy 8 bytes at a time
+	SUB     $8, R14
+	CMP     R14, $-8        // More 8 bytes left?
+	BGT     repeatback
+
+done:
+	MOVD    R0, c+56(FP)    // c=0
+	RET
+
+//func shrVU(z, x []Word, s uint) (c Word)
+//
+// shrVU shifts the len(z)-word vector x right by s bits into z, working from
+// the least significant word up, and returns in c the bits shifted out of
+// x[0] (as x[0]<<ŝ). When s is 0 it instead copies min(len(x), len(z)) words
+// from x to z and returns 0. For len(z) >= 3 two result words are produced
+// per iteration using vector shifts.
+TEXT ·shrVU(SB), NOSPLIT, $0
+	MOVD    z+0(FP), R3
+	MOVD    x+24(FP), R6
+	MOVD    s+48(FP), R9
+	MOVD    z_len+8(FP), R4
+	MOVD    x_len+32(FP), R7
+
+	CMP     R9, R0          // s==0, copy(z,x)
+	BEQ     zeroshift
+	CMP     R4, R0          // len(z)==0 return
+	BEQ     done
+	SUBC    R9, $64, R5     // ŝ=_W-s, we skip & by _W-1 as the caller ensures s < _W(64)
+
+	MOVD    0(R6), R7
+	SLD     R5, R7, R7      // compute x[0]<<ŝ
+	MOVD    $1, R8          // iterate from i=1 to i<len(z)
+	CMP     R8, R4
+	BGE     loopexit        // Already at end?
+
+	// vectorize if len(z) is >=3, else jump to scalar loop
+	CMP     R4, $3
+	BLT     scalar
+	MTVSRD  R9, VS38        // s
+	VSPLTB  $7, V6, V4      // V4 = shift count s for the vector right shift (V6 is VS38)
+	MTVSRD  R5, VS39        // ŝ
+	VSPLTB  $7, V7, V2      // V2 = shift count ŝ for the vector left shift (V7 is VS39)
+	ADD     $-2, R4, R16    // R16 = len(z)-2, the last i with a full pair left
+	PCALIGN $16
+loopback:
+	ADD     $-1, R8, R10
+	SLD     $3, R10         // R10 = (i-1)*8
+	LXVD2X  (R6)(R10), VS32 // load x[i-1], x[i]
+	SLD     $3, R8, R12     // R12 = i*8
+	LXVD2X  (R6)(R12), VS33 // load x[i], x[i+1]
+
+	VSRD    V0, V4, V3      // x[i-1]>>s, x[i]>>s
+	VSLD    V1, V2, V5      // x[i]<<ŝ, x[i+1]<<ŝ
+	VOR     V3, V5, V5      // Or(|) the two registers together
+	STXVD2X VS37, (R3)(R10) // store into z[i-1] and z[i]
+	ADD     $2, R8          // Done processing 2 entries, i and i+1
+	CMP     R8, R16         // Are there at least a couple of more entries left?
+	BLE     loopback
+	CMP     R8, R4          // Are we at the last element?
+	BEQ     loopexit
+scalar:	
+	ADD     $-1, R8, R10
+	SLD     $3, R10
+	MOVD    (R6)(R10),R11
+	SRD     R9, R11, R11    // x[len(z)-2] >> s
+	SLD     $3, R8, R12
+	MOVD    (R6)(R12), R12
+	SLD     R5, R12, R12    // x[len(z)-1]<<ŝ
+	OR      R12, R11, R11   // x[len(z)-2]>>s | x[len(z)-1]<<ŝ
+	MOVD    R11, (R3)(R10)  // z[len(z)-2]=x[len(z)-2]>>s | x[len(z)-1]<<ŝ
+loopexit:
+	ADD     $-1, R4
+	SLD     $3, R4          // R4 = (len(z)-1)*8
+	MOVD    (R6)(R4), R5
+	SRD     R9, R5, R5      // x[len(z)-1]>>s
+	MOVD    R5, (R3)(R4)    // z[len(z)-1]=x[len(z)-1]>>s
+	MOVD    R7, c+56(FP)    // store pre-computed x[0]<<ŝ into c
+	RET
+
+	// NOTE(review): the copy loop below tests its bound only after the first
+	// store, so it appears to copy one word even when min(len(x), len(z)) is
+	// 0 — confirm callers never reach here with an empty z or x.
+zeroshift:
+	CMP     R6, R0          // x is null, nothing to copy
+	BEQ     done
+	CMP     R6, R3          // if x is same as z, nothing to copy
+	BEQ     done
+	CMP     R7, R4
+	ISEL    $0, R7, R4, R7  // Take the lower bounds of lengths of x, z
+	SLD     $3, R7, R7      // length in bytes
+	MOVD    $0, R14
+repeat:
+	MOVD    (R6)(R14), R15  // copy 8 bytes at a time
+	MOVD    R15, (R3)(R14)  // shrVU processes bytes only forwards
+	ADD     $8, R14
+	CMP     R14, R7         // More 8 bytes left?
+	BLT     repeat
+done:
+	MOVD    R0, c+56(FP)
+	RET
+
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+//
+// mulAddVWW computes z = x*y + r over z_len words: for each word the low
+// half of x[i]*y plus the incoming carry is stored in z[i], and the high
+// half plus the resulting CA becomes the carry into the next word. The
+// final carry is returned in c; when z_len is 0, c is r.
+TEXT ·mulAddVWW(SB), NOSPLIT, $0
+	MOVD    z+0(FP), R10      // R10 = z[]
+	MOVD    x+24(FP), R8      // R8 = x[]
+	MOVD    y+48(FP), R9      // R9 = y
+	MOVD    r+56(FP), R4      // R4 = r = c
+	MOVD    z_len+8(FP), R11  // R11 = z_len
+
+	CMP     R0, R11
+	BEQ     done
+
+	// The first element is handled outside the loop.
+	MOVD    0(R8), R20
+	ADD     $-1, R11
+	MULLD   R9, R20, R6       // R6 = z0 = Low-order(x[i]*y)
+	MULHDU  R9, R20, R7       // R7 = z1 = High-order(x[i]*y)
+	ADDC    R4, R6            // R6 = z0 + r
+	ADDZE   R7                // R7 = z1 + CA
+	CMP     R0, R11
+	MOVD    R7, R4            // R4 = c
+	MOVD    R6, 0(R10)        // z[i]
+	BEQ     done
+
+	// We will read 4 elements per iteration
+	SRD     $2, R11, R14      // R14 = remaining/4 (R11 is already z_len - 1)
+	DCBT    (R8)
+	CMP     R0, R14
+	MOVD    R14, CTR          // Set up the loop counter
+	BEQ     tail              // If R14 = 0, we can't use the loop
+	PCALIGN $16
+
+loop:
+	MOVD    8(R8), R20        // R20 = x[i]
+	MOVD    16(R8), R21       // R21 = x[i+1]
+	MOVD    24(R8), R22       // R22 = x[i+2]
+	MOVDU   32(R8), R23       // R23 = x[i+3]
+	MULLD   R9, R20, R24      // R24 = z0[i]
+	MULHDU  R9, R20, R20      // R20 = z1[i]
+	ADDC    R4, R24           // R24 = z0[i] + c
+	ADDZE   R20               // R20 = z1[i] + CA
+	MULLD   R9, R21, R25      // R25 = z0[i+1]
+	MULHDU  R9, R21, R21      // R21 = z1[i+1]
+	ADDC    R20, R25          // R25 = z0[i+1] + carry from word i
+	ADDZE   R21               // R21 = z1[i+1] + CA
+	MULLD   R9, R22, R26      // R26 = z0[i+2]
+	MULHDU  R9, R22, R22      // R22 = z1[i+2]
+	MULLD   R9, R23, R27      // R27 = z0[i+3]
+	MULHDU  R9, R23, R23      // R23 = z1[i+3]
+	ADDC    R21, R26          // R26 = z0[i+2] + carry from word i+1
+	ADDZE   R22               // R22 = z1[i+2] + CA
+	MOVD    R24, 8(R10)       // z[i]
+	MOVD    R25, 16(R10)      // z[i+1]
+	ADDC    R22, R27          // R27 = z0[i+3] + carry from word i+2
+	ADDZE   R23,R4		  // update carry
+	MOVD    R26, 24(R10)      // z[i+2]
+	MOVDU   R27, 32(R10)      // z[i+3]
+	ADD     $-4, R11          // R11 = z_len - 4
+	BC      16, 0, loop       // bdnz
+
+	// We may have some elements to read
+	CMP   R0, R11
+	BEQ   done
+
+	// Process the remaining elements, one at a time
+tail:
+	MOVDU   8(R8), R20        // R20 = x[i]
+	MULLD   R9, R20, R24      // R24 = z0[i]
+	MULHDU  R9, R20, R25      // R25 = z1[i]
+	ADD     $-1, R11          // R11 = z_len - 1
+	ADDC    R4, R24
+	ADDZE   R25
+	MOVDU   R24, 8(R10)       // z[i]
+	CMP     R0, R11
+	MOVD    R25, R4           // R4 = c
+	BEQ     done              // If R11 = 0, we are done
+
+	MOVDU   8(R8), R20
+	MULLD   R9, R20, R24
+	MULHDU  R9, R20, R25
+	ADD     $-1, R11
+	ADDC    R4, R24
+	ADDZE   R25
+	MOVDU   R24, 8(R10)
+	CMP     R0, R11
+	MOVD    R25, R4
+	BEQ     done
+
+	MOVD    8(R8), R20
+	MULLD   R9, R20, R24
+	MULHDU  R9, R20, R25
+	ADD     $-1, R11
+	ADDC    R4, R24
+	ADDZE   R25
+	MOVD    R24, 8(R10)
+	MOVD    R25, R4
+
+done:
+	MOVD    R4, c+64(FP)
+	RET
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+//
+// addMulVVW computes z += x*y over z_len words, one word per loop
+// iteration, and returns the final carry in c (0 when z_len is 0).
+TEXT ·addMulVVW(SB), NOSPLIT, $0
+	MOVD z+0(FP), R10	// R10 = z[]
+	MOVD x+24(FP), R8	// R8 = x[]
+	MOVD y+48(FP), R9	// R9 = y
+	MOVD z_len+8(FP), R22	// R22 = z_len
+
+	MOVD R0, R3		// R3 will be the index register
+	CMP  R0, R22
+	MOVD R0, R4		// R4 = c = 0
+	MOVD R22, CTR		// Initialize loop counter
+	BEQ  done
+	PCALIGN $16
+
+loop:
+	MOVD  (R8)(R3), R20	// Load x[i]
+	MOVD  (R10)(R3), R21	// Load z[i]
+	MULLD  R9, R20, R6	// R6 = Low-order(x[i]*y)
+	MULHDU R9, R20, R7	// R7 = High-order(x[i]*y)
+	ADDC   R21, R6		// R6 = z0
+	ADDZE  R7		// R7 = z1
+	ADDC   R4, R6		// R6 = z0 + c + 0
+	ADDZE  R7, R4           // c = z1 + CA
+	MOVD   R6, (R10)(R3)	// Store z[i]
+	ADD    $8, R3		// advance the byte index to the next word
+	BC  16, 0, loop		// bdnz
+
+done:
+	MOVD R4, c+56(FP)
+	RET
+
+
diff --git a/src/math/big/arith_riscv64.s b/src/math/big/arith_riscv64.s
new file mode 100644
index 0000000..cb9ac18
--- /dev/null
+++ b/src/math/big/arith_riscv64.s
@@ -0,0 +1,36 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go && riscv64
+// +build !math_big_pure_go,riscv64
+
+#include "textflag.h"
+
+// This file provides the assembly entry points for the elementary
+// arithmetic operations on vectors implemented in arith.go. Each entry
+// point in this file simply jumps to the corresponding *_g routine.
+
+// addVV forwards directly to ·addVV_g; this stub contains no arithmetic of its own.
+TEXT ·addVV(SB),NOSPLIT,$0
+	JMP ·addVV_g(SB)
+
+// subVV forwards directly to ·subVV_g; this stub contains no arithmetic of its own.
+TEXT ·subVV(SB),NOSPLIT,$0
+	JMP ·subVV_g(SB)
+
+// addVW forwards directly to ·addVW_g; this stub contains no arithmetic of its own.
+TEXT ·addVW(SB),NOSPLIT,$0
+	JMP ·addVW_g(SB)
+
+// subVW forwards directly to ·subVW_g; this stub contains no arithmetic of its own.
+TEXT ·subVW(SB),NOSPLIT,$0
+	JMP ·subVW_g(SB)
+
+// shlVU forwards directly to ·shlVU_g; this stub contains no arithmetic of its own.
+TEXT ·shlVU(SB),NOSPLIT,$0
+	JMP ·shlVU_g(SB)
+
+// shrVU forwards directly to ·shrVU_g; this stub contains no arithmetic of its own.
+TEXT ·shrVU(SB),NOSPLIT,$0
+	JMP ·shrVU_g(SB)
+
+// mulAddVWW forwards directly to ·mulAddVWW_g; this stub contains no arithmetic of its own.
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	JMP ·mulAddVWW_g(SB)
+
+// addMulVVW forwards directly to ·addMulVVW_g; this stub contains no arithmetic of its own.
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	JMP ·addMulVVW_g(SB)
+
diff --git a/src/math/big/arith_s390x.s b/src/math/big/arith_s390x.s
new file mode 100644
index 0000000..aa6590e
--- /dev/null
+++ b/src/math/big/arith_s390x.s
@@ -0,0 +1,787 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
+// NOTE(review): the line above looks like a mapping from another
+// architecture's register names to the s390x registers used here — confirm.
+// func addVV(z, x, y []Word) (c Word)
+//
+// addVV is a dispatcher: it loads the code address currently stored in
+// addvectorfacility and branches to it. That variable is initialized to
+// ·addVV_check, which overwrites it with the chosen implementation.
+
+TEXT ·addVV(SB), NOSPLIT, $0
+	MOVD addvectorfacility+0x00(SB), R1 // R1 = address of the current implementation
+	BR   (R1)                           // branch there; its RET returns to addVV's caller
+
+// addVV_check selects the addVV implementation: it reads ·hasVX, stores the
+// address of ·addVV_vec (hasVX == 1) or ·addVV_novec (otherwise) into
+// addvectorfacility so that later addVV calls dispatch there directly, and
+// then branches to the selected routine to serve the current call.
+TEXT ·addVV_check(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl              // vectorfacility = 1, vector supported
+	MOVD   $addvectorfacility+0x00(SB), R1 // R1 = &addvectorfacility
+	MOVD   $·addVV_novec(SB), R2           // R2 = address of the scalar version
+	MOVD   R2, 0(R1)                       // addvectorfacility = ·addVV_novec
+
+	// MOVD	$·addVV_novec(SB), 0(R1)
+	BR ·addVV_novec(SB)
+
+vectorimpl:
+	MOVD $addvectorfacility+0x00(SB), R1 // R1 = &addvectorfacility
+	MOVD $·addVV_vec(SB), R2             // R2 = address of the vector version
+	MOVD R2, 0(R1)                       // addvectorfacility = ·addVV_vec
+
+	// MOVD	$·addVV_vec(SB), 0(R1)
+	BR ·addVV_vec(SB)
+
+// addvectorfacility is an 8-byte slot holding the address addVV branches
+// to; it initially points at ·addVV_check.
+GLOBL addvectorfacility+0x00(SB), NOPTR, $8
+DATA addvectorfacility+0x00(SB)/8, $·addVV_check(SB)
+
+// addVV_vec is the addVV implementation that ·addVV_check installs when
+// ·hasVX is 1. It computes z = x + y over z_len words and returns the final
+// carry in c. Work is done in three stages: 16 words per iteration with
+// vector add-with-carry (UU1), 4 words per iteration with ADDE (U1), and
+// single words (L1). Between stages the carry is kept in R4 as 0 or -1.
+TEXT ·addVV_vec(SB), NOSPLIT, $0
+	MOVD z_len+8(FP), R3
+	MOVD x+24(FP), R8
+	MOVD y+48(FP), R9
+	MOVD z+0(FP), R2
+
+	MOVD $0, R4  // c = 0
+	MOVD $0, R0  // make sure it's zero
+	MOVD $0, R10 // i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUB $4, R3  // n -= 4
+	BLT v1      // if n < 0 goto v1
+	SUB $12, R3 // n -= 12 (n is now z_len - 16)
+	BLT A1      // if n < 0 goto A1
+
+	MOVD R8, R5 // R5 = running pointer into x
+	MOVD R9, R6 // R6 = running pointer into y
+	MOVD R2, R7 // R7 = running pointer into z
+
+	// n >= 0
+	// regular loop body unrolled 16x
+	VZERO V0 // c = 0
+
+UU1:
+	VLM  0(R5), V1, V4    // 64 bytes of x into V1..V4
+	ADD  $64, R5
+	VPDI $0x4, V1, V1, V1 // flip the doublewords to big-endian order
+	VPDI $0x4, V2, V2, V2 // flip the doublewords to big-endian order
+
+	VLM  0(R6), V9, V12      // 64 bytes of y into V9..V12
+	ADD  $64, R6
+	VPDI $0x4, V9, V9, V9    // flip the doublewords to big-endian order
+	VPDI $0x4, V10, V10, V10 // flip the doublewords to big-endian order
+
+	VACCCQ V1, V9, V0, V25   // carry out of V1 + V9 + carry-in
+	VACQ   V1, V9, V0, V17   // sum of V1 + V9 + carry-in
+	VACCCQ V2, V10, V25, V26
+	VACQ   V2, V10, V25, V18
+
+	VLM 0(R5), V5, V6   // 32 bytes of x into V5..V6
+	VLM 0(R6), V13, V14 // 32 bytes of y into V13..V14
+	ADD $32, R5
+	ADD $32, R6
+
+	VPDI $0x4, V3, V3, V3    // flip the doublewords to big-endian order
+	VPDI $0x4, V4, V4, V4    // flip the doublewords to big-endian order
+	VPDI $0x4, V11, V11, V11 // flip the doublewords to big-endian order
+	VPDI $0x4, V12, V12, V12 // flip the doublewords to big-endian order
+
+	VACCCQ V3, V11, V26, V27
+	VACQ   V3, V11, V26, V19
+	VACCCQ V4, V12, V27, V28
+	VACQ   V4, V12, V27, V20
+
+	VLM 0(R5), V7, V8   // 32 bytes of x into V7..V8
+	VLM 0(R6), V15, V16 // 32 bytes of y into V15..V16
+	ADD $32, R5
+	ADD $32, R6
+
+	VPDI $0x4, V5, V5, V5    // flip the doublewords to big-endian order
+	VPDI $0x4, V6, V6, V6    // flip the doublewords to big-endian order
+	VPDI $0x4, V13, V13, V13 // flip the doublewords to big-endian order
+	VPDI $0x4, V14, V14, V14 // flip the doublewords to big-endian order
+
+	VACCCQ V5, V13, V28, V29
+	VACQ   V5, V13, V28, V21
+	VACCCQ V6, V14, V29, V30
+	VACQ   V6, V14, V29, V22
+
+	VPDI $0x4, V7, V7, V7    // flip the doublewords to big-endian order
+	VPDI $0x4, V8, V8, V8    // flip the doublewords to big-endian order
+	VPDI $0x4, V15, V15, V15 // flip the doublewords to big-endian order
+	VPDI $0x4, V16, V16, V16 // flip the doublewords to big-endian order
+
+	VACCCQ V7, V15, V30, V31
+	VACQ   V7, V15, V30, V23
+	VACCCQ V8, V16, V31, V0  // V0 has carry-over
+	VACQ   V8, V16, V31, V24
+
+	VPDI  $0x4, V17, V17, V17 // flip the doublewords to big-endian order
+	VPDI  $0x4, V18, V18, V18 // flip the doublewords to big-endian order
+	VPDI  $0x4, V19, V19, V19 // flip the doublewords to big-endian order
+	VPDI  $0x4, V20, V20, V20 // flip the doublewords to big-endian order
+	VPDI  $0x4, V21, V21, V21 // flip the doublewords to big-endian order
+	VPDI  $0x4, V22, V22, V22 // flip the doublewords to big-endian order
+	VPDI  $0x4, V23, V23, V23 // flip the doublewords to big-endian order
+	VPDI  $0x4, V24, V24, V24 // flip the doublewords to big-endian order
+	VSTM  V17, V24, 0(R7)     // 128-bytes into z
+	ADD   $128, R7
+	ADD   $128, R10           // i += 16
+	SUB   $16, R3             // n -= 16
+	BGE   UU1                 // if n >= 0 goto UU1
+	VLGVG $1, V0, R4          // put cf into R4
+	NEG   R4, R4              // save cf
+
+A1:
+	ADD $12, R3 // n += 12 (undo the second adjustment above)
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	BLT v1 // if n < 0 goto v1
+
+U1:  // n >= 0
+	// regular loop body unrolled 4x
+	MOVD 0(R8)(R10*1), R5
+	MOVD 8(R8)(R10*1), R6
+	MOVD 16(R8)(R10*1), R7
+	MOVD 24(R8)(R10*1), R1
+	ADDC R4, R4             // restore CF
+	MOVD 0(R9)(R10*1), R11
+	ADDE R11, R5
+	MOVD 8(R9)(R10*1), R11
+	ADDE R11, R6
+	MOVD 16(R9)(R10*1), R11
+	ADDE R11, R7
+	MOVD 24(R9)(R10*1), R11
+	ADDE R11, R1
+	MOVD R0, R4
+	ADDE R4, R4             // save CF
+	NEG  R4, R4
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R6, 8(R2)(R10*1)
+	MOVD R7, 16(R2)(R10*1)
+	MOVD R1, 24(R2)(R10*1)
+
+	ADD $32, R10 // i += 4
+	SUB $4, R3   // n -= 4
+	BGE U1       // if n >= 0 goto U1
+
+v1:
+	ADD $4, R3 // n += 4
+	BLE E1     // if n <= 0 goto E1
+
+L1:  // n > 0
+	ADDC R4, R4            // restore CF
+	MOVD 0(R8)(R10*1), R5
+	MOVD 0(R9)(R10*1), R11
+	ADDE R11, R5
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R0, R4
+	ADDE R4, R4            // save CF
+	NEG  R4, R4
+
+	ADD $8, R10 // i++
+	SUB $1, R3  // n--
+	BGT L1      // if n > 0 goto L1
+
+E1:
+	NEG  R4, R4       // turn the saved 0/-1 back into a 0/1 carry
+	MOVD R4, c+72(FP) // return c
+	RET
+
+// addVV_novec is the addVV implementation that ·addVV_check installs when
+// ·hasVX is not 1. It computes z = x + y over z_len words using only scalar
+// ADDC/ADDE, 4 words per iteration (U1n) and then single words (L1n), and
+// returns the final carry in c. Between iterations the carry is kept in R4
+// as 0 or -1.
+TEXT ·addVV_novec(SB), NOSPLIT, $0
+novec:
+	MOVD z_len+8(FP), R3
+	MOVD x+24(FP), R8
+	MOVD y+48(FP), R9
+	MOVD z+0(FP), R2
+
+	MOVD $0, R4  // c = 0
+	MOVD $0, R0  // make sure it's zero
+	MOVD $0, R10 // i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUB $4, R3 // n -= 4
+	BLT v1n    // if n < 0 goto v1n
+
+U1n:  // n >= 0
+	// regular loop body unrolled 4x
+	MOVD 0(R8)(R10*1), R5
+	MOVD 8(R8)(R10*1), R6
+	MOVD 16(R8)(R10*1), R7
+	MOVD 24(R8)(R10*1), R1
+	ADDC R4, R4             // restore CF
+	MOVD 0(R9)(R10*1), R11
+	ADDE R11, R5
+	MOVD 8(R9)(R10*1), R11
+	ADDE R11, R6
+	MOVD 16(R9)(R10*1), R11
+	ADDE R11, R7
+	MOVD 24(R9)(R10*1), R11
+	ADDE R11, R1
+	MOVD R0, R4
+	ADDE R4, R4             // save CF
+	NEG  R4, R4
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R6, 8(R2)(R10*1)
+	MOVD R7, 16(R2)(R10*1)
+	MOVD R1, 24(R2)(R10*1)
+
+	ADD $32, R10 // i += 4
+	SUB $4, R3   // n -= 4
+	BGE U1n      // if n >= 0 goto U1n
+
+v1n:
+	ADD $4, R3 // n += 4
+	BLE E1n    // if n <= 0 goto E1n
+
+L1n:  // n > 0
+	ADDC R4, R4            // restore CF
+	MOVD 0(R8)(R10*1), R5
+	MOVD 0(R9)(R10*1), R11
+	ADDE R11, R5
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R0, R4
+	ADDE R4, R4            // save CF
+	NEG  R4, R4
+
+	ADD $8, R10 // i++
+	SUB $1, R3  // n--
+	BGT L1n     // if n > 0 goto L1n
+
+E1n:
+	NEG  R4, R4       // turn the saved 0/-1 back into a 0/1 carry
+	MOVD R4, c+72(FP) // return c
+	RET
+
+// subVV is a dispatcher: it loads the code address currently stored in
+// subvectorfacility and branches to it. That variable is initialized to
+// ·subVV_check, which overwrites it with the chosen implementation.
+TEXT ·subVV(SB), NOSPLIT, $0
+	MOVD subvectorfacility+0x00(SB), R1 // R1 = address of the current implementation
+	BR   (R1)                           // branch there; its RET returns to subVV's caller
+
+// subVV_check selects the subVV implementation: it reads ·hasVX, stores the
+// address of ·subVV_vec (hasVX == 1) or ·subVV_novec (otherwise) into
+// subvectorfacility so that later subVV calls dispatch there directly, and
+// then branches to the selected routine to serve the current call.
+TEXT ·subVV_check(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl              // vectorfacility = 1, vector supported
+	MOVD   $subvectorfacility+0x00(SB), R1 // R1 = &subvectorfacility
+	MOVD   $·subVV_novec(SB), R2           // R2 = address of the scalar version
+	MOVD   R2, 0(R1)                       // subvectorfacility = ·subVV_novec
+
+	// MOVD	$·subVV_novec(SB), 0(R1)
+	BR ·subVV_novec(SB)
+
+vectorimpl:
+	MOVD $subvectorfacility+0x00(SB), R1 // R1 = &subvectorfacility
+	MOVD $·subVV_vec(SB), R2             // R2 = address of the vector version
+	MOVD R2, 0(R1)                       // subvectorfacility = ·subVV_vec
+
+	// MOVD	$·subVV_vec(SB), 0(R1)
+	BR ·subVV_vec(SB)
+
+// subvectorfacility is an 8-byte slot holding the address subVV branches
+// to; it initially points at ·subVV_check.
+GLOBL subvectorfacility+0x00(SB), NOPTR, $8
+DATA subvectorfacility+0x00(SB)/8, $·subVV_check(SB)
+
+// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names)
+//
+// subVV_vec is the subVV implementation that ·subVV_check installs when
+// ·hasVX is 1. It computes z = x - y over z_len words and returns the final
+// borrow in c. Work is done in three stages: 16 words per iteration with
+// vector subtract-with-borrow (UU1), 4 words per iteration with SUBE (U1),
+// and single words (L1). Between stages the borrow is kept in R4 as 0 (no
+// borrow) or -1 (borrow).
+TEXT ·subVV_vec(SB), NOSPLIT, $0
+	MOVD z_len+8(FP), R3
+	MOVD x+24(FP), R8
+	MOVD y+48(FP), R9
+	MOVD z+0(FP), R2
+	MOVD $0, R4          // c = 0
+	MOVD $0, R0          // make sure it's zero
+	MOVD $0, R10         // i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUB $4, R3  // n -= 4
+	BLT v1      // if n < 0 goto v1
+	SUB $12, R3 // n -= 12 (n is now z_len - 16)
+	BLT A1      // if n < 0 goto A1
+
+	MOVD R8, R5 // R5 = running pointer into x
+	MOVD R9, R6 // R6 = running pointer into y
+	MOVD R2, R7 // R7 = running pointer into z
+
+	// n >= 0
+	// regular loop body unrolled 16x
+	VZERO V0         // cf = 0
+	MOVD  $1, R4     // for 390 subtraction cf starts as 1 (no borrow)
+	VLVGG $1, R4, V0 // put carry into V0
+
+UU1:
+	VLM  0(R5), V1, V4    // 64 bytes of x into V1..V4
+	ADD  $64, R5
+	VPDI $0x4, V1, V1, V1 // flip the doublewords to big-endian order
+	VPDI $0x4, V2, V2, V2 // flip the doublewords to big-endian order
+
+	VLM  0(R6), V9, V12      // 64 bytes of y into V9..V12
+	ADD  $64, R6
+	VPDI $0x4, V9, V9, V9    // flip the doublewords to big-endian order
+	VPDI $0x4, V10, V10, V10 // flip the doublewords to big-endian order
+
+	VSBCBIQ V1, V9, V0, V25   // borrow indication out of V1 - V9 with borrow-in
+	VSBIQ   V1, V9, V0, V17   // difference V1 - V9 with borrow-in
+	VSBCBIQ V2, V10, V25, V26
+	VSBIQ   V2, V10, V25, V18
+
+	VLM 0(R5), V5, V6   // 32 bytes of x into V5..V6
+	VLM 0(R6), V13, V14 // 32 bytes of y into V13..V14
+	ADD $32, R5
+	ADD $32, R6
+
+	VPDI $0x4, V3, V3, V3    // flip the doublewords to big-endian order
+	VPDI $0x4, V4, V4, V4    // flip the doublewords to big-endian order
+	VPDI $0x4, V11, V11, V11 // flip the doublewords to big-endian order
+	VPDI $0x4, V12, V12, V12 // flip the doublewords to big-endian order
+
+	VSBCBIQ V3, V11, V26, V27
+	VSBIQ   V3, V11, V26, V19
+	VSBCBIQ V4, V12, V27, V28
+	VSBIQ   V4, V12, V27, V20
+
+	VLM 0(R5), V7, V8   // 32 bytes of x into V7..V8
+	VLM 0(R6), V15, V16 // 32 bytes of y into V15..V16
+	ADD $32, R5
+	ADD $32, R6
+
+	VPDI $0x4, V5, V5, V5    // flip the doublewords to big-endian order
+	VPDI $0x4, V6, V6, V6    // flip the doublewords to big-endian order
+	VPDI $0x4, V13, V13, V13 // flip the doublewords to big-endian order
+	VPDI $0x4, V14, V14, V14 // flip the doublewords to big-endian order
+
+	VSBCBIQ V5, V13, V28, V29
+	VSBIQ   V5, V13, V28, V21
+	VSBCBIQ V6, V14, V29, V30
+	VSBIQ   V6, V14, V29, V22
+
+	VPDI $0x4, V7, V7, V7    // flip the doublewords to big-endian order
+	VPDI $0x4, V8, V8, V8    // flip the doublewords to big-endian order
+	VPDI $0x4, V15, V15, V15 // flip the doublewords to big-endian order
+	VPDI $0x4, V16, V16, V16 // flip the doublewords to big-endian order
+
+	VSBCBIQ V7, V15, V30, V31
+	VSBIQ   V7, V15, V30, V23
+	VSBCBIQ V8, V16, V31, V0  // V0 has carry-over
+	VSBIQ   V8, V16, V31, V24
+
+	VPDI  $0x4, V17, V17, V17 // flip the doublewords to big-endian order
+	VPDI  $0x4, V18, V18, V18 // flip the doublewords to big-endian order
+	VPDI  $0x4, V19, V19, V19 // flip the doublewords to big-endian order
+	VPDI  $0x4, V20, V20, V20 // flip the doublewords to big-endian order
+	VPDI  $0x4, V21, V21, V21 // flip the doublewords to big-endian order
+	VPDI  $0x4, V22, V22, V22 // flip the doublewords to big-endian order
+	VPDI  $0x4, V23, V23, V23 // flip the doublewords to big-endian order
+	VPDI  $0x4, V24, V24, V24 // flip the doublewords to big-endian order
+	VSTM  V17, V24, 0(R7)     // 128-bytes into z
+	ADD   $128, R7
+	ADD   $128, R10           // i += 16
+	SUB   $16, R3             // n -= 16
+	BGE   UU1                 // if n >= 0 goto UU1
+	VLGVG $1, V0, R4          // put cf into R4
+	SUB   $1, R4              // save cf
+
+A1:
+	ADD $12, R3 // n += 12 (undo the second adjustment above)
+	BLT v1      // if n < 0 goto v1
+
+U1:  // n >= 0
+	// regular loop body unrolled 4x
+	MOVD 0(R8)(R10*1), R5
+	MOVD 8(R8)(R10*1), R6
+	MOVD 16(R8)(R10*1), R7
+	MOVD 24(R8)(R10*1), R1
+	MOVD R0, R11
+	SUBC R4, R11            // restore CF
+	MOVD 0(R9)(R10*1), R11
+	SUBE R11, R5
+	MOVD 8(R9)(R10*1), R11
+	SUBE R11, R6
+	MOVD 16(R9)(R10*1), R11
+	SUBE R11, R7
+	MOVD 24(R9)(R10*1), R11
+	SUBE R11, R1
+	MOVD R0, R4
+	SUBE R4, R4             // save CF
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R6, 8(R2)(R10*1)
+	MOVD R7, 16(R2)(R10*1)
+	MOVD R1, 24(R2)(R10*1)
+
+	ADD $32, R10 // i += 4
+	SUB $4, R3   // n -= 4
+	BGE U1       // if n >= 0 goto U1
+
+v1:
+	ADD $4, R3 // n += 4
+	BLE E1     // if n <= 0 goto E1
+
+L1:  // n > 0
+	MOVD R0, R11
+	SUBC R4, R11           // restore CF
+	MOVD 0(R8)(R10*1), R5
+	MOVD 0(R9)(R10*1), R11
+	SUBE R11, R5
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R0, R4
+	SUBE R4, R4            // save CF
+
+	ADD $8, R10 // i++
+	SUB $1, R3  // n--
+	BGT L1      // if n > 0 goto L1
+
+E1:
+	NEG  R4, R4       // turn the saved 0/-1 into a 0/1 borrow
+	MOVD R4, c+72(FP) // return c
+	RET
+
+// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names)
+//
+// subVV_novec is the subVV implementation that ·subVV_check installs when
+// ·hasVX is not 1. It computes z = x - y over z_len words using only scalar
+// SUBC/SUBE, 4 words per iteration (U1) and then single words (L1), and
+// returns the final borrow in c. Between iterations the borrow is kept in
+// R4 as 0 (no borrow) or -1 (borrow).
+TEXT ·subVV_novec(SB), NOSPLIT, $0
+	MOVD z_len+8(FP), R3
+	MOVD x+24(FP), R8
+	MOVD y+48(FP), R9
+	MOVD z+0(FP), R2
+
+	MOVD $0, R4  // c = 0
+	MOVD $0, R0  // make sure it's zero
+	MOVD $0, R10 // i = 0
+
+	// s/JL/JMP/ below to disable the unrolled loop
+	SUB $4, R3 // n -= 4
+	BLT v1     // if n < 0 goto v1
+
+U1:  // n >= 0
+	// regular loop body unrolled 4x
+	MOVD 0(R8)(R10*1), R5
+	MOVD 8(R8)(R10*1), R6
+	MOVD 16(R8)(R10*1), R7
+	MOVD 24(R8)(R10*1), R1
+	MOVD R0, R11
+	SUBC R4, R11            // restore CF
+	MOVD 0(R9)(R10*1), R11
+	SUBE R11, R5
+	MOVD 8(R9)(R10*1), R11
+	SUBE R11, R6
+	MOVD 16(R9)(R10*1), R11
+	SUBE R11, R7
+	MOVD 24(R9)(R10*1), R11
+	SUBE R11, R1
+	MOVD R0, R4
+	SUBE R4, R4             // save CF
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R6, 8(R2)(R10*1)
+	MOVD R7, 16(R2)(R10*1)
+	MOVD R1, 24(R2)(R10*1)
+
+	ADD $32, R10 // i += 4
+	SUB $4, R3   // n -= 4
+	BGE U1       // if n >= 0 goto U1
+
+v1:
+	ADD $4, R3 // n += 4
+	BLE E1     // if n <= 0 goto E1
+
+L1:  // n > 0
+	MOVD R0, R11
+	SUBC R4, R11           // restore CF
+	MOVD 0(R8)(R10*1), R5
+	MOVD 0(R9)(R10*1), R11
+	SUBE R11, R5
+	MOVD R5, 0(R2)(R10*1)
+	MOVD R0, R4
+	SUBE R4, R4            // save CF
+
+	ADD $8, R10 // i++
+	SUB $1, R3  // n--
+	BGT L1      // if n > 0 goto L1
+
+E1:
+	NEG  R4, R4       // turn the saved 0/-1 into a 0/1 borrow
+	MOVD R4, c+72(FP) // return c
+	RET
+
+// addVW stores x + y into z over z_len words and returns the final carry in
+// c (the frame offsets used are z+0, z_len+8, x+24, y+48, c+56). The first
+// two words are added unconditionally; after that, while a carry is still
+// pending each word gets +1, and as soon as no carry is pending the rest of
+// x is copied to z with MVC and 0 is returned. When z_len is 0, y is
+// returned as c.
+TEXT ·addVW(SB), NOSPLIT, $0
+	MOVD z_len+8(FP), R5 // length of z
+	MOVD x+24(FP), R6
+	MOVD y+48(FP), R7    // c = y
+	MOVD z+0(FP), R8
+
+	CMPBEQ R5, $0, returnC // if len(z) == 0, we can have an early return
+
+	// Add the first two words, and determine which path (copy path or loop path) to take based on the carry flag.
+	ADDC   0(R6), R7            // R7 = y + x[0], sets carry
+	MOVD   R7, 0(R8)            // z[0]
+	CMPBEQ R5, $1, returnResult // len(z) == 1
+	MOVD   $0, R9
+	ADDE   8(R6), R9            // R9 = x[1] + carry
+	MOVD   R9, 8(R8)            // z[1]
+	CMPBEQ R5, $2, returnResult // len(z) == 2
+
+	// Update the counters
+	MOVD $16, R12    // i = 2
+	MOVD $-2(R5), R5 // n = n - 2
+
+loopOverEachWord:
+	BRC  $12, copySetup // carry = 0, copy the rest
+	MOVD $1, R9
+
+	// Originally we used the carry flag generated in the previous iteration
+	// (i.e: ADDE could be used here to do the addition).  However, since we
+	// already know carry is 1 (otherwise we will go to copy section), we can use
+	// ADDC here so the current iteration does not depend on the carry flag
+	// generated in the previous iteration. This could be useful when branch prediction happens.
+	ADDC 0(R6)(R12*1), R9
+	MOVD R9, 0(R8)(R12*1) // z[i] = x[i] + c
+
+	MOVD  $8(R12), R12         // i++
+	BRCTG R5, loopOverEachWord // n--
+
+// Return the current carry value
+returnResult:
+	MOVD $0, R0
+	ADDE R0, R0       // R0 = 0 + 0 + carry
+	MOVD R0, c+56(FP)
+	RET
+
+// Update position of x(R6) and z(R8) based on the current counter value and perform copying.
+// With the assumption that x and z will not overlap with each other or x and z will
+// point to same memory region, we can use a faster version of copy using only MVC here.
+// In the following implementation, we have three copy loops, each copying a word, 4 words, and
+// 32 words at a time.  Via benchmarking, this implementation is faster than calling runtime·memmove.
+copySetup:
+	ADD R12, R6 // R6 = &x[i]
+	ADD R12, R8 // R8 = &z[i]
+
+	CMPBGE R5, $4, mediumLoop
+
+smallLoop:  // does a loop unrolling to copy word when n < 4
+	CMPBEQ R5, $0, returnZero
+	MVC    $8, 0(R6), 0(R8)
+	CMPBEQ R5, $1, returnZero
+	MVC    $8, 8(R6), 8(R8)
+	CMPBEQ R5, $2, returnZero
+	MVC    $8, 16(R6), 16(R8)
+
+returnZero:
+	MOVD $0, c+56(FP) // return 0 as carry
+	RET
+
+mediumLoop:
+	CMPBLT R5, $4, smallLoop
+	CMPBLT R5, $32, mediumLoopBody
+
+largeLoop:  // Copying 256 bytes at a time.
+	MVC    $256, 0(R6), 0(R8)
+	MOVD   $256(R6), R6
+	MOVD   $256(R8), R8
+	MOVD   $-32(R5), R5       // n -= 32 words
+	CMPBGE R5, $32, largeLoop
+	BR     mediumLoop
+
+mediumLoopBody:  // Copying 32 bytes at a time
+	MVC    $32, 0(R6), 0(R8)
+	MOVD   $32(R6), R6
+	MOVD   $32(R8), R8
+	MOVD   $-4(R5), R5        // n -= 4 words
+	CMPBGE R5, $4, mediumLoopBody
+	BR     smallLoop
+
+returnC:
+	MOVD R7, c+56(FP) // len(z) == 0: return y unchanged
+	RET
+
+// subVW stores x - y into z over z_len words and returns the final borrow in
+// c (the frame offsets used are z+0, z_len+8, x+24, y+48, c+56). The first
+// two words are subtracted unconditionally; after that, while a borrow is
+// still pending each word gets -1, and as soon as no borrow is pending the
+// rest of x is copied to z with MVC and 0 is returned. When z_len is 0, y is
+// returned as c.
+TEXT ·subVW(SB), NOSPLIT, $0
+	MOVD z_len+8(FP), R5
+	MOVD x+24(FP), R6
+	MOVD y+48(FP), R7    // The borrow bit passed in
+	MOVD z+0(FP), R8
+	MOVD $0, R0          // R0 is a temporary variable used during computation. Ensure it has zero in it.
+
+	CMPBEQ R5, $0, returnC // len(z) == 0, have an early return
+
+	// Subtract the first two words, and determine which path (copy path or loop path) to take based on the borrow flag
+	MOVD   0(R6), R9
+	SUBC   R7, R9               // R9 = x[0] - y, sets borrow
+	MOVD   R9, 0(R8)            // z[0]
+	CMPBEQ R5, $1, returnResult
+	MOVD   8(R6), R9
+	SUBE   R0, R9               // R9 = x[1] - borrow
+	MOVD   R9, 8(R8)            // z[1]
+	CMPBEQ R5, $2, returnResult
+
+	// Update the counters
+	MOVD $16, R12    // i = 2
+	MOVD $-2(R5), R5 // n = n - 2
+
+loopOverEachWord:
+	BRC  $3, copySetup    // no borrow, copy the rest
+	MOVD 0(R6)(R12*1), R9
+
+	// Originally we used the borrow flag generated in the previous iteration
+	// (i.e: SUBE could be used here to do the subtraction). However, since we
+	// already know borrow is 1 (otherwise we will go to copy section), we can
+	// use SUBC here so the current iteration does not depend on the borrow flag
+	// generated in the previous iteration. This could be useful when branch prediction happens.
+	SUBC $1, R9
+	MOVD R9, 0(R8)(R12*1) // z[i] = x[i] - 1
+
+	MOVD  $8(R12), R12         // i++
+	BRCTG R5, loopOverEachWord // n--
+
+// return the current borrow value
+returnResult:
+	SUBE R0, R0       // R0 = 0 - 0 - borrow: 0 or -1
+	NEG  R0, R0       // 0 or 1
+	MOVD R0, c+56(FP)
+	RET
+
+// Update position of x(R6) and z(R8) based on the current counter value and perform copying.
+// With the assumption that x and z will not overlap with each other or x and z will
+// point to same memory region, we can use a faster version of copy using only MVC here.
+// In the following implementation, we have three copy loops, each copying a word, 4 words, and
+// 32 words at a time. Via benchmarking, this implementation is faster than calling runtime·memmove.
+copySetup:
+	ADD R12, R6 // R6 = &x[i]
+	ADD R12, R8 // R8 = &z[i]
+
+	CMPBGE R5, $4, mediumLoop
+
+smallLoop:  // does a loop unrolling to copy word when n < 4
+	CMPBEQ R5, $0, returnZero
+	MVC    $8, 0(R6), 0(R8)
+	CMPBEQ R5, $1, returnZero
+	MVC    $8, 8(R6), 8(R8)
+	CMPBEQ R5, $2, returnZero
+	MVC    $8, 16(R6), 16(R8)
+
+returnZero:
+	MOVD $0, c+56(FP) // return 0 as borrow
+	RET
+
+mediumLoop:
+	CMPBLT R5, $4, smallLoop
+	CMPBLT R5, $32, mediumLoopBody
+
+largeLoop:  // Copying 256 bytes at a time
+	MVC    $256, 0(R6), 0(R8)
+	MOVD   $256(R6), R6
+	MOVD   $256(R8), R8
+	MOVD   $-32(R5), R5       // n -= 32 words
+	CMPBGE R5, $32, largeLoop
+	BR     mediumLoop
+
+mediumLoopBody:  // Copying 32 bytes at a time
+	MVC    $32, 0(R6), 0(R8)
+	MOVD   $32(R6), R6
+	MOVD   $32(R8), R8
+	MOVD   $-4(R5), R5        // n -= 4 words
+	CMPBGE R5, $4, mediumLoopBody
+	BR     smallLoop
+
+returnC:
+	MOVD R7, c+56(FP) // len(z) == 0: return y unchanged
+	RET
+
+// func shlVU(z, x []Word, s uint) (c Word)
+//
+// shlVU forwards directly to ·shlVU_g; this stub contains no arithmetic of its own.
+TEXT ·shlVU(SB), NOSPLIT, $0
+	BR ·shlVU_g(SB)
+
+// func shrVU(z, x []Word, s uint) (c Word)
+//
+// shrVU forwards directly to ·shrVU_g; this stub contains no arithmetic of its own.
+TEXT ·shrVU(SB), NOSPLIT, $0
+	BR ·shrVU_g(SB)
+
+// CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, DX = r3, AX = r6, BX = R1, (R0 set to 0) + use R11 + use R7 for i
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+//
+// mulAddVWW computes z = x*y + r over z_len words, one word per iteration,
+// and returns the final carry word in c (r itself when z_len is 0).
+//
+// NOTE(review): R11 is read below but never written explicitly in this
+// function; presumably the assembler's expansion of MULHDU leaves the low
+// 64 bits of the product in R11 (and the high 64 bits in the destination) —
+// confirm against the s390x assembler before touching register usage.
+TEXT ·mulAddVWW(SB), NOSPLIT, $0
+	MOVD z+0(FP), R2
+	MOVD x+24(FP), R8
+	MOVD y+48(FP), R9
+	MOVD r+56(FP), R4    // c = r
+	MOVD z_len+8(FP), R5
+	MOVD $0, R1          // i*8 = 0 (byte offset into x and z)
+	MOVD $0, R7          // i = 0
+	MOVD $0, R0          // make sure it's zero
+	BR   E5
+
+L5:
+	MOVD   (R8)(R1*1), R6  // R6 = x[i]
+	MULHDU R9, R6          // R6 = high word of x[i]*y
+	ADDC   R4, R11         // add to low order bits
+	ADDE   R0, R6          // fold the carry into the high word
+	MOVD   R11, (R2)(R1*1) // z[i]
+	MOVD   R6, R4          // c = high word
+	ADD    $8, R1          // i*8 + 8
+	ADD    $1, R7          // i++
+
+E5:
+	CMPBLT R7, R5, L5 // i < n
+
+	MOVD R4, c+64(FP)
+	RET
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+// CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1, (R0 set to 0) + use R11 + use R7 for i
+//
+// addMulVVW computes z += x*y over z_len words and returns the final carry
+// word in c. Words are processed two per iteration (A6) up to the largest
+// even count, then any remaining word is handled singly (L6).
+//
+// NOTE(review): R11 is read below but never written explicitly in this
+// function; presumably the assembler's expansion of MULHDU leaves the low
+// 64 bits of the product in R11 (and the high 64 bits in the destination) —
+// confirm against the s390x assembler before touching register usage.
+TEXT ·addMulVVW(SB), NOSPLIT, $0
+	MOVD z+0(FP), R2
+	MOVD x+24(FP), R8
+	MOVD y+48(FP), R9
+	MOVD z_len+8(FP), R5
+
+	MOVD $0, R1 // i*8 = 0
+	MOVD $0, R7 // i = 0
+	MOVD $0, R0 // make sure it's zero
+	MOVD $0, R4 // c = 0
+
+	MOVD   R5, R12
+	AND    $-2, R12    // R12 = n rounded down to an even count
+	CMPBGE R5, $2, A6
+	BR     E6
+
+A6:
+	MOVD   (R8)(R1*1), R6
+	MULHDU R9, R6
+	MOVD   (R2)(R1*1), R10
+	ADDC   R10, R11        // add to low order bits
+	ADDE   R0, R6
+	ADDC   R4, R11         // add the incoming carry word
+	ADDE   R0, R6
+	MOVD   R6, R4          // c = high word
+	MOVD   R11, (R2)(R1*1) // z[i]
+
+	MOVD   (8)(R8)(R1*1), R6
+	MULHDU R9, R6
+	MOVD   (8)(R2)(R1*1), R10
+	ADDC   R10, R11           // add to low order bits
+	ADDE   R0, R6
+	ADDC   R4, R11
+	ADDE   R0, R6
+	MOVD   R6, R4
+	MOVD   R11, (8)(R2)(R1*1) // z[i+1]
+
+	ADD $16, R1 // i*8 + 16
+	ADD $2, R7  // i += 2
+
+	CMPBLT R7, R12, A6
+	BR     E6
+
+L6:
+	MOVD   (R8)(R1*1), R6
+	MULHDU R9, R6
+	MOVD   (R2)(R1*1), R10
+	ADDC   R10, R11        // add to low order bits
+	ADDE   R0, R6
+	ADDC   R4, R11
+	ADDE   R0, R6
+	MOVD   R6, R4
+	MOVD   R11, (R2)(R1*1)
+
+	ADD $8, R1 // i*8 + 8
+	ADD $1, R7 // i++
+
+E6:
+	CMPBLT R7, R5, L6 // i < n
+
+	MOVD R4, c+56(FP)
+	RET
+
diff --git a/src/math/big/arith_s390x_test.go b/src/math/big/arith_s390x_test.go
new file mode 100644
index 0000000..8375ddb
--- /dev/null
+++ b/src/math/big/arith_s390x_test.go
@@ -0,0 +1,33 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build s390x && !math_big_pure_go
+// +build s390x,!math_big_pure_go
+
+package big
+
+import (
+	"testing"
+)
+
+// TestFunVVnovec checks that the non-vector routines addVV_novec and
+// subVV_novec produce correct results even when the tests are run on a
+// vector-capable machine. It does nothing when hasVX is false.
+func TestFunVVnovec(t *testing.T) {
+	if !hasVX {
+		return
+	}
+	for _, a := range sumVV {
+		arg := a
+		testFunVV(t, "addVV_novec", addVV_novec, arg)
+
+		arg = argVV{a.z, a.y, a.x, a.c}
+		testFunVV(t, "addVV_novec symmetric", addVV_novec, arg)
+
+		arg = argVV{a.x, a.z, a.y, a.c}
+		testFunVV(t, "subVV_novec", subVV_novec, arg)
+
+		arg = argVV{a.y, a.z, a.x, a.c}
+		testFunVV(t, "subVV_novec symmetric", subVV_novec, arg)
+	}
+}
diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go
new file mode 100644
index 0000000..64225bb
--- /dev/null
+++ b/src/math/big/arith_test.go
@@ -0,0 +1,697 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"fmt"
+	"internal/testenv"
+	"math/bits"
+	"math/rand"
+	"strings"
+	"testing"
+)
+
+var isRaceBuilder = strings.HasSuffix(testenv.Builder(), "-race")
+
+type funVV func(z, x, y []Word) (c Word) // signature of the vector-vector routines driven by testFunVV
+type argVV struct {
+	z, x, y nat  // testFunVV calls f with inputs x and y and compares the result against z
+	c       Word // return value expected from f
+}
+
+var sumVV = []argVV{
+	{},
+	{nat{0}, nat{0}, nat{0}, 0},
+	{nat{1}, nat{1}, nat{0}, 0},
+	{nat{0}, nat{_M}, nat{1}, 1},
+	{nat{80235}, nat{12345}, nat{67890}, 0},
+	{nat{_M - 1}, nat{_M}, nat{_M}, 1},
+	{nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, nat{1, 0, 0, 0}, 1},
+	{nat{0, 0, 0, _M}, nat{_M, _M, _M, _M - 1}, nat{1, 0, 0, 0}, 0},
+	{nat{0, 0, 0, 0}, nat{_M, 0, _M, 0}, nat{1, _M, 0, _M}, 1},
+}
+
+// testFunVV runs f on the inputs a.x and a.y, writing into a freshly
+// allocated vector of len(a.z) words. It reports the first result word
+// that differs from a.z, and a return value that differs from a.c.
+// msg prefixes every error message.
+func testFunVV(t *testing.T, msg string, f funVV, a argVV) {
+	got := make(nat, len(a.z))
+	ret := f(got, a.x, a.y)
+	for i := 0; i < len(got); i++ {
+		if got[i] == a.z[i] {
+			continue
+		}
+		// Only the first mismatching word is reported.
+		t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, got[i], a.z[i])
+		break
+	}
+	if ret != a.c {
+		t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, ret, a.c)
+	}
+}
+
+// TestFunVV checks addVV and subVV, together with their _g counterparts,
+// against every entry of sumVV. Each entry is used four ways: as given and
+// with x and y swapped for addition, and rearranged so that z is the
+// minuend for subtraction (z - y = x and z - x = y).
+func TestFunVV(t *testing.T) {
+	for _, a := range sumVV {
+		sum := a
+		testFunVV(t, "addVV_g", addVV_g, sum)
+		testFunVV(t, "addVV", addVV, sum)
+
+		sumSwapped := argVV{z: a.z, x: a.y, y: a.x, c: a.c}
+		testFunVV(t, "addVV_g symmetric", addVV_g, sumSwapped)
+		testFunVV(t, "addVV symmetric", addVV, sumSwapped)
+
+		diff := argVV{z: a.x, x: a.z, y: a.y, c: a.c}
+		testFunVV(t, "subVV_g", subVV_g, diff)
+		testFunVV(t, "subVV", subVV, diff)
+
+		diffSwapped := argVV{z: a.y, x: a.z, y: a.x, c: a.c}
+		testFunVV(t, "subVV_g symmetric", subVV_g, diffSwapped)
+		testFunVV(t, "subVV symmetric", subVV, diffSwapped)
+	}
+}
+
+// Always the same seed for reproducible results.
+var rnd = rand.New(rand.NewSource(0))
+
+// rndW returns a pseudo-random Word drawn from rnd: the value of
+// rnd.Int63 shifted left by one bit, with the lowest bit supplied by
+// rnd.Int63n(2).
+func rndW() Word {
+	upper := rnd.Int63()
+	lowBit := rnd.Int63n(2)
+	return Word(upper<<1 | lowBit)
+}
+
+// rndV returns a new slice of n Words, each produced by rndW.
+func rndV(n int) []Word {
+	words := make([]Word, 0, n)
+	for len(words) < n {
+		words = append(words, rndW())
+	}
+	return words
+}
+
+var benchSizes = []int{1, 2, 3, 4, 5, 1e1, 1e2, 1e3, 1e4, 1e5}
+
+// BenchmarkAddVV measures addVV on random vectors of every length in
+// benchSizes. Lengths above 1e3 are skipped when isRaceBuilder is set.
+func BenchmarkAddVV(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		x := rndV(n)
+		y := rndV(n)
+		z := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			// SetBytes expects a byte count. _W is the Word size in bits;
+			// _S, as used by BenchmarkAddVW, is the Word size in bytes.
+			b.SetBytes(int64(n * _S))
+			for i := 0; i < b.N; i++ {
+				addVV(z, x, y)
+			}
+		})
+	}
+}
+
+// BenchmarkSubVV measures subVV on random vectors of every length in
+// benchSizes. Lengths above 1e3 are skipped when isRaceBuilder is set.
+func BenchmarkSubVV(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		x := rndV(n)
+		y := rndV(n)
+		z := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			// SetBytes expects a byte count. _W is the Word size in bits;
+			// _S, as used by BenchmarkSubVW, is the Word size in bytes.
+			b.SetBytes(int64(n * _S))
+			for i := 0; i < b.N; i++ {
+				subVV(z, x, y)
+			}
+		})
+	}
+}
+
+type funVW func(z, x []Word, y Word) (c Word) // signature of the vector-word routines driven by testFunVW
+type argVW struct {
+	z, x nat  // testFunVW calls f with input vector x and compares the result against z
+	y    Word // word operand passed to f (the shift count in the lshVW and rshVW tables)
+	c    Word // return value expected from f
+}
+
+var sumVW = []argVW{
+	{},
+	{nil, nil, 2, 2},
+	{nat{0}, nat{0}, 0, 0},
+	{nat{1}, nat{0}, 1, 0},
+	{nat{1}, nat{1}, 0, 0},
+	{nat{0}, nat{_M}, 1, 1},
+	{nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, 1, 1},
+	{nat{585}, nat{314}, 271, 0},
+}
+
+var lshVW = []argVW{
+	{},
+	{nat{0}, nat{0}, 0, 0},
+	{nat{0}, nat{0}, 1, 0},
+	{nat{0}, nat{0}, 20, 0},
+
+	{nat{_M}, nat{_M}, 0, 0},
+	{nat{_M << 1 & _M}, nat{_M}, 1, 1},
+	{nat{_M << 20 & _M}, nat{_M}, 20, _M >> (_W - 20)},
+
+	{nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0},
+	{nat{_M << 1 & _M, _M, _M}, nat{_M, _M, _M}, 1, 1},
+	{nat{_M << 20 & _M, _M, _M}, nat{_M, _M, _M}, 20, _M >> (_W - 20)},
+}
+
+var rshVW = []argVW{
+	{},
+	{nat{0}, nat{0}, 0, 0},
+	{nat{0}, nat{0}, 1, 0},
+	{nat{0}, nat{0}, 20, 0},
+
+	{nat{_M}, nat{_M}, 0, 0},
+	{nat{_M >> 1}, nat{_M}, 1, _M << (_W - 1) & _M},
+	{nat{_M >> 20}, nat{_M}, 20, _M << (_W - 20) & _M},
+
+	{nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0},
+	{nat{_M, _M, _M >> 1}, nat{_M, _M, _M}, 1, _M << (_W - 1) & _M},
+	{nat{_M, _M, _M >> 20}, nat{_M, _M, _M}, 20, _M << (_W - 20) & _M},
+}
+
+// testFunVW runs f on the vector a.x and the word a.y, writing into a
+// freshly allocated vector of len(a.z) words. It reports the first result
+// word that differs from a.z, and a return value that differs from a.c.
+// msg prefixes every error message.
+func testFunVW(t *testing.T, msg string, f funVW, a argVW) {
+	res := make(nat, len(a.z))
+	ret := f(res, a.x, a.y)
+	for i := range res {
+		if got, want := res[i], a.z[i]; got != want {
+			t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, got, want)
+			break
+		}
+	}
+	if ret != a.c {
+		t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, ret, a.c)
+	}
+}
+
+// testFunVWext compares f against the reference implementation f_g on the
+// inputs a.x and a.y. Unlike testFunVW, f writes directly into a.z, and
+// the expected values a.z and a.c of the argument are not consulted.
+func testFunVWext(t *testing.T, msg string, f funVW, f_g funVW, a argVW) {
+	// The reference result is computed first, into its own vector, so it
+	// is taken from a.x before f gets a chance to write to a.z.
+	want := make(nat, len(a.z))
+	wantC := f_g(want, a.x, a.y)
+	gotC := f(a.z, a.x, a.y)
+
+	for i := range a.z {
+		if a.z[i] != want[i] {
+			t.Errorf("%s\n\tgot z[%d] = %#x; want %#x", msg, i, a.z[i], want[i])
+			break
+		}
+	}
+	if gotC != wantC {
+		t.Errorf("%s\n\tgot c = %#x; want %#x", msg, gotC, wantC)
+	}
+}
+
+func makeFunVW(f func(z, x []Word, s uint) (c Word)) funVW {
+	return func(z, x []Word, s Word) (c Word) {
+		return f(z, x, uint(s))
+	}
+}
+
+// TestFunVW checks addVW and subVW against sumVW, shlVU against lshVW
+// and shrVU against rshVW, each together with its _g counterpart.
+func TestFunVW(t *testing.T) {
+	for _, a := range sumVW {
+		testFunVW(t, "addVW_g", addVW_g, a)
+		testFunVW(t, "addVW", addVW, a)
+
+		// The same entry read as a subtraction: z - y = x.
+		diff := argVW{z: a.x, x: a.z, y: a.y, c: a.c}
+		testFunVW(t, "subVW_g", subVW_g, diff)
+		testFunVW(t, "subVW", subVW, diff)
+	}
+
+	shlVW_g, shlVW := makeFunVW(shlVU_g), makeFunVW(shlVU)
+	for _, a := range lshVW {
+		testFunVW(t, "shlVU_g", shlVW_g, a)
+		testFunVW(t, "shlVU", shlVW, a)
+	}
+
+	shrVW_g, shrVW := makeFunVW(shrVU_g), makeFunVW(shrVU)
+	for _, a := range rshVW {
+		testFunVW(t, "shrVU_g", shrVW_g, a)
+		testFunVW(t, "shrVU", shrVW, a)
+	}
+}
+
+// makeWordVec returns a new vector of n Words, every one of them equal
+// to e (usually 0 or the maximum Word value).
+func makeWordVec(e Word, n int) []Word {
+	vec := make([]Word, n)
+	for i := 0; i < n; i++ {
+		vec[i] = e
+	}
+	return vec
+}
+
+// Extended testing of addVW and subVW using various kinds of input data.
+// The results of addVW_g and subVW_g serve as the golden reference against
+// which correctness is checked (see testFunVWext).
+func TestFunVWExt(t *testing.T) {
+	// 32 is the current threshold that triggers an optimized version of
+	// calculation for large-sized vector, ensure we have sizes around it tested.
+	var vwSizes = []int{0, 1, 3, 4, 5, 8, 9, 23, 31, 32, 33, 34, 35, 36, 50, 120}
+	for _, n := range vwSizes {
+		// vector of random numbers, using the result of addVW_g/subVW_g as golden
+		x := rndV(n)
+		y := rndW()
+		z := make(nat, n)
+		arg := argVW{z, x, y, 0} // the c field (0) is not consulted by testFunVWext
+		testFunVWext(t, "addVW, random inputs", addVW, addVW_g, arg)
+		testFunVWext(t, "subVW, random inputs", subVW, subVW_g, arg)
+
+		// vector of random numbers, but make 'x' and 'z' share storage
+		arg = argVW{x, x, y, 0} // z aliases x, so each call below overwrites x in place
+		testFunVWext(t, "addVW, random inputs, sharing storage", addVW, addVW_g, arg)
+		testFunVWext(t, "subVW, random inputs, sharing storage", subVW, subVW_g, arg)
+
+		// vector of maximum uint, to force carry flag set in each 'add'
+		y = ^Word(0) // all bits set
+		x = makeWordVec(y, n)
+		arg = argVW{z, x, y, 0}
+		testFunVWext(t, "addVW, vector of max uint", addVW, addVW_g, arg)
+
+		// vector of '0', to force carry flag set in each 'sub'
+		x = makeWordVec(0, n)
+		arg = argVW{z, x, 1, 0} // subtract 1 from an all-zero vector
+		testFunVWext(t, "subVW, vector of zero", subVW, subVW_g, arg)
+	}
+}
+
+type argVU struct {
+	d  []Word // d is a Word slice, the input parameters x and z come from this array.
+	l  uint   // l is the length of the input parameters x and z.
+	xp uint   // xp is the starting position of the input parameter x, x := d[xp:xp+l].
+	zp uint   // zp is the starting position of the input parameter z, z := d[zp:zp+l].
+	s  uint   // s is the shift number.
+	r  []Word // r is the expected output result z.
+	c  Word   // c is the expected return value.
+	m  string // message.
+}
+
+var argshlVUIn = []Word{1, 2, 4, 8, 16, 32, 64, 0, 0, 0}
+var argshlVUr0 = []Word{1, 2, 4, 8, 16, 32, 64}
+var argshlVUr1 = []Word{2, 4, 8, 16, 32, 64, 128}
+var argshlVUrWm1 = []Word{1 << (_W - 1), 0, 1, 2, 4, 8, 16}
+
+// argshlVU lists the shlVU cases run by TestShiftOverlap. The fields of
+// each entry are described on argVU; the last field is the message used
+// when a case fails.
+var argshlVU = []argVU{
+	// test cases for shlVU
+	{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of shlVU"},
+	{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of shlVU"},
+	{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of shlVU"},
+	{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of shlVU"},
+	// additional test cases with shift values of 0, 1 and (_W-1)
+	{argshlVUIn, 7, 0, 0, 0, argshlVUr0, 0, "complete overlap of shlVU and shift of 0"},
+	{argshlVUIn, 7, 0, 0, 1, argshlVUr1, 0, "complete overlap of shlVU and shift of 1"},
+	{argshlVUIn, 7, 0, 0, _W - 1, argshlVUrWm1, 32, "complete overlap of shlVU and shift of _W - 1"},
+	{argshlVUIn, 7, 0, 1, 0, argshlVUr0, 0, "partial overlap by 6 Words of shlVU and shift of 0"},
+	{argshlVUIn, 7, 0, 1, 1, argshlVUr1, 0, "partial overlap by 6 Words of shlVU and shift of 1"},
+	{argshlVUIn, 7, 0, 1, _W - 1, argshlVUrWm1, 32, "partial overlap by 6 Words of shlVU and shift of _W - 1"},
+	{argshlVUIn, 7, 0, 2, 0, argshlVUr0, 0, "partial overlap by 5 Words of shlVU and shift of 0"},
+	{argshlVUIn, 7, 0, 2, 1, argshlVUr1, 0, "partial overlap by 5 Words of shlVU and shift of 1"},
+	{argshlVUIn, 7, 0, 2, _W - 1, argshlVUrWm1, 32, "partial overlap by 5 Words of shlVU and shift of _W - 1"},
+	{argshlVUIn, 7, 0, 3, 0, argshlVUr0, 0, "partial overlap by 4 Words of shlVU and shift of 0"},
+	{argshlVUIn, 7, 0, 3, 1, argshlVUr1, 0, "partial overlap by 4 Words of shlVU and shift of 1"},
+	{argshlVUIn, 7, 0, 3, _W - 1, argshlVUrWm1, 32, "partial overlap by 4 Words of shlVU and shift of _W - 1"},
+}
+
+var argshrVUIn = []Word{0, 0, 0, 1, 2, 4, 8, 16, 32, 64}
+var argshrVUr0 = []Word{1, 2, 4, 8, 16, 32, 64}
+var argshrVUr1 = []Word{0, 1, 2, 4, 8, 16, 32}
+var argshrVUrWm1 = []Word{4, 8, 16, 32, 64, 128, 0}
+
+var argshrVU = []argVU{
+	// test cases for shrVU
+	{[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of shrVU"},
+	{[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of shrVU"},
+	{[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of shrVU"},
+	{[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of shrVU"},
+	// additional test cases with shift values of 0, 1 and (_W-1)
+	{argshrVUIn, 7, 3, 3, 0, argshrVUr0, 0, "complete overlap of shrVU and shift of 0"},
+	{argshrVUIn, 7, 3, 3, 1, argshrVUr1, 1 << (_W - 1), "complete overlap of shrVU and shift of 1"},
+	{argshrVUIn, 7, 3, 3, _W - 1, argshrVUrWm1, 2, "complete overlap of shrVU and shift of _W - 1"},
+	{argshrVUIn, 7, 3, 2, 0, argshrVUr0, 0, "partial overlap by 6 Words of shrVU and shift of 0"},
+	{argshrVUIn, 7, 3, 2, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 6 Words of shrVU and shift of 1"},
+	{argshrVUIn, 7, 3, 2, _W - 1, argshrVUrWm1, 2, "partial overlap by 6 Words of shrVU and shift of _W - 1"},
+	{argshrVUIn, 7, 3, 1, 0, argshrVUr0, 0, "partial overlap by 5 Words of shrVU and shift of 0"},
+	{argshrVUIn, 7, 3, 1, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 5 Words of shrVU and shift of 1"},
+	{argshrVUIn, 7, 3, 1, _W - 1, argshrVUrWm1, 2, "partial overlap by 5 Words of shrVU and shift of _W - 1"},
+	{argshrVUIn, 7, 3, 0, 0, argshrVUr0, 0, "partial overlap by 4 Words of shrVU and shift of 0"},
+	{argshrVUIn, 7, 3, 0, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 4 Words of shrVU and shift of 1"},
+	{argshrVUIn, 7, 3, 0, _W - 1, argshrVUrWm1, 2, "partial overlap by 4 Words of shrVU and shift of _W - 1"},
+}
+
+// testShiftFunc runs the shift routine f on the case a. The vectors z and
+// x are both slices of one copy of a.d, so they overlap as described by
+// a.zp, a.xp and a.l. The resulting z is compared against a.r and the
+// returned word against a.c.
+func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) {
+	// Operate on a private copy so that a.d itself is left untouched.
+	data := make([]Word, len(a.d))
+	copy(data, a.d)
+
+	zEnd, xEnd := a.zp+a.l, a.xp+a.l
+	z := data[a.zp:zEnd]
+	x := data[a.xp:xEnd]
+	c := f(z, x, a.s)
+
+	for i := range z {
+		if z[i] != a.r[i] {
+			t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", a.d, a.m, a.zp, zEnd, a.xp, xEnd, a.s, i, z[i], a.r[i])
+			break
+		}
+	}
+	if c != a.c {
+		t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", a.d, a.m, a.zp, zEnd, a.xp, xEnd, a.s, c, a.c)
+	}
+}
+
+// TestShiftOverlap runs shlVU over every case in argshlVU and shrVU over
+// every case in argshrVU.
+func TestShiftOverlap(t *testing.T) {
+	for i := range argshlVU {
+		testShiftFunc(t, shlVU, argshlVU[i])
+	}
+
+	for i := range argshrVU {
+		testShiftFunc(t, shrVU, argshrVU[i])
+	}
+}
+
+// TestIssue31084 computes 10^n as 5^n << n and checks that the decimal
+// representation is a 1 followed by n zeros.
+func TestIssue31084(t *testing.T) {
+	const n = 165
+	pow5 := nat(nil).expNN(nat{5}, nat{n}, nil, false)
+	pow10 := pow5.shl(pow5, n)
+	got := string(pow10.utoa(10))
+	if want := "1" + strings.Repeat("0", n); got != want {
+		t.Errorf("shl(%v, %v)\n\tgot  %s\n\twant %s", pow10, n, got, want)
+	}
+}
+
+const issue42838Value = "159309191113245227702888039776771180559110455519261878607388585338616290151305816094308987472018268594098344692611135542392730712890625"
+
+// TestIssue42838 parses issue42838Value, shifts it left by s bits and
+// checks that the decimal representation is a 1 followed by s zeros.
+func TestIssue42838(t *testing.T) {
+	const s = 192
+	parsed, _, _, _ := nat(nil).scan(strings.NewReader(issue42838Value), 0, false)
+	shifted := parsed.shl(parsed, s)
+	got := string(shifted.utoa(10))
+	if want := "1" + strings.Repeat("0", s); got != want {
+		t.Errorf("shl(%v, %v)\n\tgot  %s\n\twant %s", shifted, s, got, want)
+	}
+}
+
+// BenchmarkAddVW measures addVW on a random vector and a random word for
+// every length in benchSizes. Lengths above 1e3 are skipped when
+// isRaceBuilder is set.
+func BenchmarkAddVW(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		src, w, dst := rndV(n), rndW(), make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			b.SetBytes(int64(n * _S))
+			for iter := b.N; iter > 0; iter-- {
+				addVW(dst, src, w)
+			}
+		})
+	}
+}
+
+// BenchmarkAddVWext measures addVW when both the word y and every element
+// of x are the maximum Word value, which forces the carry flag to be set.
+func BenchmarkAddVWext(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		allOnes := ^Word(0)
+		src := makeWordVec(allOnes, n)
+		dst := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			b.SetBytes(int64(n * _S))
+			for iter := b.N; iter > 0; iter-- {
+				addVW(dst, src, allOnes)
+			}
+		})
+	}
+}
+
+// BenchmarkSubVW measures subVW on a random vector and a random word for
+// every length in benchSizes. Lengths above 1e3 are skipped when
+// isRaceBuilder is set.
+func BenchmarkSubVW(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		src, w, dst := rndV(n), rndW(), make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			b.SetBytes(int64(n * _S))
+			for iter := b.N; iter > 0; iter-- {
+				subVW(dst, src, w)
+			}
+		})
+	}
+}
+
+// BenchmarkSubVWext measures subVW when 1 is subtracted from an all-zero
+// vector, which forces the carry flag to be set.
+func BenchmarkSubVWext(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		src := makeWordVec(0, n)
+		one := Word(1)
+		dst := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			b.SetBytes(int64(n * _S))
+			for iter := b.N; iter > 0; iter-- {
+				subVW(dst, src, one)
+			}
+		})
+	}
+}
+
+type funVWW func(z, x []Word, y, r Word) (c Word) // signature of the routines driven by testFunVWW
+type argVWW struct {
+	z, x nat  // testFunVWW calls f with input vector x and compares the result against z
+	y, r Word // word operands passed to f
+	c    Word // return value expected from f
+}
+
+var prodVWW = []argVWW{
+	{},
+	{nat{0}, nat{0}, 0, 0, 0},
+	{nat{991}, nat{0}, 0, 991, 0},
+	{nat{0}, nat{_M}, 0, 0, 0},
+	{nat{991}, nat{_M}, 0, 991, 0},
+	{nat{0}, nat{0}, _M, 0, 0},
+	{nat{991}, nat{0}, _M, 991, 0},
+	{nat{1}, nat{1}, 1, 0, 0},
+	{nat{992}, nat{1}, 1, 991, 0},
+	{nat{22793}, nat{991}, 23, 0, 0},
+	{nat{22800}, nat{991}, 23, 7, 0},
+	{nat{0, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 0, 0},
+	{nat{7, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 7, 0},
+	{nat{0, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 0, 0},
+	{nat{991, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 991, 0},
+	{nat{0, 0, 0, 0}, nat{0, 0, 0, 0}, 894375984, 0, 0},
+	{nat{991, 0, 0, 0}, nat{0, 0, 0, 0}, 894375984, 991, 0},
+	{nat{_M << 1 & _M}, nat{_M}, 1 << 1, 0, _M >> (_W - 1)},
+	{nat{_M<<1&_M + 1}, nat{_M}, 1 << 1, 1, _M >> (_W - 1)},
+	{nat{_M << 7 & _M}, nat{_M}, 1 << 7, 0, _M >> (_W - 7)},
+	{nat{_M<<7&_M + 1<<6}, nat{_M}, 1 << 7, 1 << 6, _M >> (_W - 7)},
+	{nat{_M << 7 & _M, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, 0, _M >> (_W - 7)},
+	{nat{_M<<7&_M + 1<<6, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, 1 << 6, _M >> (_W - 7)},
+}
+
+// testFunVWW runs f on the vector a.x and the words a.y and a.r, writing
+// into a freshly allocated vector of len(a.z) words. It reports the first
+// result word that differs from a.z, and a return value that differs from
+// a.c. msg prefixes every error message.
+func testFunVWW(t *testing.T, msg string, f funVWW, a argVWW) {
+	out := make(nat, len(a.z))
+	carry := f(out, a.x, a.y, a.r)
+	for i := 0; i < len(out); i++ {
+		if out[i] == a.z[i] {
+			continue
+		}
+		// Only the first mismatching word is reported.
+		t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, out[i], a.z[i])
+		break
+	}
+	if carry != a.c {
+		t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, carry, a.c)
+	}
+}
+
+// TODO(gri) mulAddVWW and divWVW are symmetric operations but
+// their signature is not symmetric. Try to unify.
+
+type funWVW func(z []Word, xn Word, x []Word, y Word) (r Word) // signature of the routines driven by testFunWVW
+type argWVW struct {
+	z  nat  // result vector expected from f
+	xn Word // passed to f as its xn argument
+	x  nat  // input vector passed to f
+	y  Word // word operand passed to f
+	r  Word // return value expected from f
+}
+
+// testFunWVW runs f on a.xn, a.x and a.y, writing into a freshly
+// allocated vector of len(a.z) words. It reports the first result word
+// that differs from a.z, and a return value that differs from a.r.
+// msg prefixes every error message.
+func testFunWVW(t *testing.T, msg string, f funWVW, a argWVW) {
+	quo := make(nat, len(a.z))
+	rem := f(quo, a.xn, a.x, a.y)
+	for i := range quo {
+		if got, want := quo[i], a.z[i]; got != want {
+			t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, got, want)
+			break
+		}
+	}
+	if rem != a.r {
+		t.Errorf("%s%+v\n\tgot r = %#x; want %#x", msg, a, rem, a.r)
+	}
+}
+
+// TestFunVWW checks mulAddVWW_g and mulAddVWW against every entry of
+// prodVWW. Entries with a non-zero y and r < y are additionally run in the
+// opposite direction through divWVW.
+func TestFunVWW(t *testing.T) {
+	for _, a := range prodVWW {
+		testFunVWW(t, "mulAddVWW_g", mulAddVWW_g, a)
+		testFunVWW(t, "mulAddVWW", mulAddVWW, a)
+
+		if a.y == 0 || a.r >= a.y {
+			continue
+		}
+		// Reverse the roles: divide (c, z) by y, expecting quotient x and
+		// remainder r.
+		inv := argWVW{z: a.x, xn: a.c, x: a.z, y: a.y, r: a.r}
+		testFunWVW(t, "divWVW", divWVW, inv)
+	}
+}
+
+var mulWWTests = []struct {
+	x, y Word
+	q, r Word
+}{
+	{_M, _M, _M - 1, 1},
+	// 32 bit only: {0xc47dfa8c, 50911, 0x98a4, 0x998587f4},
+}
+
+// TestMulWW checks mulWW against every entry of mulWWTests.
+func TestMulWW(t *testing.T) {
+	for i := range mulWWTests {
+		tc := mulWWTests[i]
+		q, r := mulWW(tc.x, tc.y)
+		if q == tc.q && r == tc.r {
+			continue
+		}
+		t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, tc.q, tc.r)
+	}
+}
+
+var mulAddWWWTests = []struct {
+	x, y, c Word
+	q, r    Word
+}{
+	// TODO(agl): These will only work on 64-bit platforms.
+	// {15064310297182388543, 0xe7df04d2d35d5d80, 13537600649892366549, 13644450054494335067, 10832252001440893781},
+	// {15064310297182388543, 0xdab2f18048baa68d, 13644450054494335067, 12869334219691522700, 14233854684711418382},
+	{_M, _M, 0, _M - 1, 1},
+	{_M, _M, _M, _M, 0},
+}
+
+// TestMulAddWWW checks mulAddWWW_g against every entry of mulAddWWWTests.
+func TestMulAddWWW(t *testing.T) {
+	for i := range mulAddWWWTests {
+		tc := mulAddWWWTests[i]
+		q, r := mulAddWWW_g(tc.x, tc.y, tc.c)
+		if q == tc.q && r == tc.r {
+			continue
+		}
+		t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, tc.q, tc.r)
+	}
+}
+
+var divWWTests = []struct {
+	x1, x0, y Word
+	q, r      Word
+}{
+	{_M >> 1, 0, _M, _M >> 1, _M >> 1},
+	{_M - (1 << (_W - 2)), _M, 3 << (_W - 2), _M, _M - (1 << (_W - 2))},
+}
+
+const testsNumber = 1 << 16
+
+// TestDivWW checks divWW first against the fixed cases in divWWTests and
+// then, for testsNumber rounds of random operands, against bits.Div.
+func TestDivWW(t *testing.T) {
+	// fixed cases
+	for idx, tc := range divWWTests {
+		rec := reciprocalWord(tc.y)
+		q, r := divWW(tc.x1, tc.x0, tc.y, rec)
+		if q != tc.q || r != tc.r {
+			t.Errorf("#%d got (%x, %x) want (%x, %x)", idx, q, r, tc.q, tc.r)
+		}
+	}
+
+	// random cases; rounds where x1 >= y are skipped
+	for i := 0; i < testsNumber; i++ {
+		x1, x0, y := rndW(), rndW(), rndW()
+		if x1 >= y {
+			continue
+		}
+		qGot, rGot := divWW(x1, x0, y, reciprocalWord(y))
+		qWant, rWant := bits.Div(uint(x1), uint(x0), uint(y))
+		if uint(qGot) == qWant && uint(rGot) == rWant {
+			continue
+		}
+		t.Errorf("#%d got (%x, %x) want (%x, %x)", i, qGot, rGot, qWant, rWant)
+	}
+}
+
+// BenchmarkMulAddVWW measures mulAddVWW on a random vector and random
+// words for every length in benchSizes. Lengths above 1e3 are skipped when
+// isRaceBuilder is set.
+func BenchmarkMulAddVWW(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		// z must not be longer than x: the assembly implementations loop
+		// over len(z) words and would read past the end of x otherwise.
+		z := make([]Word, n)
+		x := rndV(n)
+		y := rndW()
+		r := rndW()
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			// SetBytes expects a byte count. _W is the Word size in bits;
+			// _S, as used by BenchmarkAddVW, is the Word size in bytes.
+			b.SetBytes(int64(n * _S))
+			for i := 0; i < b.N; i++ {
+				mulAddVWW(z, x, y, r)
+			}
+		})
+	}
+}
+
+// BenchmarkAddMulVVW measures addMulVVW on a random vector and a random
+// word for every length in benchSizes. Lengths above 1e3 are skipped when
+// isRaceBuilder is set.
+func BenchmarkAddMulVVW(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		x := rndV(n)
+		y := rndW()
+		z := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			// SetBytes expects a byte count. _W is the Word size in bits;
+			// _S, as used by BenchmarkAddVW, is the Word size in bytes.
+			b.SetBytes(int64(n * _S))
+			for i := 0; i < b.N; i++ {
+				addMulVVW(z, x, y)
+			}
+		})
+	}
+}
+// BenchmarkDivWVW measures divWVW, with xn = 0, on a random vector and a
+// random divisor for every length in benchSizes. Lengths above 1e3 are
+// skipped when isRaceBuilder is set.
+func BenchmarkDivWVW(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		x := rndV(n)
+		y := rndW()
+		z := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			// SetBytes expects a byte count. _W is the Word size in bits;
+			// _S, as used by BenchmarkAddVW, is the Word size in bytes.
+			b.SetBytes(int64(n * _S))
+			for i := 0; i < b.N; i++ {
+				divWVW(z, 0, x, y)
+			}
+		})
+	}
+}
+
+// BenchmarkNonZeroShifts measures shrVU and shlVU on a random vector with
+// a random shift count in [1, _W-2] for every length in benchSizes.
+// Lengths above 1e3 are skipped when isRaceBuilder is set.
+func BenchmarkNonZeroShifts(b *testing.B) {
+	for _, n := range benchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		x := rndV(n)
+		s := uint(rand.Int63n(_W-2)) + 1 // avoid 0 and over-large shifts
+		z := make([]Word, n)
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			// SetBytes is called on each sub-benchmark: a value set on
+			// the enclosing benchmark is not passed on to the benchmarks
+			// started with Run. The count is in bytes (_S per Word).
+			b.Run("shrVU", func(b *testing.B) {
+				b.SetBytes(int64(n * _S))
+				for i := 0; i < b.N; i++ {
+					_ = shrVU(z, x, s)
+				}
+			})
+			b.Run("shlVU", func(b *testing.B) {
+				b.SetBytes(int64(n * _S))
+				for i := 0; i < b.N; i++ {
+					_ = shlVU(z, x, s)
+				}
+			})
+		})
+	}
+}
diff --git a/src/math/big/arith_wasm.s b/src/math/big/arith_wasm.s
new file mode 100644
index 0000000..93eb16d
--- /dev/null
+++ b/src/math/big/arith_wasm.s
@@ -0,0 +1,33 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !math_big_pure_go
+// +build !math_big_pure_go
+
+#include "textflag.h"
+
+TEXT ·addVV(SB),NOSPLIT,$0
+	JMP ·addVV_g(SB) // addVV is implemented by jumping straight to addVV_g
+
+TEXT ·subVV(SB),NOSPLIT,$0
+	JMP ·subVV_g(SB) // subVV is implemented by jumping straight to subVV_g
+
+TEXT ·addVW(SB),NOSPLIT,$0
+	JMP ·addVW_g(SB) // addVW is implemented by jumping straight to addVW_g
+
+TEXT ·subVW(SB),NOSPLIT,$0
+	JMP ·subVW_g(SB) // subVW is implemented by jumping straight to subVW_g
+
+TEXT ·shlVU(SB),NOSPLIT,$0
+	JMP ·shlVU_g(SB) // shlVU is implemented by jumping straight to shlVU_g
+
+TEXT ·shrVU(SB),NOSPLIT,$0
+	JMP ·shrVU_g(SB) // shrVU is implemented by jumping straight to shrVU_g
+
+TEXT ·mulAddVWW(SB),NOSPLIT,$0
+	JMP ·mulAddVWW_g(SB) // mulAddVWW is implemented by jumping straight to mulAddVWW_g
+
+TEXT ·addMulVVW(SB),NOSPLIT,$0
+	JMP ·addMulVVW_g(SB) // addMulVVW is implemented by jumping straight to addMulVVW_g
+
diff --git a/src/math/big/bits_test.go b/src/math/big/bits_test.go
new file mode 100644
index 0000000..985b60b
--- /dev/null
+++ b/src/math/big/bits_test.go
@@ -0,0 +1,224 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements the Bits type used for testing Float operations
+// via an independent (albeit slower) representations for floating-point
+// numbers.
+
+package big
+
+import (
+	"fmt"
+	"sort"
+	"testing"
+)
+
+// A Bits value b represents a finite floating-point number x of the form
+//
+//	x = 2**b[0] + 2**b[1] + ... 2**b[len(b)-1]
+//
+// The order of slice elements is not significant. Negative elements may be
+// used to form fractions. A Bits value is normalized if each b[i] occurs at
+// most once. For instance Bits{0, 0, 1} is not normalized but represents the
+// same floating-point number as Bits{2}, which is normalized. The zero (nil)
+// value of Bits is a ready to use Bits value and represents the value 0.
+type Bits []int
+
+func (x Bits) add(y Bits) Bits {
+	return append(x, y...) // unnormalized sum: the elements of y follow those of x; append may reuse x's backing array
+}
+
+// mul returns the unnormalized product of x and y: one element x[i]+y[j]
+// for every pair of elements, ordered by i and then by j. The result is
+// nil if either operand is empty.
+func (x Bits) mul(y Bits) Bits {
+	var prod Bits
+	for i := range x {
+		for j := range y {
+			prod = append(prod, x[i]+y[j])
+		}
+	}
+	return prod
+}
+
+func TestMulBits(t *testing.T) {
+	for _, test := range []struct {
+		x, y, want Bits
+	}{
+		{nil, nil, nil},
+		{Bits{}, Bits{}, nil},
+		{Bits{0}, Bits{0}, Bits{0}},
+		{Bits{0}, Bits{1}, Bits{1}},
+		{Bits{1}, Bits{1, 2, 3}, Bits{2, 3, 4}},
+		{Bits{-1}, Bits{1}, Bits{0}},
+		{Bits{-10, -1, 0, 1, 10}, Bits{1, 2, 3}, Bits{-9, -8, -7, 0, 1, 2, 1, 2, 3, 2, 3, 4, 11, 12, 13}},
+	} {
+		got := fmt.Sprintf("%v", test.x.mul(test.y))
+		want := fmt.Sprintf("%v", test.want)
+		if got != want {
+			t.Errorf("%v * %v = %s; want %s", test.x, test.y, got, want)
+		}
+
+	}
+}
+
+// norm returns x in normalized form: equal entries are merged as an
+// addition (e.g., Bits{5, 5} => Bits{6}) until every entry occurs at most
+// once, and the result is sorted in increasing order so that it is
+// reproducible.
+func (x Bits) norm() Bits {
+	present := make(map[int]bool)
+	for _, b := range x {
+		// Two equal bits b add up to the single bit b+1; keep carrying
+		// until a free position is found.
+		for present[b] {
+			delete(present, b)
+			b++
+		}
+		present[b] = true
+	}
+
+	var z Bits
+	for b := range present {
+		z = append(z, b)
+	}
+	sort.Ints(z)
+	return z
+}
+
+func TestNormBits(t *testing.T) {
+	for _, test := range []struct {
+		x, want Bits
+	}{
+		{nil, nil},
+		{Bits{}, Bits{}},
+		{Bits{0}, Bits{0}},
+		{Bits{0, 0}, Bits{1}},
+		{Bits{3, 1, 1}, Bits{2, 3}},
+		{Bits{10, 9, 8, 7, 6, 6}, Bits{11}},
+	} {
+		got := fmt.Sprintf("%v", test.x.norm())
+		want := fmt.Sprintf("%v", test.want)
+		if got != want {
+			t.Errorf("normBits(%v) = %s; want %s", test.x, got, want)
+		}
+
+	}
+}
+
+// round returns the Float value corresponding to x after rounding x
+// to prec bits according to mode.
+func (x Bits) round(prec uint, mode RoundingMode) *Float {
+	x = x.norm() // each bit position now occurs at most once
+
+	// determine range
+	var min, max int
+	for i, b := range x {
+		if i == 0 || b < min {
+			min = b
+		}
+		if i == 0 || b > max {
+			max = b
+		}
+	}
+	prec0 := uint(max + 1 - min) // number of bit positions spanned by x
+	if prec >= prec0 {
+		return x.Float() // x already fits into prec bits
+	}
+	// prec < prec0
+
+	// determine bit 0, rounding, and sticky bit, and result bits z
+	var bit0, rbit, sbit uint
+	var z Bits
+	r := max - int(prec) // position of the rounding bit; bits above r are kept
+	for _, b := range x {
+		switch {
+		case b == r:
+			rbit = 1
+		case b < r:
+			sbit = 1
+		default:
+			// b > r
+			if b == r+1 {
+				bit0 = 1 // lowest kept bit is set
+			}
+			z = append(z, b)
+		}
+	}
+
+	// round
+	f := z.Float() // rounded to zero
+	if mode == ToNearestAway {
+		panic("not yet implemented")
+	}
+	if mode == ToNearestEven && rbit == 1 && (sbit == 1 || sbit == 0 && bit0 != 0) || mode == AwayFromZero {
+		// round away from zero
+		f.SetMode(ToZero).SetPrec(prec)
+		f.Add(f, Bits{int(r) + 1}.Float()) // add 2**(r+1), one unit in the lowest kept position
+	}
+	return f
+}
+
+// Float returns the *Float z of the smallest possible precision such that
+// z = sum(2**bits[i]), with i = range bits. If multiple bits[i] are equal,
+// they are added: Bits{0, 1, 0}.Float() == 2**0 + 2**1 + 2**0 = 4.
+func (bits Bits) Float() *Float {
+	// handle 0
+	if len(bits) == 0 {
+		return new(Float)
+	}
+	// len(bits) > 0
+
+	// determine lsb exponent
+	var min int
+	for i, b := range bits {
+		if i == 0 || b < min {
+			min = b
+		}
+	}
+
+	// create bit pattern
+	x := NewInt(0)
+	for _, b := range bits {
+		badj := b - min // bit position relative to the smallest element
+		// propagate carry if necessary
+		for x.Bit(badj) != 0 {
+			x.SetBit(x, badj, 0)
+			badj++
+		}
+		x.SetBit(x, badj, 1)
+	}
+
+	// create corresponding float
+	z := new(Float).SetInt(x) // normalized
+	if e := int64(z.exp) + int64(min); MinExp <= e && e <= MaxExp {
+		z.exp = int32(e) // shift the exponent by min to undo the adjustment above
+	} else {
+		// this should never happen for our test cases
+		panic("exponent out of range")
+	}
+	return z
+}
+
+// TestFromBits checks Bits.Float on a fixed table, comparing the 'p'
+// format text of the resulting Float against the expected string.
+func TestFromBits(t *testing.T) {
+	cases := []struct {
+		bits Bits
+		want string
+	}{
+		// all different bit numbers
+		{nil, "0"},
+		{Bits{0}, "0x.8p+1"},
+		{Bits{1}, "0x.8p+2"},
+		{Bits{-1}, "0x.8p+0"},
+		{Bits{63}, "0x.8p+64"},
+		{Bits{33, -30}, "0x.8000000000000001p+34"},
+		{Bits{255, 0}, "0x.8000000000000000000000000000000000000000000000000000000000000001p+256"},
+
+		// multiple equal bit numbers
+		{Bits{0, 0}, "0x.8p+2"},
+		{Bits{0, 0, 0, 0}, "0x.8p+3"},
+		{Bits{0, 1, 0}, "0x.8p+3"},
+		{append(Bits{2, 1, 0} /* 7 */, Bits{3, 1} /* 10 */ ...), "0x.88p+5" /* 17 */},
+	}
+	for i := range cases {
+		tc := &cases[i]
+		got := tc.bits.Float().Text('p', 0)
+		if got == tc.want {
+			continue
+		}
+		t.Errorf("setBits(%v) = %s; want %s", tc.bits, got, tc.want)
+	}
+}
diff --git a/src/math/big/calibrate_test.go b/src/math/big/calibrate_test.go
new file mode 100644
index 0000000..4fa663f
--- /dev/null
+++ b/src/math/big/calibrate_test.go
@@ -0,0 +1,173 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Calibration used to determine thresholds for using
+// different algorithms.  Ideally, this would be converted
+// to go generate to create thresholds.go
+
+// This file prints execution times for the Mul benchmark
+// given different Karatsuba thresholds. The result may be
+// used to manually fine-tune the threshold constant. The
+// results are somewhat fragile; use repeated runs to get
+// a clear picture.
+
+// Calculates lower and upper thresholds for when basicSqr
+// is faster than standard multiplication.
+
+// Usage: go test -run=TestCalibrate -v -calibrate
+
+package big
+
+import (
+	"flag"
+	"fmt"
+	"testing"
+	"time"
+)
+
+var calibrate = flag.Bool("calibrate", false, "run calibration test")
+
+const (
+	sqrModeMul       = "mul(x, x)"
+	sqrModeBasic     = "basicSqr(x)"
+	sqrModeKaratsuba = "karatsubaSqr(x)"
+)
+
+// TestCalibrate prints calibration data for the Karatsuba and squaring
+// thresholds. It does nothing unless the -calibrate flag is set.
+func TestCalibrate(t *testing.T) {
+	if !*calibrate {
+		return
+	}
+
+	computeKaratsubaThresholds()
+
+	// compute basicSqrThreshold where overhead becomes negligible
+	minSqr := computeSqrThreshold(10, 30, 1, 3, sqrModeMul, sqrModeBasic)
+	// compute karatsubaSqrThreshold where karatsuba is faster
+	maxSqr := computeSqrThreshold(200, 500, 10, 3, sqrModeBasic, sqrModeKaratsuba)
+
+	switch minSqr {
+	case 0:
+		fmt.Println("no basicSqrThreshold found")
+	default:
+		fmt.Printf("found basicSqrThreshold = %d\n", minSqr)
+	}
+	switch maxSqr {
+	case 0:
+		fmt.Println("no karatsubaSqrThreshold found")
+	default:
+		fmt.Printf("found karatsubaSqrThreshold = %d\n", maxSqr)
+	}
+}
+
+func karatsubaLoad(b *testing.B) {
+	BenchmarkMul(b)
+}
+
+// measureKaratsuba returns the time per operation of karatsubaLoad while
+// karatsubaThreshold is temporarily set to th. The previous threshold is
+// restored before returning.
+func measureKaratsuba(th int) time.Duration {
+	saved := karatsubaThreshold
+	karatsubaThreshold = th
+	res := testing.Benchmark(karatsubaLoad)
+	karatsubaThreshold = saved
+	return time.Duration(res.NsPerOp())
+}
+
+func computeKaratsubaThresholds() {
+	fmt.Printf("Multiplication times for varying Karatsuba thresholds\n")
+	fmt.Printf("(run repeatedly for good results)\n")
+
+	// determine Tb, the work load execution time using basic multiplication
+	Tb := measureKaratsuba(1e9) // th == 1e9 => Karatsuba multiplication disabled
+	fmt.Printf("Tb = %10s\n", Tb)
+
+	// thresholds
+	th := 4   // threshold currently being measured
+	th1 := -1 // break-even point; -1 until found
+	th2 := -1 // diminishing-return point; -1 until found
+
+	var deltaOld time.Duration // improvement measured for the previous th
+	for count := -1; count != 0 && th < 128; count-- {
+		// determine Tk, the work load execution time using Karatsuba multiplication
+		Tk := measureKaratsuba(th)
+
+		// improvement over Tb
+		delta := (Tb - Tk) * 100 / Tb // in percent of Tb
+
+		fmt.Printf("th = %3d  Tk = %10s  %4d%%", th, Tk, delta)
+
+		// determine break-even point
+		if Tk < Tb && th1 < 0 {
+			th1 = th
+			fmt.Print("  break-even point")
+		}
+
+		// determine diminishing return
+		if 0 < delta && delta < deltaOld && th2 < 0 {
+			th2 = th
+			fmt.Print("  diminishing return")
+		}
+		deltaOld = delta
+
+		fmt.Println()
+
+		// trigger counter
+		if th1 >= 0 && th2 >= 0 && count < 0 {
+			count = 10 // this many extra measurements after we got both thresholds
+		}
+
+		th++
+	}
+}
+
+// measureSqr returns the time per operation of benchmarkNatSqr for
+// operands of the given number of words, averaged over nruns benchmark
+// runs. For the duration of the measurement basicSqrThreshold and
+// karatsubaSqrThreshold are adjusted according to mode (one of the
+// sqrMode constants); the previous values are restored before returning.
+func measureSqr(words, nruns int, mode string) time.Duration {
+	savedBasic, savedKaratsuba := basicSqrThreshold, karatsubaSqrThreshold
+
+	switch mode {
+	case sqrModeMul:
+		basicSqrThreshold = words + 1
+	case sqrModeBasic:
+		basicSqrThreshold, karatsubaSqrThreshold = words-1, words+1
+	case sqrModeKaratsuba:
+		karatsubaSqrThreshold = words - 1
+	}
+
+	// more runs for better statistics
+	run := func(b *testing.B) { benchmarkNatSqr(b, words) }
+	var total int64
+	for left := nruns; left > 0; left-- {
+		total += testing.Benchmark(run).NsPerOp()
+	}
+	avg := total / int64(nruns)
+
+	basicSqrThreshold, karatsubaSqrThreshold = savedBasic, savedKaratsuba
+
+	return time.Duration(avg)
+}
+
+// computeSqrThreshold measures the squaring modes lower and upper for
+// every operand size from 'from' to 'to' words in increments of step,
+// using nruns benchmark runs per measurement, and prints one line per
+// size. It returns the first size at which the answer to "is upper
+// faster than lower" differs from the answer at the first size, or 0 if
+// it never changes.
+func computeSqrThreshold(from, to, step, nruns int, lower, upper string) int {
+	fmt.Printf("Calibrating threshold between %s and %s\n", lower, upper)
+	fmt.Printf("Looking for a timing difference for x between %d - %d words by %d step\n", from, to, step)
+
+	var (
+		firstFaster bool // whether upper was faster at the first size
+		found       int  // first size at which that answer flipped; 0 if none
+	)
+	for words := from; words <= to; words += step {
+		tLower := measureSqr(words, nruns, lower)
+		tUpper := measureSqr(words, nruns, upper)
+		faster := tLower > tUpper
+		gain := tLower - tUpper
+		fmt.Printf("words = %3d deltaT = %10s (%4d%%) is %s better: %v", words, gain, gain*100/tLower, upper, faster)
+		if words == from {
+			firstFaster = faster
+		}
+		if found == 0 && faster != firstFaster {
+			found = words
+			fmt.Printf("  threshold  found")
+		}
+		fmt.Println()
+
+	}
+
+	switch found {
+	case 0:
+		fmt.Printf("Found NO threshold between %d - %d\n", from, to)
+	default:
+		fmt.Printf("Found threshold = %d between %d - %d\n", found, from, to)
+	}
+	return found
+}
diff --git a/src/math/big/decimal.go b/src/math/big/decimal.go
new file mode 100644
index 0000000..716f03b
--- /dev/null
+++ b/src/math/big/decimal.go
@@ -0,0 +1,270 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements multi-precision decimal numbers.
+// The implementation is for float to decimal conversion only;
+// not general purpose use.
+// The only operations are precise conversion from binary to
+// decimal and rounding.
+//
+// The key observation and some code (shr) is borrowed from
+// strconv/decimal.go: conversion of binary fractional values can be done
+// precisely in multi-precision decimal because 2 divides 10 (required for
+// >> of mantissa); but conversion of decimal floating-point values cannot
+// be done precisely in binary representation.
+//
+// In contrast to strconv/decimal.go, only right shift is implemented in
+// decimal format - left shift can be done precisely in binary format.
+
+package big
+
+// A decimal represents an unsigned floating-point number in decimal representation.
+// The value of a non-zero decimal d is d.mant * 10**d.exp with 0.1 <= d.mant < 1,
+// with the most-significant mantissa digit at index 0. For the zero decimal, the
+// mantissa length and exponent are 0.
+// The zero value for decimal represents a ready-to-use 0.0.
+type decimal struct {
+	mant []byte // mantissa as ASCII digits '0'..'9', most significant digit first
+	exp  int    // decimal exponent: the value is 0.mant * 10**exp
+}
+
+// at returns the mantissa digit at index i, where index 0 is the most
+// significant digit. Indices outside the mantissa yield '0'.
+func (d *decimal) at(i int) byte {
+	if i < 0 || i >= len(d.mant) {
+		return '0'
+	}
+	return d.mant[i]
+}
+
+// Maximum shift amount that can be done in one pass without overflow.
+// A Word has _W bits and (1<<maxShift - 1)*10 + 9 must fit into Word.
+// With maxShift == _W - 4 that value is 10<<maxShift - 1, which is
+// smaller than 16<<maxShift == 1<<_W.
+const maxShift = _W - 4
+
+// TODO(gri) Since we know the desired decimal precision when converting
+// a floating-point number, we may be able to limit the number of decimal
+// digits that need to be computed by init by providing an additional
+// precision argument and keeping track of when a number was truncated early
+// (equivalent of "sticky bit" in binary rounding).
+
+// TODO(gri) Along the same lines, enforce some limit to shift magnitudes
+// to avoid "infinitely" long running conversions (until we run out of space).
+
+// init initializes x to the decimal representation of m << shift (for
+// shift >= 0), or m >> -shift (for shift < 0).
+// Any previous value of x is overwritten; the existing x.mant slice is
+// truncated and appended to rather than replaced.
+func (x *decimal) init(m nat, shift int) {
+	// special case 0
+	if len(m) == 0 {
+		x.mant = x.mant[:0]
+		x.exp = 0
+		return
+	}
+
+	// Optimization: If we need to shift right, first remove any trailing
+	// zero bits from m to reduce shift amount that needs to be done in
+	// decimal format (since that is likely slower).
+	if shift < 0 {
+		ntz := m.trailingZeroBits()
+		s := uint(-shift)
+		if s >= ntz {
+			s = ntz // shift at most ntz bits
+		}
+		m = nat(nil).shr(m, s)
+		shift += int(s) // shift is now <= 0; only the inexact part remains
+	}
+
+	// Do any shift left in binary representation.
+	if shift > 0 {
+		m = nat(nil).shl(m, uint(shift))
+		shift = 0
+	}
+
+	// Convert mantissa into decimal representation.
+	s := m.utoa(10)
+	n := len(s)
+	// All n digits are in front of the decimal point, so with the
+	// mantissa interpreted as 0.ddd the exponent is n.
+	x.exp = n
+	// Trim trailing zeros; instead the exponent is tracking
+	// the decimal point independent of the number of digits.
+	for n > 0 && s[n-1] == '0' {
+		n--
+	}
+	x.mant = append(x.mant[:0], s[:n]...)
+
+	// Do any (remaining) shift right in decimal representation.
+	// shr handles at most maxShift bits per call, so larger shifts
+	// are split into several passes.
+	if shift < 0 {
+		for shift < -maxShift {
+			shr(x, maxShift)
+			shift += maxShift
+		}
+		shr(x, uint(-shift))
+	}
+}
+
+// shr implements x >> s, for s <= maxShift.
+// The division by 1<<s is carried out digit by digit on the decimal
+// mantissa, in place; x.exp is adjusted for the new position of the
+// decimal point and trailing zeros are trimmed at the end.
+func shr(x *decimal, s uint) {
+	// Division by 1<<s using shift-and-subtract algorithm.
+
+	// pick up enough leading digits to cover first shift
+	r := 0 // read index
+	var n Word
+	for n>>s == 0 && r < len(x.mant) {
+		ch := Word(x.mant[r])
+		r++
+		n = n*10 + ch - '0'
+	}
+	if n == 0 {
+		// x == 0; shouldn't get here, but handle anyway
+		x.mant = x.mant[:0]
+		return
+	}
+	// The mantissa ran out before n reached 1<<s: keep scaling n by 10,
+	// counting each step as one more (zero) digit read.
+	for n>>s == 0 {
+		r++
+		n *= 10
+	}
+	// r digits were consumed before the first quotient digit is produced,
+	// so the decimal point moves r-1 positions to the left.
+	x.exp += 1 - r
+
+	// read a digit, write a digit
+	w := 0 // write index
+	mask := Word(1)<<s - 1
+	for r < len(x.mant) {
+		ch := Word(x.mant[r])
+		r++
+		d := n >> s
+		n &= mask // n -= d << s
+		x.mant[w] = byte(d + '0')
+		w++
+		n = n*10 + ch - '0'
+	}
+
+	// write extra digits that still fit
+	for n > 0 && w < len(x.mant) {
+		d := n >> s
+		n &= mask
+		x.mant[w] = byte(d + '0')
+		w++
+		n = n * 10
+	}
+	x.mant = x.mant[:w] // the number may be shorter (e.g. 1024 >> 10)
+
+	// append additional digits that didn't fit
+	for n > 0 {
+		d := n >> s
+		n &= mask
+		x.mant = append(x.mant, byte(d+'0'))
+		n = n * 10
+	}
+
+	trim(x)
+}
+
+// String returns x in plain positional notation, without an exponent part.
+// A decimal with an empty mantissa is printed as "0", whatever its exponent.
+func (x *decimal) String() string {
+	digits := len(x.mant)
+	if digits == 0 {
+		return "0"
+	}
+
+	var out []byte
+	if x.exp <= 0 {
+		// all digits follow the decimal point: 0.00ddd
+		out = make([]byte, 0, 2+(-x.exp)+digits)
+		out = append(out, "0."...)
+		out = appendZeros(out, -x.exp)
+		out = append(out, x.mant...)
+	} else if x.exp < digits {
+		// the decimal point falls inside the mantissa: dd.ddd
+		out = make([]byte, 0, 1+digits)
+		out = append(out, x.mant[:x.exp]...)
+		out = append(out, '.')
+		out = append(out, x.mant[x.exp:]...)
+	} else {
+		// digits <= x.exp, the value is an integer: ddd00
+		out = make([]byte, 0, x.exp)
+		out = append(out, x.mant...)
+		out = appendZeros(out, x.exp-digits)
+	}
+
+	return string(out)
+}
+
+// appendZeros returns buf extended by n '0' digits.
+// buf is returned unchanged if n <= 0.
+func appendZeros(buf []byte, n int) []byte {
+	for i := 0; i < n; i++ {
+		buf = append(buf, '0')
+	}
+	return buf
+}
+
+// shouldRoundUp reports whether x must be rounded up when it is
+// shortened to n mantissa digits, using round-half-to-even.
+// n must be a valid index for x.mant.
+func shouldRoundUp(x *decimal, n int) bool {
+	digit := x.mant[n]
+	if digit != '5' || n+1 != len(x.mant) {
+		// not halfway - digit tells all (x.mant has no trailing zeros)
+		return digit >= '5'
+	}
+	// exactly halfway - round to even: go up only if the
+	// last digit that is kept exists and is odd
+	if n == 0 {
+		return false
+	}
+	return (x.mant[n-1]-'0')&1 != 0
+}
+
+// round shortens x to at most n mantissa digits, rounding to the
+// nearest value with n (or fewer) mantissa digits and resolving ties
+// to even. x is left unchanged if n < 0 or if x already has no more
+// than n mantissa digits.
+func (x *decimal) round(n int) {
+	if 0 <= n && n < len(x.mant) {
+		if shouldRoundUp(x, n) {
+			x.roundUp(n)
+			return
+		}
+		x.roundDown(n)
+	}
+}
+
+// roundUp shortens x to at most n mantissa digits, rounding away from zero.
+// x is left unchanged if n < 0 or n >= len(x.mant).
+func (x *decimal) roundUp(n int) {
+	if n < 0 || n >= len(x.mant) {
+		return // nothing to do
+	}
+
+	// Step back over the '9's at the end of the digits that are kept;
+	// incrementing them would only carry into the next digit.
+	keep := n
+	for keep > 0 && x.mant[keep-1] >= '9' {
+		keep--
+	}
+
+	if keep > 0 {
+		// x.mant[keep-1] < '9': bump it and drop everything after it;
+		// the new last digit is non-zero, so no trimming is needed
+		x.mant[keep-1]++
+		x.mant = x.mant[:keep]
+		return
+	}
+
+	// all kept digits are '9's => round up to '1' and update exponent
+	x.mant[0] = '1' // ok since len(x.mant) > n
+	x.mant = x.mant[:1]
+	x.exp++
+}
+
+// roundDown shortens x to at most n mantissa digits by dropping the
+// remaining digits and trimming any trailing zeros that become exposed.
+// x is left unchanged if n < 0 or n >= len(x.mant).
+func (x *decimal) roundDown(n int) {
+	if 0 <= n && n < len(x.mant) {
+		x.mant = x.mant[:n]
+		trim(x)
+	}
+}
+
+// trim removes trailing zeros from x's mantissa; they do not
+// contribute to the value of x. If no digits remain, x is the
+// zero decimal and its exponent is reset to 0 as well.
+func trim(x *decimal) {
+	n := len(x.mant)
+	for ; n > 0; n-- {
+		if x.mant[n-1] != '0' {
+			break
+		}
+	}
+	x.mant = x.mant[:n]
+	if n == 0 {
+		x.exp = 0
+	}
+}
diff --git a/src/math/big/decimal_test.go b/src/math/big/decimal_test.go
new file mode 100644
index 0000000..424811e
--- /dev/null
+++ b/src/math/big/decimal_test.go
@@ -0,0 +1,134 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestDecimalString checks decimal.String against hand-built decimal
+// values covering the zero decimal and each position of the decimal point.
+func TestDecimalString(t *testing.T) {
+	tests := []struct {
+		x    decimal
+		want string
+	}{
+		{want: "0"},
+		{decimal{nil, 1000}, "0"}, // exponent of 0 is ignored
+		{decimal{[]byte("12345"), 0}, "0.12345"},
+		{decimal{[]byte("12345"), -3}, "0.00012345"},
+		{decimal{[]byte("12345"), +3}, "123.45"},
+		{decimal{[]byte("12345"), +10}, "1234500000"},
+	}
+
+	for _, tt := range tests {
+		got := tt.x.String()
+		if got == tt.want {
+			continue
+		}
+		t.Errorf("%v == %s; want %s", tt.x, got, tt.want)
+	}
+}
+
+// TestDecimalInit checks that decimal.init produces the exact decimal
+// value of a single-Word mantissa shifted left or right by shift bits.
+func TestDecimalInit(t *testing.T) {
+	tests := []struct {
+		x     Word
+		shift int
+		want  string
+	}{
+		{0, 0, "0"},
+		{0, -100, "0"},
+		{0, 100, "0"},
+		{1, 0, "1"},
+		{1, 10, "1024"},
+		{1, 100, "1267650600228229401496703205376"},
+		{1, -100, "0.0000000000000000000000000000007888609052210118054117285652827862296732064351090230047702789306640625"},
+		{12345678, 8, "3160493568"},
+		{12345678, -8, "48225.3046875"},
+		{195312, 9, "99999744"},
+		{1953125, 9, "1000000000"},
+	}
+
+	for _, tt := range tests {
+		m := nat{tt.x}.norm()
+
+		var d decimal
+		d.init(m, tt.shift)
+
+		got := d.String()
+		if got == tt.want {
+			continue
+		}
+		t.Errorf("%d << %d == %s; want %s", tt.x, tt.shift, got, tt.want)
+	}
+}
+
+// TestDecimalRounding checks roundDown, round (half to even) and roundUp:
+// each table entry is converted to a decimal and shortened to n mantissa
+// digits with each of the three methods in turn.
+func TestDecimalRounding(t *testing.T) {
+	tests := []struct {
+		x              uint64
+		n              int
+		down, even, up string
+	}{
+		{0, 0, "0", "0", "0"},
+		{0, 1, "0", "0", "0"},
+
+		{1, 0, "0", "0", "10"},
+		{5, 0, "0", "0", "10"},
+		{9, 0, "0", "10", "10"},
+
+		{15, 1, "10", "20", "20"},
+		{45, 1, "40", "40", "50"},
+		{95, 1, "90", "100", "100"},
+
+		{12344999, 4, "12340000", "12340000", "12350000"},
+		{12345000, 4, "12340000", "12340000", "12350000"},
+		{12345001, 4, "12340000", "12350000", "12350000"},
+		{23454999, 4, "23450000", "23450000", "23460000"},
+		{23455000, 4, "23450000", "23460000", "23460000"},
+		{23455001, 4, "23450000", "23460000", "23460000"},
+
+		{99994999, 4, "99990000", "99990000", "100000000"},
+		{99995000, 4, "99990000", "100000000", "100000000"},
+		{99999999, 4, "99990000", "100000000", "100000000"},
+
+		{12994999, 4, "12990000", "12990000", "13000000"},
+		{12995000, 4, "12990000", "13000000", "13000000"},
+		{12999999, 4, "12990000", "13000000", "13000000"},
+	}
+
+	for _, tt := range tests {
+		m := nat(nil).setUint64(tt.x)
+
+		// d is re-initialized from m before each rounding variant
+		var d decimal
+
+		d.init(m, 0)
+		d.roundDown(tt.n)
+		if s := d.String(); s != tt.down {
+			t.Errorf("roundDown(%d, %d) = %s; want %s", tt.x, tt.n, s, tt.down)
+		}
+
+		d.init(m, 0)
+		d.round(tt.n)
+		if s := d.String(); s != tt.even {
+			t.Errorf("round(%d, %d) = %s; want %s", tt.x, tt.n, s, tt.even)
+		}
+
+		d.init(m, 0)
+		d.roundUp(tt.n)
+		if s := d.String(); s != tt.up {
+			t.Errorf("roundUp(%d, %d) = %s; want %s", tt.x, tt.n, s, tt.up)
+		}
+	}
+}
+
+// sink receives the results computed by the benchmarks below; presumably
+// this keeps the compiler from optimizing the benchmarked calls away.
+var sink string
+
+// BenchmarkDecimalConversion measures initializing a decimal from natOne
+// with every shift from -100 through +100 and formatting it as a string.
+func BenchmarkDecimalConversion(b *testing.B) {
+	const minShift, maxShift = -100, +100
+	for n := 0; n < b.N; n++ {
+		for s := minShift; s <= maxShift; s++ {
+			var d decimal
+			d.init(natOne, s)
+			sink = d.String()
+		}
+	}
+}
+
+// BenchmarkFloatString measures Float.String for the value 1/3 held at
+// precisions of 1e2, 1e3, 1e4 and 1e5 bits, one sub-benchmark per
+// precision, and reports allocations.
+func BenchmarkFloatString(b *testing.B) {
+	precs := []uint{1e2, 1e3, 1e4, 1e5}
+
+	// the same Float is reused and re-set for every precision
+	x := new(Float)
+	for _, prec := range precs {
+		x.SetPrec(prec).SetRat(NewRat(1, 3))
+		name := fmt.Sprintf("%v", prec)
+		b.Run(name, func(b *testing.B) {
+			b.ReportAllocs()
+			for n := 0; n < b.N; n++ {
+				sink = x.String()
+			}
+		})
+	}
+}
diff --git a/src/math/big/doc.go b/src/math/big/doc.go
new file mode 100644
index 0000000..fee5a65
--- /dev/null
+++ b/src/math/big/doc.go
@@ -0,0 +1,98 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package big implements arbitrary-precision arithmetic (big numbers).
+The following numeric types are supported:
+
+	Int    signed integers
+	Rat    rational numbers
+	Float  floating-point numbers
+
+The zero value for an Int, Rat, or Float corresponds to 0. Thus, new
+values can be declared in the usual ways and denote 0 without further
+initialization:
+
+	var x Int        // &x is an *Int of value 0
+	var r = &Rat{}   // r is a *Rat of value 0
+	y := new(Float)  // y is a *Float of value 0
+
+Alternatively, new values can be allocated and initialized with factory
+functions of the form:
+
+	func NewT(v V) *T
+
+For instance, NewInt(x) returns an *Int set to the value of the int64
+argument x, NewRat(a, b) returns a *Rat set to the fraction a/b where
+a and b are int64 values, and NewFloat(f) returns a *Float initialized
+to the float64 argument f. More flexibility is provided with explicit
+setters, for instance:
+
+	var z1 Int
+	z1.SetUint64(123)                 // z1 := 123
+	z2 := new(Rat).SetFloat64(1.25)   // z2 := 5/4
+	z3 := new(Float).SetInt(z1)       // z3 := 123.0
+
+Setters, numeric operations and predicates are represented as methods of
+the form:
+
+	func (z *T) SetV(v V) *T          // z = v
+	func (z *T) Unary(x *T) *T        // z = unary x
+	func (z *T) Binary(x, y *T) *T    // z = x binary y
+	func (x *T) Pred() P              // p = pred(x)
+
+with T one of Int, Rat, or Float. For unary and binary operations, the
+result is the receiver (usually named z in that case; see below); if it
+is one of the operands x or y it may be safely overwritten (and its memory
+reused).
+
+Arithmetic expressions are typically written as a sequence of individual
+method calls, with each call corresponding to an operation. The receiver
+denotes the result and the method arguments are the operation's operands.
+For instance, given three *Int values a, b and c, the invocation
+
+	c.Add(a, b)
+
+computes the sum a + b and stores the result in c, overwriting whatever
+value was held in c before. Unless specified otherwise, operations permit
+aliasing of parameters, so it is perfectly ok to write
+
+	sum.Add(sum, x)
+
+to accumulate values x in a sum.
+
+(By always passing in a result value via the receiver, memory use can be
+much better controlled. Instead of having to allocate new memory for each
+result, an operation can reuse the space allocated for the result value,
+and overwrite that value with the new result in the process.)
+
+Notational convention: Incoming method parameters (including the receiver)
+are named consistently in the API to clarify their use. Incoming operands
+are usually named x, y, a, b, and so on, but never z. A parameter specifying
+the result is named z (typically the receiver).
+
+For instance, the arguments for (*Int).Add are named x and y, and because
+the receiver specifies the result destination, it is called z:
+
+	func (z *Int) Add(x, y *Int) *Int
+
+Methods of this form typically return the incoming receiver as well, to
+enable simple call chaining.
+
+Methods which don't require a result value to be passed in (for instance,
+Int.Sign), simply return the result. In this case, the receiver is typically
+the first operand, named x:
+
+	func (x *Int) Sign() int
+
+Various methods support conversions between strings and corresponding
+numeric values, and vice versa: *Int, *Rat, and *Float values implement
+the Stringer interface for a (default) string representation of the value,
+but also provide SetString methods to initialize a value from a string in
+a variety of supported formats (see the respective SetString documentation).
+
+Finally, *Int, *Rat, and *Float satisfy [fmt.Scanner] for scanning
+and (except for *Rat) the Formatter interface for formatted printing.
+*/
+package big
diff --git a/src/math/big/example_rat_test.go b/src/math/big/example_rat_test.go
new file mode 100644
index 0000000..dc67430
--- /dev/null
+++ b/src/math/big/example_rat_test.go
@@ -0,0 +1,68 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big_test
+
+import (
+	"fmt"
+	"math/big"
+)
+
+// recur evaluates the tail of the classic continued fraction for e
+//
+//	e = [1; 0, 1, 1, 2, 1, 1, ... 2n, 1, 1, ...]
+//
+// starting at term n and stopping after the first term with index > lim.
+// The nth term is
+//
+//	   1          if   n mod 3 != 1
+//	(n-1)/3 * 2   if   n mod 3 == 1
+func recur(n, lim int64) *big.Rat {
+	// coefficient of the nth term
+	coeff := int64(1)
+	if n%3 == 1 {
+		coeff = (n - 1) / 3 * 2
+	}
+	term := new(big.Rat).SetInt64(coeff)
+
+	if n <= lim {
+		// Add the reciprocal of the remaining continued fraction.
+		tail := recur(n+1, lim)
+		term.Add(term, new(big.Rat).Inv(tail))
+	}
+	return term
+}
+
+// This example demonstrates how to use big.Rat to compute the
+// first 15 terms in the sequence of rational convergents for
+// the constant e (base of natural logarithm).
+func Example_eConvergents() {
+	for i := 1; i <= 15; i++ {
+		r := recur(0, int64(i))
+
+		// Print r both as a fraction and as a floating-point number.
+		// Since big.Rat implements fmt.Stringer, we can use %-13s to
+		// get a left-aligned string representation of the fraction.
+		fmt.Printf("%-13s = %s\n", r, r.FloatString(8))
+	}
+
+	// Output:
+	// 2/1           = 2.00000000
+	// 3/1           = 3.00000000
+	// 8/3           = 2.66666667
+	// 11/4          = 2.75000000
+	// 19/7          = 2.71428571
+	// 87/32         = 2.71875000
+	// 106/39        = 2.71794872
+	// 193/71        = 2.71830986
+	// 1264/465      = 2.71827957
+	// 1457/536      = 2.71828358
+	// 2721/1001     = 2.71828172
+	// 23225/8544    = 2.71828184
+	// 25946/9545    = 2.71828182
+	// 49171/18089   = 2.71828183
+	// 517656/190435 = 2.71828183
+}
diff --git a/src/math/big/example_test.go b/src/math/big/example_test.go
new file mode 100644
index 0000000..31ca784
--- /dev/null
+++ b/src/math/big/example_test.go
@@ -0,0 +1,148 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big_test
+
+import (
+	"fmt"
+	"log"
+	"math"
+	"math/big"
+)
+
+// This example sets a big.Rat from the fraction string "355/113" and
+// prints it with three digits after the decimal point.
+func ExampleRat_SetString() {
+	r := new(big.Rat)
+	r.SetString("355/113")
+	fmt.Println(r.FloatString(3))
+	// Output: 3.142
+}
+
+// This example sets a big.Int from a string of base-8 digits and
+// prints the resulting value in decimal.
+func ExampleInt_SetString() {
+	i := new(big.Int)
+	i.SetString("644", 8) // octal
+	fmt.Println(i)
+	// Output: 420
+}
+
+// This example sets a big.Float from a decimal string and prints it.
+func ExampleFloat_SetString() {
+	f := new(big.Float)
+	f.SetString("3.14159")
+	fmt.Println(f)
+	// Output: 3.14159
+}
+
+// This example scans a decimal string into a big.Rat with fmt.Sscan
+// and prints the resulting fraction.
+func ExampleRat_Scan() {
+	// The Scan function is rarely used directly;
+	// the fmt package recognizes it as an implementation of fmt.Scanner.
+	r := new(big.Rat)
+	_, err := fmt.Sscan("1.5000", r)
+	if err != nil {
+		log.Println("error scanning value:", err)
+	} else {
+		fmt.Println(r)
+	}
+	// Output: 3/2
+}
+
+// This example scans a decimal string into a big.Int with fmt.Sscan.
+// The scanned value, 1<<64 + 1, is too large for a uint64.
+func ExampleInt_Scan() {
+	// The Scan function is rarely used directly;
+	// the fmt package recognizes it as an implementation of fmt.Scanner.
+	i := new(big.Int)
+	_, err := fmt.Sscan("18446744073709551617", i)
+	if err != nil {
+		log.Println("error scanning value:", err)
+	} else {
+		fmt.Println(i)
+	}
+	// Output: 18446744073709551617
+}
+
+// This example scans a number in exponent notation into a big.Float
+// with fmt.Sscan and prints it.
+func ExampleFloat_Scan() {
+	// The Scan function is rarely used directly;
+	// the fmt package recognizes it as an implementation of fmt.Scanner.
+	f := new(big.Float)
+	_, err := fmt.Sscan("1.19282e99", f)
+	if err != nil {
+		log.Println("error scanning value:", err)
+	} else {
+		fmt.Println(f)
+	}
+	// Output: 1.19282e+99
+}
+
+// This example demonstrates how to use big.Int to compute the smallest
+// Fibonacci number with 100 decimal digits and to test whether it is prime.
+func Example_fibonacci() {
+	// Initialize two big ints with the first two numbers in the sequence.
+	a := big.NewInt(0)
+	b := big.NewInt(1)
+
+	// Initialize limit as 10^99, the smallest integer with 100 digits.
+	var limit big.Int
+	limit.Exp(big.NewInt(10), big.NewInt(99), nil)
+
+	// Loop while a is smaller than the limit (1e99), i.e. while a
+	// has fewer than 100 digits.
+	for a.Cmp(&limit) < 0 {
+		// Compute the next Fibonacci number, storing it in a.
+		a.Add(a, b)
+		// Swap a and b so that b is the next number in the sequence.
+		a, b = b, a
+	}
+	fmt.Println(a) // 100-digit Fibonacci number
+
+	// Test a for primality.
+	// (ProbablyPrime's argument sets the number of Miller-Rabin
+	// rounds to be performed. 20 is a good value.)
+	fmt.Println(a.ProbablyPrime(20))
+
+	// Output:
+	// 1344719667586153181419716641724567886890850696275767987106294472017884974410332069524504824747437757
+	// false
+}
+
+// This example shows how to use big.Float to compute the square root of 2 with
+// a precision of 200 bits, and how to print the result as a decimal number.
+func Example_sqrt2() {
+	// We'll do computations with 200 bits of precision in the mantissa.
+	const prec = 200
+
+	// Compute the square root of 2 using Newton's Method. We start with
+	// an initial estimate for sqrt(2), and then iterate:
+	//     x_{n+1} = 1/2 * ( x_n + (2.0 / x_n) )
+
+	// Since Newton's Method doubles the number of correct digits at each
+	// iteration, we need at least log_2(prec) steps.
+	steps := int(math.Log2(prec))
+
+	// Initialize values we need for the computation.
+	two := new(big.Float).SetPrec(prec).SetInt64(2)
+	half := new(big.Float).SetPrec(prec).SetFloat64(0.5)
+
+	// Use 1 as the initial estimate.
+	x := new(big.Float).SetPrec(prec).SetInt64(1)
+
+	// We use t as a temporary variable. There's no need to set its precision
+	// since big.Float values with unset (== 0) precision automatically assume
+	// the largest precision of the arguments when used as the result (receiver)
+	// of a big.Float operation.
+	t := new(big.Float)
+
+	// Iterate. The loop bound is inclusive, so steps+1 iterations are run.
+	for i := 0; i <= steps; i++ {
+		t.Quo(two, x)  // t = 2.0 / x_n
+		t.Add(x, t)    // t = x_n + (2.0 / x_n)
+		x.Mul(half, t) // x_{n+1} = 0.5 * t
+	}
+
+	// We can use the usual fmt.Printf verbs since big.Float implements fmt.Formatter
+	fmt.Printf("sqrt(2) = %.50f\n", x)
+
+	// Print the error between 2 and x*x.
+	t.Mul(x, x) // t = x*x
+	fmt.Printf("error = %e\n", t.Sub(two, t))
+
+	// Output:
+	// sqrt(2) = 1.41421356237309504880168872420969807856967187537695
+	// error = 0.000000e+00
+}
diff --git a/src/math/big/float.go b/src/math/big/float.go
new file mode 100644
index 0000000..2f0635a
--- /dev/null
+++ b/src/math/big/float.go
@@ -0,0 +1,1736 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements multi-precision floating-point numbers.
+// Like in the GNU MPFR library (https://www.mpfr.org/), operands
+// can be of mixed precision. Unlike MPFR, the rounding mode is
+// not specified with each operation, but with each operand. The
+// rounding mode of the result operand determines the rounding
+// mode of an operation. This is a from-scratch implementation.
+
+package big
+
+import (
+	"fmt"
+	"math"
+	"math/bits"
+)
+
+// debugFloat guards the calls to (*Float).validate in this file.
+// validate itself panics if it is called while debugFloat is false.
+const debugFloat = false // enable for debugging
+
+// A nonzero finite Float represents a multi-precision floating point number
+//
+//	sign × mantissa × 2**exponent
+//
+// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
+// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
+// All Floats are ordered, and the ordering of two Floats x and y
+// is defined by x.Cmp(y).
+//
+// Each Float value also has a precision, rounding mode, and accuracy.
+// The precision is the maximum number of mantissa bits available to
+// represent the value. The rounding mode specifies how a result should
+// be rounded to fit into the mantissa bits, and accuracy describes the
+// rounding error with respect to the exact result.
+//
+// Unless specified otherwise, all operations (including setters) that
+// specify a *Float variable for the result (usually via the receiver
+// with the exception of MantExp), round the numeric result according
+// to the precision and rounding mode of the result variable.
+//
+// If the provided result precision is 0 (see below), it is set to the
+// precision of the argument with the largest precision value before any
+// rounding takes place, and the rounding mode remains unchanged. Thus,
+// uninitialized Floats provided as result arguments will have their
+// precision set to a reasonable value determined by the operands, and
+// their mode is the zero value for RoundingMode (ToNearestEven).
+//
+// By setting the desired precision to 24 or 53 and using matching rounding
+// mode (typically ToNearestEven), Float operations produce the same results
+// as the corresponding float32 or float64 IEEE-754 arithmetic for operands
+// that correspond to normal (i.e., not denormal) float32 or float64 numbers.
+// Exponent underflow and overflow lead to a 0 or an Infinity for different
+// values than IEEE-754 because Float exponents have a much larger range.
+//
+// The zero (uninitialized) value for a Float is ready to use and represents
+// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
+//
+// Operations always take pointer arguments (*Float) rather
+// than Float values, and each unique Float value requires
+// its own unique *Float pointer. To "copy" a Float value,
+// an existing (or newly allocated) Float must be set to
+// a new value using the Float.Set method; shallow copies
+// of Floats are not supported and may lead to errors.
+type Float struct {
+	prec uint32       // precision in mantissa bits; 0 for an uninitialized Float
+	mode RoundingMode // rounding mode used when rounding to prec bits
+	acc  Accuracy     // rounding error of the most recent operation; see Acc
+	form form         // zero, finite, or inf
+	neg  bool         // sign; reported by Signbit
+	mant nat          // mantissa bits; ignored unless form == finite
+	exp  int32        // binary exponent; ignored unless form == finite
+}
+
+// An ErrNaN panic is raised by a Float operation that would lead to
+// a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
+type ErrNaN struct {
+	msg string // text returned by Error, e.g. "NewFloat(NaN)"
+}
+
+// Error returns the message stored in err.
+func (err ErrNaN) Error() string {
+	return err.msg
+}
+
+// NewFloat returns a newly allocated Float holding the value x,
+// with precision 53 and rounding mode ToNearestEven.
+// It panics with ErrNaN if x is a NaN.
+func NewFloat(x float64) *Float {
+	if !math.IsNaN(x) {
+		z := new(Float)
+		return z.SetFloat64(x)
+	}
+	panic(ErrNaN{"NewFloat(NaN)"})
+}
+
+// Exponent and precision limits.
+// The exponent limits are the bounds of the int32 type used for a Float's
+// exponent; the precision limit is the largest value of the uint32 type
+// used for a Float's precision.
+const (
+	MaxExp  = math.MaxInt32  // largest supported exponent
+	MinExp  = math.MinInt32  // smallest supported exponent
+	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
+)
+
+// Internal representation: The mantissa bits x.mant of a nonzero finite
+// Float x are stored in a nat slice long enough to hold up to x.prec bits;
+// the slice may (but doesn't have to) be shorter if the mantissa contains
+// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
+// the msb is shifted all the way "to the left"). Thus, if the mantissa has
+// trailing 0 bits or x.prec is not a multiple of the Word size _W,
+// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
+// to the value 0.5; the exponent x.exp shifts the binary point as needed.
+//
+// A zero or non-finite Float x ignores x.mant and x.exp.
+//
+// x                 form      neg      mant         exp
+// ----------------------------------------------------------
+// ±0                zero      sign     -            -
+// 0 < |x| < +Inf    finite    sign     mantissa     exponent
+// ±Inf              inf       sign     -            -
+
+// A form value describes the internal representation.
+type form byte
+
+// The form value order is relevant - do not change!
+const (
+	zero   form = iota // ±0; mantissa and exponent are ignored
+	finite             // 0 < |x| < +Inf; mantissa and exponent are valid
+	inf                // ±Inf; mantissa and exponent are ignored
+)
+
+// RoundingMode determines how a Float value is rounded to the
+// desired precision. Rounding may change the Float value; the
+// rounding error is described by the Float's Accuracy.
+// The zero value of RoundingMode is ToNearestEven.
+type RoundingMode byte
+
+// These constants define supported rounding modes.
+const (
+	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
+	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
+	ToZero                            // == IEEE 754-2008 roundTowardZero
+	AwayFromZero                      // no IEEE 754-2008 equivalent
+	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
+	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
+)
+
+//go:generate stringer -type=RoundingMode
+
+// Accuracy describes the rounding error produced by the most recent
+// operation that generated a Float value, relative to the exact value.
+type Accuracy int8
+
+// Constants describing the Accuracy of a Float.
+const (
+	Below Accuracy = -1 // the Float is smaller than the exact value
+	Exact Accuracy = 0  // the Float equals the exact value
+	Above Accuracy = +1 // the Float is larger than the exact value
+)
+
+//go:generate stringer -type=Accuracy
+
+// SetPrec sets the precision of z to prec bits and returns z, rounded
+// according to z's rounding mode if its mantissa does not fit into prec
+// bits without loss of precision.
+// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
+// A prec larger than MaxPrec is treated as MaxPrec.
+func (z *Float) SetPrec(prec uint) *Float {
+	z.acc = Exact // optimistically assume no rounding is needed
+
+	switch {
+	case prec == 0:
+		// special case: no mantissa bits are left, so a finite
+		// value is truncated to a zero of the same sign
+		z.prec = 0
+		if z.form == finite {
+			z.acc = makeAcc(z.neg)
+			z.form = zero
+		}
+
+	default:
+		// general case
+		if prec > MaxPrec {
+			prec = MaxPrec
+		}
+		shrinking := uint32(prec) < z.prec
+		z.prec = uint32(prec)
+		if shrinking {
+			// bits may have to be dropped
+			z.round(0)
+		}
+	}
+
+	return z
+}
+
+// makeAcc returns Above if above is set, and Below otherwise.
+func makeAcc(above bool) Accuracy {
+	acc := Below
+	if above {
+		acc = Above
+	}
+	return acc
+}
+
+// SetMode sets the rounding mode of z to mode, resets z's accuracy
+// to Exact, and returns z. The value of z is not changed.
+// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
+func (z *Float) SetMode(mode RoundingMode) *Float {
+	z.mode, z.acc = mode, Exact
+	return z
+}
+
+// Prec returns the mantissa precision of x in bits.
+// The result may be 0 for |x| == 0 and |x| == Inf.
+// It is the precision stored in x, which is not necessarily the
+// number of mantissa bits needed to represent x; see MinPrec.
+func (x *Float) Prec() uint {
+	return uint(x.prec)
+}
+
+// MinPrec returns the smallest precision at which x can still be
+// represented exactly (i.e., the smallest prec before x.SetPrec(prec)
+// would start rounding x).
+// The result is 0 for |x| == 0 and |x| == Inf.
+func (x *Float) MinPrec() uint {
+	if x.form == finite {
+		// all mantissa bits, less the unused zero bits at the low end
+		return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
+	}
+	return 0
+}
+
+// Mode returns the rounding mode of x.
+// The mode can be changed with SetMode.
+func (x *Float) Mode() RoundingMode {
+	return x.mode
+}
+
+// Acc returns the accuracy of x produced by the most recent
+// operation, unless explicitly documented otherwise by that
+// operation.
+// SetMode resets the accuracy to Exact.
+func (x *Float) Acc() Accuracy {
+	return x.acc
+}
+
+// Sign returns:
+//
+//	-1 if x <   0
+//	 0 if x is ±0
+//	+1 if x >   0
+func (x *Float) Sign() int {
+	if debugFloat {
+		x.validate()
+	}
+	switch {
+	case x.form == zero:
+		return 0
+	case x.neg:
+		return -1
+	default:
+		return 1
+	}
+}
+
+// MantExp splits x into a mantissa and an exponent and returns the
+// exponent. If mant is not nil, it is set to the mantissa of x, with
+// the same precision and rounding mode as x. The components
+// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
+// Calling MantExp with a nil argument is an efficient way to
+// get the exponent of the receiver.
+//
+// Special cases are:
+//
+//	(  ±0).MantExp(mant) = 0, with mant set to   ±0
+//	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
+//
+// x and mant may be the same in which case x is set to its
+// mantissa value.
+func (x *Float) MantExp(mant *Float) (exp int) {
+	if debugFloat {
+		x.validate()
+	}
+
+	// Read the exponent first: x and mant may alias, and the
+	// exponent of mant is cleared below.
+	if x.form == finite {
+		exp = int(x.exp)
+	}
+	if mant == nil {
+		return exp
+	}
+
+	mant.Copy(x)
+	if mant.form == finite {
+		mant.exp = 0
+	}
+	return exp
+}
+
+// setExpAndRound makes z finite with exponent exp and rounds it, passing
+// sbit on to round. If exp is out of range, z is not rounded; it becomes
+// a zero (exp < MinExp) or an infinity (exp > MaxExp) instead, with its
+// sign unchanged and its accuracy set according to that sign.
+func (z *Float) setExpAndRound(exp int64, sbit uint) {
+	switch {
+	case exp < MinExp:
+		// underflow
+		z.form = zero
+		z.acc = makeAcc(z.neg)
+
+	case exp > MaxExp:
+		// overflow
+		z.form = inf
+		z.acc = makeAcc(!z.neg)
+
+	default:
+		z.form = finite
+		z.exp = int32(exp)
+		z.round(sbit)
+	}
+}
+
+// SetMantExp sets z to mant × 2**exp and returns z.
+// The result z has the same precision and rounding mode
+// as mant. SetMantExp is an inverse of MantExp but does
+// not require 0.5 <= |mant| < 1.0. Specifically, for a
+// given x of type *Float, SetMantExp relates to MantExp
+// as follows:
+//
+//	mant := new(Float)
+//	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
+//
+// Special cases are:
+//
+//	z.SetMantExp(  ±0, exp) =   ±0
+//	z.SetMantExp(±Inf, exp) = ±Inf
+//
+// z and mant may be the same in which case z's exponent
+// is set to exp.
+func (z *Float) SetMantExp(mant *Float, exp int) *Float {
+	if debugFloat {
+		z.validate()
+		mant.validate()
+	}
+
+	z.Copy(mant)
+	if z.form != finite {
+		// ±0 and ±Inf are not affected by the exponent
+		return z
+	}
+
+	// 0 < |mant| < +Inf: add the exponents in 64 bits so that
+	// setExpAndRound can detect a sum outside the int32 range
+	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
+	return z
+}
+
+// Signbit reports whether x is negative or negative zero.
+// It returns the stored sign of x whatever the form of x is,
+// so it also reports true for -Inf.
+func (x *Float) Signbit() bool {
+	return x.neg
+}
+
+// IsInf reports whether x is +Inf or -Inf.
+// The sign of x is not considered; use Signbit to tell the two apart.
+func (x *Float) IsInf() bool {
+	return x.form == inf
+}
+
+// IsInt reports whether x is an integer.
+// ±0 is an integer; ±Inf values are not.
+func (x *Float) IsInt() bool {
+	if debugFloat {
+		x.validate()
+	}
+	switch {
+	case x.form != finite:
+		// of the special values only ±0 counts as an integer
+		return x.form == zero
+	case x.exp <= 0:
+		// 0 < |x| < 1
+		return false
+	}
+	// x.exp > 0
+	if x.prec <= uint32(x.exp) {
+		// not enough bits for fractional mantissa
+		return true
+	}
+	// x is an integer iff all significant mantissa bits lie at or
+	// above the binary point.
+	return x.MinPrec() <= uint(x.exp)
+}
+
+// validate panics if x violates one of the invariants checked by validate0.
+// It is debugging support only and must be called solely when debugFloat
+// is set; calling it otherwise panics as well.
+func (x *Float) validate() {
+	if !debugFloat {
+		// avoid performance bugs
+		panic("validate called but debugFloat is not set")
+	}
+	msg := x.validate0()
+	if msg == "" {
+		return
+	}
+	panic(msg)
+}
+
+// validate0 checks the representation invariants of a finite x and returns
+// a description of the first violation found, or "" if there is none.
+// Non-finite values (±0, ±Inf) are always considered valid.
+func (x *Float) validate0() string {
+	if x.form != finite {
+		return ""
+	}
+	n := len(x.mant)
+	if n == 0 {
+		return "nonzero finite number with empty mantissa"
+	}
+	const msb = 1 << (_W - 1)
+	switch top := x.mant[n-1]; {
+	case top&msb == 0:
+		// mantissa is not normalized
+		return fmt.Sprintf("msb not set in last word %#x of %s", top, x.Text('p', 0))
+	case x.prec == 0:
+		return "zero precision finite number"
+	}
+	return ""
+}
+
+// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
+// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
+// have before calling round. z's mantissa must be normalized (with the msb set)
+// or empty.
+//
+// round works in place: it may shorten z.mant to the number of words needed
+// for z.prec bits, increment z.exp if rounding up overflows the mantissa, and
+// turn z into an infinity (z.form = inf) if that increment would exceed MaxExp.
+//
+// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
+// sign of z. For correct rounding, the sign of z must be set correctly before
+// calling round.
+func (z *Float) round(sbit uint) {
+	if debugFloat {
+		z.validate()
+	}
+
+	// Assume an exact result; this is overwritten below if bits are lost.
+	z.acc = Exact
+	if z.form != finite {
+		// ±0 or ±Inf => nothing left to do
+		return
+	}
+	// z.form == finite && len(z.mant) > 0
+	// m > 0 implies z.prec > 0 (checked by validate)
+
+	m := uint32(len(z.mant)) // present mantissa length in words
+	bits := m * _W           // present mantissa bits; bits > 0
+	if bits <= z.prec {
+		// mantissa fits => nothing to do
+		return
+	}
+	// bits > z.prec
+
+	// Rounding is based on two bits: the rounding bit (rbit) and the
+	// sticky bit (sbit). The rbit is the bit immediately before the
+	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
+	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
+	//
+	//   rbit  sbit  => "fractional part"
+	//
+	//   0     0        == 0
+	//   0     1        >  0  , < 0.5
+	//   1     0        == 0.5
+	//   1     1        >  0.5, < 1.0
+
+	// bits > z.prec: mantissa too large => round
+	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
+	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
+	// The sticky bit is only needed for rounding ToNearestEven
+	// or when the rounding bit is zero. Avoid computation otherwise.
+	if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
+		sbit = z.mant.sticky(r)
+	}
+	sbit &= 1 // be safe and ensure it's a single bit
+
+	// cut off extra words
+	// (rbit and sbit were read above, before any low-order words are dropped)
+	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
+	if m > n {
+		copy(z.mant, z.mant[m-n:]) // move n last words to front
+		z.mant = z.mant[:n]
+	}
+
+	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
+	ntz := n*_W - z.prec // 0 <= ntz < _W
+	lsb := Word(1) << ntz
+
+	// round if result is inexact
+	if rbit|sbit != 0 {
+		// Make rounding decision: The result mantissa is truncated ("rounded down")
+		// by default. Decide if we need to increment, or "round up", the (unsigned)
+		// mantissa.
+		inc := false
+		switch z.mode {
+		case ToNegativeInf:
+			inc = z.neg
+		case ToZero:
+			// nothing to do
+		case ToNearestEven:
+			// round up if above the halfway point, or exactly halfway
+			// and the retained least-significant bit is odd
+			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
+		case ToNearestAway:
+			inc = rbit != 0
+		case AwayFromZero:
+			inc = true
+		case ToPositiveInf:
+			inc = !z.neg
+		default:
+			panic("unreachable")
+		}
+
+		// A positive result (!z.neg) is Above the exact result if we increment,
+		// and it's Below if we truncate (Exact results require no rounding).
+		// For a negative result (z.neg) it is exactly the opposite.
+		z.acc = makeAcc(inc != z.neg)
+
+		if inc {
+			// add 1 to mantissa
+			if addVW(z.mant, z.mant, lsb) != 0 {
+				// mantissa overflow => adjust exponent
+				if z.exp >= MaxExp {
+					// exponent overflow
+					// (z.acc keeps the value assigned above)
+					z.form = inf
+					return
+				}
+				z.exp++
+				// adjust mantissa: divide by 2 to compensate for exponent adjustment
+				shrVU(z.mant, z.mant, 1)
+				// set msb == carry == 1 from the mantissa overflow above
+				const msb = 1 << (_W - 1)
+				z.mant[n-1] |= msb
+			}
+		}
+	}
+
+	// zero out trailing bits in least-significant word
+	z.mant[0] &^= lsb - 1
+
+	if debugFloat {
+		z.validate()
+	}
+}
+
+// setBits64 sets z to the (possibly rounded) value x, negated if neg is
+// set, and returns z. A zero precision of z is changed to 64 first.
+// The sign is installed before rounding because it affects the rounding
+// direction of some modes.
+func (z *Float) setBits64(neg bool, x uint64) *Float {
+	if z.prec == 0 {
+		z.prec = 64
+	}
+	z.acc, z.neg = Exact, neg
+	if x == 0 {
+		z.form = zero
+		return z
+	}
+	// x != 0
+	z.form = finite
+	// Normalize: shift x left until its most significant bit is set.
+	lz := bits.LeadingZeros64(x)
+	z.mant = z.mant.setUint64(x << uint(lz))
+	z.exp = int32(64 - lz) // always fits
+	if z.prec >= 64 {
+		// all 64 bits are representable => no rounding needed
+		return z
+	}
+	z.round(0)
+	return z
+}
+
+// SetUint64 sets z to the (possibly rounded) value of x and returns z.
+// If z's precision is 0, it is changed to 64 (and rounding will have
+// no effect).
+// The work is delegated to setBits64 with a positive sign.
+func (z *Float) SetUint64(x uint64) *Float {
+	return z.setBits64(false, x)
+}
+
+// SetInt64 sets z to the (possibly rounded) value of x and returns z.
+// If z's precision is 0, it is changed to 64 (and rounding will have
+// no effect).
+func (z *Float) SetInt64(x int64) *Float {
+	neg := x < 0
+	// Compute |x| in uint64 arithmetic; the two's-complement negation
+	// also yields the correct magnitude (1<<63) for math.MinInt64.
+	abs := uint64(x)
+	if neg {
+		abs = -abs
+	}
+	// The sign must be passed along rather than applied after a
+	// SetUint64 call because the sign affects rounding.
+	return z.setBits64(neg, abs)
+}
+
+// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
+// If z's precision is 0, it is changed to 53 (and rounding will have
+// no effect). SetFloat64 panics with ErrNaN if x is a NaN.
+func (z *Float) SetFloat64(x float64) *Float {
+	// The default precision is installed before the NaN check,
+	// so it is in place even if the call panics.
+	if z.prec == 0 {
+		z.prec = 53
+	}
+	if math.IsNaN(x) {
+		panic(ErrNaN{"Float.SetFloat64(NaN)"})
+	}
+	z.acc, z.neg = Exact, math.Signbit(x) // Signbit handles -0, -Inf correctly
+	switch {
+	case x == 0:
+		z.form = zero
+		return z
+	case math.IsInf(x, 0):
+		z.form = inf
+		return z
+	}
+	// normalized x != 0
+	z.form = finite
+	frac, e := math.Frexp(x) // get normalized mantissa
+	// Move the 52 fraction bits to the top of the word, just below
+	// the explicitly set msb.
+	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(frac)<<11)
+	z.exp = int32(e) // always fits
+	if z.prec >= 53 {
+		// every float64 mantissa bit is representable
+		return z
+	}
+	z.round(0)
+	return z
+}
+
+// fnorm normalizes mantissa m in place by shifting it to the left
+// until the msb of the most-significant word (msw) is 1, and returns
+// the number of bits shifted. It assumes that len(m) != 0.
+func fnorm(m nat) int64 {
+	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
+		panic("msw of mantissa is 0")
+	}
+	shift := nlz(m[len(m)-1])
+	if shift == 0 {
+		// already normalized
+		return 0
+	}
+	// Shifting by the msw's leading zero count cannot shift out a set bit.
+	if carry := shlVU(m, m, shift); debugFloat && carry != 0 {
+		panic("nlz or shlVU incorrect")
+	}
+	return int64(shift)
+}
+
+// SetInt sets z to the (possibly rounded) value of x and returns z.
+// If z's precision is 0, it is changed to the larger of x.BitLen()
+// or 64 (and rounding will have no effect).
+func (z *Float) SetInt(x *Int) *Float {
+	// TODO(gri) can be more efficient if z.prec > 0
+	// but small compared to the size of x, or if there
+	// are many trailing 0's.
+	nbits := uint32(x.BitLen())
+	if z.prec == 0 {
+		z.prec = umax32(nbits, 64)
+	}
+	z.acc, z.neg = Exact, x.neg
+	if len(x.abs) == 0 {
+		// x == 0
+		z.form = zero
+		return z
+	}
+	// x != 0
+	z.mant = z.mant.set(x.abs)
+	// Normalize the mantissa. The shift count is not needed because
+	// the exponent is simply the bit length of x.
+	fnorm(z.mant)
+	z.setExpAndRound(int64(nbits), 0)
+	return z
+}
+
+// SetRat sets z to the (possibly rounded) value of x and returns z.
+// If z's precision is 0, it is changed to the largest of a.BitLen(),
+// b.BitLen(), or 64; with x = a/b.
+func (z *Float) SetRat(x *Rat) *Float {
+	if x.IsInt() {
+		// no division required
+		return z.SetInt(x.Num())
+	}
+	// Convert numerator and denominator separately; both start with
+	// precision 0, so SetInt picks a precision for each of them.
+	var num, den Float
+	num.SetInt(x.Num())
+	den.SetInt(x.Denom())
+	if z.prec == 0 {
+		z.prec = umax32(num.prec, den.prec)
+	}
+	return z.Quo(&num, &den)
+}
+
+// SetInf sets z to -Inf if signbit is set, or to +Inf if it
+// is not, and returns z. z's precision stays as it is, and
+// the result is always Exact.
+func (z *Float) SetInf(signbit bool) *Float {
+	z.acc, z.form, z.neg = Exact, inf, signbit
+	return z
+}
+
+// Set sets z to the (possibly rounded) value of x and returns z.
+// If z's precision is 0, it is changed to the precision of x
+// before setting z (and rounding will have no effect).
+// Rounding is performed according to z's precision and rounding
+// mode; and z's accuracy reports the result error relative to the
+// exact (not rounded) result.
+func (z *Float) Set(x *Float) *Float {
+	if debugFloat {
+		x.validate()
+	}
+	z.acc = Exact
+	if z == x {
+		// nothing to copy
+		return z
+	}
+	z.form, z.neg = x.form, x.neg
+	if x.form == finite {
+		// exponent and mantissa are only meaningful for finite values
+		z.exp = x.exp
+		z.mant = z.mant.set(x.mant)
+	}
+	switch {
+	case z.prec == 0:
+		z.prec = x.prec
+	case z.prec < x.prec:
+		// z may be unable to hold all of x's bits
+		z.round(0)
+	}
+	return z
+}
+
+// Copy sets z to x, with the same precision, rounding mode, and
+// accuracy as x, and returns z. x is not changed even if z and
+// x are the same.
+func (z *Float) Copy(x *Float) *Float {
+	if debugFloat {
+		x.validate()
+	}
+	if z == x {
+		return z
+	}
+	z.prec, z.mode, z.acc = x.prec, x.mode, x.acc
+	z.form, z.neg = x.form, x.neg
+	if x.form == finite {
+		// exponent and mantissa are only copied for finite values
+		z.mant = z.mant.set(x.mant)
+		z.exp = x.exp
+	}
+	return z
+}
+
+// msb32 returns the 32 most significant bits of x,
+// or 0 if x is empty. x is expected to be normalized.
+func msb32(x nat) uint32 {
+	if len(x) == 0 {
+		return 0
+	}
+	top := x[len(x)-1] // most significant word
+	if debugFloat && top&(1<<(_W-1)) == 0 {
+		panic("x not normalized")
+	}
+	if _W == 32 {
+		return uint32(top)
+	}
+	if _W == 64 {
+		// the upper half of the word
+		return uint32(top >> 32)
+	}
+	panic("unreachable")
+}
+
+// msb64 returns the 64 most significant bits of x,
+// or 0 if x is empty. x is expected to be normalized.
+func msb64(x nat) uint64 {
+	hi := len(x) - 1 // index of most significant word
+	if hi < 0 {
+		return 0
+	}
+	if debugFloat && x[hi]&(1<<(_W-1)) == 0 {
+		panic("x not normalized")
+	}
+	if _W == 64 {
+		return uint64(x[hi])
+	}
+	if _W == 32 {
+		// Two words are needed; the lower one reads as 0 if x has
+		// only a single word.
+		bits64 := uint64(x[hi]) << 32
+		if hi > 0 {
+			bits64 |= uint64(x[hi-1])
+		}
+		return bits64
+	}
+	panic("unreachable")
+}
+
+// Uint64 returns the unsigned integer resulting from truncating x
+// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
+// if x is an integer and Below otherwise.
+// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
+// for x > math.MaxUint64.
+func (x *Float) Uint64() (uint64, Accuracy) {
+	if debugFloat {
+		x.validate()
+	}
+
+	switch x.form {
+	case finite:
+		if x.neg {
+			return 0, Above
+		}
+		// 0 < x < +Inf
+		if x.exp <= 0 {
+			// 0 < x < 1
+			return 0, Below
+		}
+		// 1 <= x < Inf
+		if x.exp <= 64 {
+			// u = trunc(x) fits into a uint64
+			u := msb64(x.mant) >> (64 - uint32(x.exp))
+			// x is an integer iff none of its significant mantissa bits
+			// lie below the binary point, i.e., iff MinPrec() <= exp.
+			// This is the same test used by IsInt, Int64, and Int.
+			// (Comparing against 64 instead would report Exact for
+			// values with a fractional part, such as 1.5.)
+			if x.MinPrec() <= uint(x.exp) {
+				return u, Exact
+			}
+			return u, Below // x truncated
+		}
+		// x too large
+		return math.MaxUint64, Below
+
+	case zero:
+		return 0, Exact
+
+	case inf:
+		if x.neg {
+			return 0, Above
+		}
+		return math.MaxUint64, Below
+	}
+
+	panic("unreachable")
+}
+
+// Int64 returns the integer resulting from truncating x towards zero.
+// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
+// an integer, and Above (x < 0) or Below (x > 0) otherwise.
+// The result is (math.MinInt64, Above) for x < math.MinInt64,
+// and (math.MaxInt64, Below) for x > math.MaxInt64.
+func (x *Float) Int64() (int64, Accuracy) {
+	if debugFloat {
+		x.validate()
+	}
+
+	// Dispose of the special values first.
+	switch x.form {
+	case zero:
+		return 0, Exact
+	case inf:
+		if x.neg {
+			return math.MinInt64, Above
+		}
+		return math.MaxInt64, Below
+	case finite:
+		// handled below
+	default:
+		panic("unreachable")
+	}
+
+	// 0 < |x| < +Inf
+	truncAcc := makeAcc(x.neg) // accuracy of a truncated result
+	switch {
+	case x.exp <= 0:
+		// 0 < |x| < 1
+		return 0, truncAcc
+
+	case x.exp <= 63:
+		// 1 <= |x| < 1<<63:
+		// trunc(x) fits into an int64 (excluding math.MinInt64)
+		v := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
+		if x.neg {
+			v = -v
+		}
+		if x.MinPrec() > uint(x.exp) {
+			return v, truncAcc // x truncated
+		}
+		return v, Exact
+
+	case x.neg:
+		// x <= math.MinInt64
+		// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
+		if x.exp == 64 && x.MinPrec() == 1 {
+			truncAcc = Exact
+		}
+		return math.MinInt64, truncAcc
+	}
+
+	// x too large
+	return math.MaxInt64, Below
+}
+
+// Float32 returns the float32 value nearest to x. If x is too small to be
+// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
+// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
+// If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
+// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
+//
+// In all other finite cases the returned accuracy is the one produced by
+// rounding x to the available float32 mantissa width. ±0 and ±Inf convert
+// exactly.
+func (x *Float) Float32() (float32, Accuracy) {
+	if debugFloat {
+		x.validate()
+	}
+
+	switch x.form {
+	case finite:
+		// 0 < |x| < +Inf
+
+		// NOTE(review): dmin is declared for documentation only; it is not
+		// referenced in this function.
+		const (
+			fbits = 32                //        float size
+			mbits = 23                //        mantissa size (excluding implicit msb)
+			ebits = fbits - mbits - 1 //     8  exponent size
+			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
+			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
+			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
+			emax  = bias              //   127  largest unbiased exponent (normal)
+		)
+
+		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
+		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
+
+		// Compute precision p for float32 mantissa.
+		// If the exponent is too small, we have a denormal number before
+		// rounding and fewer than p mantissa bits of precision available
+		// (the exponent remains fixed but the mantissa gets shifted right).
+		p := mbits + 1 // precision of normal float
+		if e < emin {
+			// recompute precision
+			p = mbits + 1 - emin + int(e)
+			// If p == 0, the mantissa of x is shifted so much to the right
+			// that its msb falls immediately to the right of the float32
+			// mantissa space. In other words, if the smallest denormal is
+			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
+			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
+			// If m == 0.5, it is rounded down to even, i.e., 0.0.
+			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
+			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
+				// underflow to ±0
+				if x.neg {
+					// Negate a variable to obtain -0: the constant
+					// expression -0.0 would evaluate to +0.
+					var z float32
+					return -z, Above
+				}
+				return 0.0, Below
+			}
+			// otherwise, round up
+			// We handle p == 0 explicitly because it's easy and because
+			// Float.round doesn't support rounding to 0 bits of precision.
+			if p == 0 {
+				if x.neg {
+					return -math.SmallestNonzeroFloat32, Below
+				}
+				return math.SmallestNonzeroFloat32, Above
+			}
+		}
+		// p > 0
+
+		// round
+		// r starts as a zero Float, so it uses the zero-value rounding mode
+		// (presumably ToNearestEven, matching "nearest" above — confirm against
+		// the RoundingMode declaration). With r.prec preset, Set rounds x to
+		// p bits if x has a larger precision.
+		var r Float
+		r.prec = uint32(p)
+		r.Set(x)
+		e = r.exp - 1
+
+		// Rounding may have caused r to overflow to ±Inf
+		// (rounding never causes underflows to 0).
+		// If the exponent is too large, also overflow to ±Inf.
+		if r.form == inf || e > emax {
+			// overflow
+			if x.neg {
+				return float32(math.Inf(-1)), Below
+			}
+			return float32(math.Inf(+1)), Above
+		}
+		// e <= emax
+
+		// Determine sign, biased exponent, and mantissa.
+		var sign, bexp, mant uint32
+		if x.neg {
+			sign = 1 << (fbits - 1)
+		}
+
+		// Rounding may have caused a denormal number to
+		// become normal. Check again.
+		if e < emin {
+			// denormal number: recompute precision
+			// Since rounding may have at best increased precision
+			// and we have eliminated p <= 0 early, we know p > 0.
+			// bexp == 0 for denormals
+			p = mbits + 1 - emin + int(e)
+			// keep only the top p bits of the rounded mantissa
+			mant = msb32(r.mant) >> uint(fbits-p)
+		} else {
+			// normal number: emin <= e <= emax
+			bexp = uint32(e+bias) << mbits
+			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
+		}
+
+		// assemble the bit pattern from its three fields
+		return math.Float32frombits(sign | bexp | mant), r.acc
+
+	case zero:
+		if x.neg {
+			// see above: -z of a variable yields -0
+			var z float32
+			return -z, Exact
+		}
+		return 0.0, Exact
+
+	case inf:
+		if x.neg {
+			return float32(math.Inf(-1)), Exact
+		}
+		return float32(math.Inf(+1)), Exact
+	}
+
+	panic("unreachable")
+}
+
+// Float64 returns the float64 value nearest to x. If x is too small to be
+// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
+// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
+// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
+// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
+//
+// In all other finite cases the returned accuracy is the one produced by
+// rounding x to the available float64 mantissa width. ±0 and ±Inf convert
+// exactly.
+func (x *Float) Float64() (float64, Accuracy) {
+	if debugFloat {
+		x.validate()
+	}
+
+	switch x.form {
+	case finite:
+		// 0 < |x| < +Inf
+
+		// NOTE(review): dmin is declared for documentation only; it is not
+		// referenced in this function.
+		const (
+			fbits = 64                //        float size
+			mbits = 52                //        mantissa size (excluding implicit msb)
+			ebits = fbits - mbits - 1 //    11  exponent size
+			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
+			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
+			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
+			emax  = bias              //  1023  largest unbiased exponent (normal)
+		)
+
+		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
+		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
+
+		// Compute precision p for float64 mantissa.
+		// If the exponent is too small, we have a denormal number before
+		// rounding and fewer than p mantissa bits of precision available
+		// (the exponent remains fixed but the mantissa gets shifted right).
+		p := mbits + 1 // precision of normal float
+		if e < emin {
+			// recompute precision
+			p = mbits + 1 - emin + int(e)
+			// If p == 0, the mantissa of x is shifted so much to the right
+			// that its msb falls immediately to the right of the float64
+			// mantissa space. In other words, if the smallest denormal is
+			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
+			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
+			// If m == 0.5, it is rounded down to even, i.e., 0.0.
+			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
+			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
+				// underflow to ±0
+				if x.neg {
+					// Negate a variable to obtain -0: the constant
+					// expression -0.0 would evaluate to +0.
+					var z float64
+					return -z, Above
+				}
+				return 0.0, Below
+			}
+			// otherwise, round up
+			// We handle p == 0 explicitly because it's easy and because
+			// Float.round doesn't support rounding to 0 bits of precision.
+			if p == 0 {
+				if x.neg {
+					return -math.SmallestNonzeroFloat64, Below
+				}
+				return math.SmallestNonzeroFloat64, Above
+			}
+		}
+		// p > 0
+
+		// round
+		// r starts as a zero Float, so it uses the zero-value rounding mode
+		// (presumably ToNearestEven, matching "nearest" above — confirm against
+		// the RoundingMode declaration). With r.prec preset, Set rounds x to
+		// p bits if x has a larger precision.
+		var r Float
+		r.prec = uint32(p)
+		r.Set(x)
+		e = r.exp - 1
+
+		// Rounding may have caused r to overflow to ±Inf
+		// (rounding never causes underflows to 0).
+		// If the exponent is too large, also overflow to ±Inf.
+		if r.form == inf || e > emax {
+			// overflow
+			if x.neg {
+				return math.Inf(-1), Below
+			}
+			return math.Inf(+1), Above
+		}
+		// e <= emax
+
+		// Determine sign, biased exponent, and mantissa.
+		var sign, bexp, mant uint64
+		if x.neg {
+			sign = 1 << (fbits - 1)
+		}
+
+		// Rounding may have caused a denormal number to
+		// become normal. Check again.
+		if e < emin {
+			// denormal number: recompute precision
+			// Since rounding may have at best increased precision
+			// and we have eliminated p <= 0 early, we know p > 0.
+			// bexp == 0 for denormals
+			p = mbits + 1 - emin + int(e)
+			// keep only the top p bits of the rounded mantissa
+			mant = msb64(r.mant) >> uint(fbits-p)
+		} else {
+			// normal number: emin <= e <= emax
+			bexp = uint64(e+bias) << mbits
+			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
+		}
+
+		// assemble the bit pattern from its three fields
+		return math.Float64frombits(sign | bexp | mant), r.acc
+
+	case zero:
+		if x.neg {
+			// see above: -z of a variable yields -0
+			var z float64
+			return -z, Exact
+		}
+		return 0.0, Exact
+
+	case inf:
+		if x.neg {
+			return math.Inf(-1), Exact
+		}
+		return math.Inf(+1), Exact
+	}
+
+	panic("unreachable")
+}
+
+// Int returns the result of truncating x towards zero;
+// or nil if x is an infinity.
+// The result is Exact if x.IsInt(); otherwise it is Below
+// for x > 0, and Above for x < 0.
+// If a non-nil *Int argument z is provided, Int stores
+// the result in z instead of allocating a new Int.
+func (x *Float) Int(z *Int) (*Int, Accuracy) {
+	if debugFloat {
+		x.validate()
+	}
+
+	// Allocate a result only if one is going to be returned
+	// (i.e., not for infinities). From here on z is non-nil
+	// in the zero and finite cases.
+	if z == nil && x.form <= finite {
+		z = new(Int)
+	}
+
+	switch x.form {
+	case finite:
+		// 0 < |x| < +Inf
+		acc := makeAcc(x.neg)
+		if x.exp <= 0 {
+			// 0 < |x| < 1
+			return z.SetInt64(0), acc
+		}
+		// x.exp > 0
+
+		// 1 <= |x| < +Inf
+		// determine minimum required precision for x
+		allBits := uint(len(x.mant)) * _W
+		exp := uint(x.exp)
+		if x.MinPrec() <= exp {
+			// no significant bits below the binary point
+			acc = Exact
+		}
+		// shift mantissa as needed
+		z.neg = x.neg
+		switch {
+		case exp > allBits:
+			// the binary point lies to the right of the mantissa words
+			z.abs = z.abs.shl(x.mant, exp-allBits)
+		case exp < allBits:
+			// drop the allBits-exp bits below the binary point
+			z.abs = z.abs.shr(x.mant, allBits-exp)
+		default:
+			// exp == allBits: the mantissa words are the integer
+			z.abs = z.abs.set(x.mant)
+		}
+		return z, acc
+
+	case zero:
+		return z.SetInt64(0), Exact
+
+	case inf:
+		return nil, makeAcc(x.neg)
+	}
+
+	panic("unreachable")
+}
+
+// Rat returns the rational number corresponding to x;
+// or nil if x is an infinity.
+// The result is Exact if x is not an Inf.
+// If a non-nil *Rat argument z is provided, Rat stores
+// the result in z instead of allocating a new Rat.
+func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
+	if debugFloat {
+		x.validate()
+	}
+
+	// A result is only needed (and allocated) for non-infinite x.
+	if z == nil && x.form <= finite {
+		z = new(Rat)
+	}
+
+	switch x.form {
+	case zero:
+		return z.SetInt64(0), Exact
+
+	case inf:
+		return nil, makeAcc(x.neg)
+
+	case finite:
+		// 0 < |x| < +Inf
+		mantBits := int32(len(x.mant)) * _W
+		// build up numerator and denominator
+		z.a.neg = x.neg
+		switch {
+		case x.exp < mantBits:
+			// x has bits below the binary point: use the mantissa as
+			// numerator and a power of two as denominator, then reduce
+			z.a.abs = z.a.abs.set(x.mant)
+			one := z.b.abs.setUint64(1)
+			z.b.abs = one.shl(one, uint(mantBits-x.exp))
+			z.norm()
+		case x.exp > mantBits:
+			// x is an integer wider than the mantissa
+			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-mantBits))
+			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
+			// z already in normal form
+		default:
+			// x.exp == mantBits: the mantissa is the integer value
+			z.a.abs = z.a.abs.set(x.mant)
+			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
+			// z already in normal form
+		}
+		return z, Exact
+	}
+
+	panic("unreachable")
+}
+
+// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
+// and returns z.
+//
+// NOTE(review): Set rounds (if z.prec < x.prec) while z still carries x's
+// sign, and the sign is cleared only afterwards. Since round documents that
+// ToNegativeInf and ToPositiveInf depend on the sign, the rounding direction
+// and z's accuracy for a negative x under those modes look like they refer
+// to x rather than |x| — confirm whether this is intended.
+func (z *Float) Abs(x *Float) *Float {
+	z.Set(x)
+	z.neg = false
+	return z
+}
+
+// Neg sets z to the (possibly rounded) value of x with its sign negated,
+// and returns z.
+//
+// NOTE(review): Set rounds (if z.prec < x.prec) using x's sign, and the
+// sign is flipped only afterwards, without adjusting z.acc. Since round
+// documents that ToNegativeInf and ToPositiveInf depend on the sign, the
+// rounding direction and reported accuracy look like they refer to x rather
+// than -x whenever rounding is inexact — confirm whether this is intended.
+func (z *Float) Neg(x *Float) *Float {
+	z.Set(x)
+	z.neg = !z.neg
+	return z
+}
+
+// validateBinaryOperands panics if x or y has an empty mantissa.
+// It is debugging support only and must be called solely when
+// debugFloat is set; calling it otherwise panics as well.
+func validateBinaryOperands(x, y *Float) {
+	switch {
+	case !debugFloat:
+		// avoid performance bugs
+		panic("validateBinaryOperands called but debugFloat is not set")
+	case len(x.mant) == 0:
+		panic("empty mantissa for x")
+	case len(y.mant) == 0:
+		panic("empty mantissa for y")
+	}
+}
+
+// z = x + y, ignoring signs of x and y for the addition
+// but using the sign of z for rounding the result.
+// x and y must have a non-empty mantissa and valid exponent.
+// z may be the same Float as x or y.
+func (z *Float) uadd(x, y *Float) {
+	// Note: This implementation requires 2 shifts most of the
+	// time. It is also inefficient if exponents or precisions
+	// differ by wide margins. The following article describes
+	// an efficient (but much more complicated) implementation
+	// compatible with the internal representation used here:
+	//
+	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
+	// Point Addition With Exact Rounding (as in the MPFR Library)"
+	// http://www.vinc17.net/research/papers/rnc6.pdf
+
+	if debugFloat {
+		validateBinaryOperands(x, y)
+	}
+
+	// compute exponents ex, ey for mantissa with "binary point"
+	// on the right (mantissa.0) - use int64 to avoid overflow
+	ex := int64(x.exp) - int64(len(x.mant))*_W
+	ey := int64(y.exp) - int64(len(y.mant))*_W
+
+	// If z.mant shares storage with an operand (presumably what alias
+	// reports — confirm against its definition), the shifted operand must
+	// go into a temporary so that the other operand is not overwritten
+	// before the addition reads it.
+	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
+
+	// TODO(gri) having a combined add-and-shift primitive
+	//           could make this code significantly faster
+	switch {
+	case ex < ey:
+		// scale y up so that both mantissas use exponent ex
+		if al {
+			t := nat(nil).shl(y.mant, uint(ey-ex))
+			z.mant = z.mant.add(x.mant, t)
+		} else {
+			z.mant = z.mant.shl(y.mant, uint(ey-ex))
+			z.mant = z.mant.add(x.mant, z.mant)
+		}
+	default:
+		// ex == ey, no shift needed
+		z.mant = z.mant.add(x.mant, y.mant)
+	case ex > ey:
+		// scale x up so that both mantissas use exponent ey
+		if al {
+			t := nat(nil).shl(x.mant, uint(ex-ey))
+			z.mant = z.mant.add(t, y.mant)
+		} else {
+			z.mant = z.mant.shl(x.mant, uint(ex-ey))
+			z.mant = z.mant.add(z.mant, y.mant)
+		}
+		// the sum now uses the smaller exponent
+		ex = ey
+	}
+	// len(z.mant) > 0
+
+	// Convert back to an exponent for a mantissa with the binary point
+	// on the left, compensating for the normalization shift.
+	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
+}
+
+// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
+// but using the sign of z for rounding the result.
+// x and y must have a non-empty mantissa and valid exponent.
+// z may be the same Float as x or y.
+func (z *Float) usub(x, y *Float) {
+	// This code is symmetric to uadd.
+	// We have not factored the common code out because
+	// eventually uadd (and usub) should be optimized
+	// by special-casing, and the code will diverge.
+
+	if debugFloat {
+		validateBinaryOperands(x, y)
+	}
+
+	// exponents ex, ey for mantissas with the "binary point" on the
+	// right (mantissa.0); int64 avoids overflow
+	ex := int64(x.exp) - int64(len(x.mant))*_W
+	ey := int64(y.exp) - int64(len(y.mant))*_W
+
+	// If z.mant shares storage with an operand (presumably what alias
+	// reports — confirm against its definition), the shifted operand and
+	// the difference are computed in a temporary so that no operand is
+	// overwritten before it has been read.
+	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
+
+	switch {
+	case ex < ey:
+		// scale y up so that both mantissas use exponent ex
+		if al {
+			t := nat(nil).shl(y.mant, uint(ey-ex))
+			z.mant = t.sub(x.mant, t)
+		} else {
+			z.mant = z.mant.shl(y.mant, uint(ey-ex))
+			z.mant = z.mant.sub(x.mant, z.mant)
+		}
+	default:
+		// ex == ey, no shift needed
+		z.mant = z.mant.sub(x.mant, y.mant)
+	case ex > ey:
+		// scale x up so that both mantissas use exponent ey
+		if al {
+			t := nat(nil).shl(x.mant, uint(ex-ey))
+			z.mant = t.sub(t, y.mant)
+		} else {
+			z.mant = z.mant.shl(x.mant, uint(ex-ey))
+			z.mant = z.mant.sub(z.mant, y.mant)
+		}
+		// the difference now uses the smaller exponent
+		ex = ey
+	}
+
+	// operands may have canceled each other out
+	if len(z.mant) == 0 {
+		// exact +0; callers adjust the sign of the zero if needed
+		z.acc = Exact
+		z.form = zero
+		z.neg = false
+		return
+	}
+	// len(z.mant) > 0
+
+	// Convert back to an exponent for a mantissa with the binary point
+	// on the left, compensating for the normalization shift.
+	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
+}
+
+// z = x * y, ignoring signs of x and y for the multiplication
+// but using the sign of z for rounding the result.
+// x and y must have a non-empty mantissa and valid exponent.
+func (z *Float) umul(x, y *Float) {
+	if debugFloat {
+		validateBinaryOperands(x, y)
+	}
+
+	// Note: This is doing too much work if the precision
+	// of z is less than the sum of the precisions of x
+	// and y which is often the case (e.g., if all floats
+	// have the same precision).
+	// TODO(gri) Optimize this for the common case.
+
+	// Exponents add under multiplication; sum them in int64
+	// so the addition cannot wrap.
+	expSum := int64(x.exp) + int64(y.exp)
+
+	prod := z.mant
+	if x == y {
+		// squaring
+		prod = prod.sqr(x.mant)
+	} else {
+		prod = prod.mul(x.mant, y.mant)
+	}
+	z.mant = prod
+
+	// Normalize the product and account for the shift in the exponent.
+	shift := fnorm(z.mant)
+	z.setExpAndRound(expSum-shift, 0)
+}
+
+// z = x / y, ignoring signs of x and y for the division
+// but using the sign of z for rounding the result.
+// x and y must have a non-empty mantissa and valid exponent.
+func (z *Float) uquo(x, y *Float) {
+	if debugFloat {
+		validateBinaryOperands(x, y)
+	}
+
+	// mantissa length in words for desired result precision + 1
+	// (at least one extra bit so we get the rounding bit after
+	// the division)
+	n := int(z.prec/_W) + 1
+
+	// compute adjusted x.mant such that we get enough result precision
+	xadj := x.mant
+	// (this d is scoped to the if statement; a different d is declared below)
+	if d := n - len(x.mant) + len(y.mant); d > 0 {
+		// d extra words needed => add d "0 digits" to x
+		// (the low d words of the fresh slice stay zero)
+		xadj = make(nat, len(x.mant)+d)
+		copy(xadj[d:], x.mant)
+	}
+	// TODO(gri): If we have too many digits (d < 0), we should be able
+	// to shorten x for faster division. But we must be extra careful
+	// with rounding in that case.
+
+	// Compute d before division since there may be aliasing of x.mant
+	// (via xadj) or y.mant with z.mant.
+	d := len(xadj) - len(y.mant)
+
+	// divide
+	// r receives the remainder; only whether it is empty is used below.
+	var r nat
+	z.mant, r = z.mant.div(nil, xadj, y.mant)
+	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
+
+	// The result is long enough to include (at least) the rounding bit.
+	// If there's a non-zero remainder, the corresponding fractional part
+	// (if it were computed), would have a non-zero sticky bit (if it were
+	// zero, it couldn't have a non-zero remainder).
+	var sbit uint
+	if len(r) > 0 {
+		sbit = 1
+	}
+
+	// normalize the quotient and account for the shift in the exponent
+	z.setExpAndRound(e-fnorm(z.mant), sbit)
+}
+
+// ucmp compares the magnitudes of x and y and returns
+//
+//	-1 if |x| <  |y|
+//	 0 if |x| == |y|
+//	+1 if |x| >  |y|
+//
+// x and y must have a non-empty mantissa and valid exponent.
+func (x *Float) ucmp(y *Float) int {
+	if debugFloat {
+		validateBinaryOperands(x, y)
+	}
+
+	// With normalized mantissas, differing exponents decide the order.
+	if x.exp != y.exp {
+		if x.exp < y.exp {
+			return -1
+		}
+		return +1
+	}
+	// x.exp == y.exp
+
+	// Compare mantissas word by word, starting with the most significant
+	// words. The shorter mantissa is treated as if padded with zero words.
+	for i, j := len(x.mant), len(y.mant); i > 0 || j > 0; {
+		var xw, yw Word
+		if i > 0 {
+			i--
+			xw = x.mant[i]
+		}
+		if j > 0 {
+			j--
+			yw = y.mant[j]
+		}
+		if xw == yw {
+			continue
+		}
+		if xw < yw {
+			return -1
+		}
+		return +1
+	}
+
+	return 0
+}
+
+// Handling of sign bit as defined by IEEE 754-2008, section 6.3:
+//
+// When neither the inputs nor result are NaN, the sign of a product or
+// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
+// or of a difference x−y regarded as a sum x+(−y), differs from at most
+// one of the addends’ signs; and the sign of the result of conversions,
+// the quantize operation, the roundToIntegral operations, and the
+// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
+// These rules shall apply even when operands or results are zero or infinite.
+//
+// When the sum of two operands with opposite signs (or the difference of
+// two operands with like signs) is exactly zero, the sign of that sum (or
+// difference) shall be +0 in all rounding-direction attributes except
+// roundTowardNegative; under that attribute, the sign of an exact zero
+// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
+// sign as x even when x is zero.
+//
+// See also: https://play.golang.org/p/RtH3UCt5IH
+
+// Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
+// it is changed to the larger of x's or y's precision before the operation.
+// Rounding is performed according to z's precision and rounding mode; and
+// z's accuracy reports the result error relative to the exact (not rounded)
+// result. Add panics with ErrNaN if x and y are infinities with opposite
+// signs. The value of z is undefined in that case.
+func (z *Float) Add(x, y *Float) *Float {
+	if debugFloat {
+		x.validate()
+		y.validate()
+	}
+
+	if z.prec == 0 {
+		z.prec = umax32(x.prec, y.prec)
+	}
+
+	switch {
+	case x.form == finite && y.form == finite:
+		// x + y (common case)
+
+		// z.neg is assigned before the operand signs are compared. If z
+		// is y, that assignment overwrites y.neg, so y's original sign is
+		// saved first. (If z is x the assignment is a no-op.)
+		yneg := y.neg
+		z.neg = x.neg
+
+		switch {
+		case x.neg == yneg:
+			// x + y == x + y
+			// (-x) + (-y) == -(x + y)
+			z.uadd(x, y)
+		case x.ucmp(y) > 0:
+			// x + (-y) == x - y
+			// (-x) + y == -(x - y)
+			z.usub(x, y)
+		default:
+			// x + (-y) == -(y - x)
+			// (-x) + y == y - x
+			z.neg = !z.neg
+			z.usub(y, x)
+		}
+		// an exact zero sum is -0 when rounding ToNegativeInf
+		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
+			z.neg = true
+		}
+		return z
+
+	case x.form == inf && y.form == inf && x.neg != y.neg:
+		// +Inf + -Inf
+		// -Inf + +Inf
+		// value of z is undefined but make sure it's valid
+		z.acc = Exact
+		z.form = zero
+		z.neg = false
+		panic(ErrNaN{"addition of infinities with opposite signs"})
+
+	case x.form == zero && y.form == zero:
+		// ±0 + ±0
+		z.acc = Exact
+		z.form = zero
+		z.neg = x.neg && y.neg // -0 + -0 == -0
+		return z
+
+	case x.form == inf || y.form == zero:
+		// ±Inf + y
+		// x + ±0
+		return z.Set(x)
+	}
+
+	// ±0 + y
+	// x + ±Inf
+	return z.Set(y)
+}
+
+// Sub sets z to the rounded difference x-y and returns z.
+// Precision, rounding, and accuracy reporting are as for Add.
+// Sub panics with ErrNaN if x and y are infinities with equal
+// signs. The value of z is undefined in that case.
+func (z *Float) Sub(x, y *Float) *Float {
+	// Sanity-check both operands when float debugging is enabled.
+	if debugFloat {
+		x.validate()
+		y.validate()
+	}
+
+	// A precision of 0 means "not set yet": adopt the larger operand precision.
+	if z.prec == 0 {
+		z.prec = umax32(x.prec, y.prec)
+	}
+
+	if x.form == finite && y.form == finite {
+		// x - y (common case)
+
+		// Save y.neg before z.neg is written: if z aliases y, the
+		// assignment to z.neg would otherwise change the sign that is
+		// tested in x.neg != yneg.
+		yneg := y.neg
+		z.neg = x.neg
+		if x.neg != yneg {
+			// x - (-y) == x + y
+			// (-x) - y == -(x + y)
+			z.uadd(x, y)
+		} else {
+			// x - y == x - y == -(y - x)
+			// (-x) - (-y) == y - x == -(x - y)
+			if x.ucmp(y) > 0 {
+				z.usub(x, y)
+			} else {
+				// The operands are swapped for the subtraction, so the
+				// sign chosen above has to be inverted.
+				z.neg = !z.neg
+				z.usub(y, x)
+			}
+		}
+		// Same exact-zero rule as in Add: the result is -0 when rounding
+		// toward negative infinity.
+		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
+			z.neg = true
+		}
+		return z
+	}
+
+	// From here on at least one operand is ±0 or ±Inf.
+
+	if x.form == inf && y.form == inf && x.neg == y.neg {
+		// +Inf - +Inf
+		// -Inf - -Inf
+		// value of z is undefined but make sure it's valid
+		z.acc = Exact
+		z.form = zero
+		z.neg = false
+		panic(ErrNaN{"subtraction of infinities with equal signs"})
+	}
+
+	if x.form == zero && y.form == zero {
+		// ±0 - ±0
+		z.acc = Exact
+		z.form = zero
+		z.neg = x.neg && !y.neg // -0 - +0 == -0
+		return z
+	}
+
+	// Equal-sign infinities and the both-zero case were handled above,
+	// so in the remaining cases one operand alone determines the result.
+	if x.form == inf || y.form == zero {
+		// ±Inf - y
+		// x - ±0
+		return z.Set(x)
+	}
+
+	// ±0 - y
+	// x - ±Inf
+	return z.Neg(y)
+}
+
+// Mul computes the rounded product x*y, stores it in z, and returns z.
+// Precision, rounding, and accuracy reporting follow the rules given for Add.
+// If one operand is zero and the other one an infinity, Mul panics with
+// ErrNaN; the value of z is undefined in that case.
+func (z *Float) Mul(x, y *Float) *Float {
+	if debugFloat {
+		x.validate()
+		y.validate()
+	}
+
+	if z.prec == 0 {
+		z.prec = umax32(x.prec, y.prec)
+	}
+
+	// The sign of a product depends on the operand signs only.
+	z.neg = x.neg != y.neg
+
+	xf, yf := x.form, y.form
+	switch {
+	case xf == finite && yf == finite:
+		// x * y (common case)
+		z.umul(x, y)
+
+	case xf == zero && yf == inf, xf == inf && yf == zero:
+		// ±0 * ±Inf
+		// ±Inf * ±0
+		// value of z is undefined but make sure it's valid
+		z.acc = Exact
+		z.form = zero
+		z.neg = false
+		panic(ErrNaN{"multiplication of zero with infinity"})
+
+	case xf == inf || yf == inf:
+		// ±Inf * y
+		// x * ±Inf
+		z.acc = Exact
+		z.form = inf
+
+	default:
+		// ±0 * y
+		// x * ±0
+		z.acc = Exact
+		z.form = zero
+	}
+	return z
+}
+
+// Quo computes the rounded quotient x/y, stores it in z, and returns z.
+// Precision, rounding, and accuracy reporting follow the rules given for Add.
+// If both operands are zero or both are infinities, Quo panics with ErrNaN;
+// the value of z is undefined in that case.
+func (z *Float) Quo(x, y *Float) *Float {
+	if debugFloat {
+		x.validate()
+		y.validate()
+	}
+
+	if z.prec == 0 {
+		z.prec = umax32(x.prec, y.prec)
+	}
+
+	// The sign of a quotient depends on the operand signs only.
+	z.neg = x.neg != y.neg
+
+	xf, yf := x.form, y.form
+	switch {
+	case xf == finite && yf == finite:
+		// x / y (common case)
+		z.uquo(x, y)
+
+	case xf == zero && yf == zero, xf == inf && yf == inf:
+		// ±0 / ±0
+		// ±Inf / ±Inf
+		// value of z is undefined but make sure it's valid
+		z.acc = Exact
+		z.form = zero
+		z.neg = false
+		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
+
+	case xf == zero || yf == inf:
+		// ±0 / y
+		// x / ±Inf
+		z.acc = Exact
+		z.form = zero
+
+	default:
+		// x / ±0
+		// ±Inf / y
+		z.acc = Exact
+		z.form = inf
+	}
+	return z
+}
+
+// Cmp reports how x is ordered relative to y. The result is:
+//
+//	-1 if x <  y
+//	 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
+//	+1 if x >  y
+func (x *Float) Cmp(y *Float) int {
+	if debugFloat {
+		x.validate()
+		y.validate()
+	}
+
+	// Different classes (see ord) already decide the comparison.
+	xo, yo := x.ord(), y.ord()
+	if xo < yo {
+		return -1
+	}
+	if xo > yo {
+		return +1
+	}
+
+	// Same class: the unsigned comparison is only needed for the two
+	// finite non-zero classes; for negative values the operands of that
+	// comparison are swapped.
+	if xo == +1 {
+		return x.ucmp(y)
+	}
+	if xo == -1 {
+		return y.ucmp(x)
+	}
+	return 0
+}
+
+// ord maps x to one of five ordered classes:
+//
+//	-2 if -Inf == x
+//	-1 if -Inf < x < 0
+//	 0 if x == 0 (signed or unsigned)
+//	+1 if 0 < x < +Inf
+//	+2 if x == +Inf
+func (x *Float) ord() int {
+	var class int
+	switch {
+	case x.form == zero:
+		// The sign of a zero is irrelevant for ordering.
+		return 0
+	case x.form == finite:
+		class = 1
+	case x.form == inf:
+		class = 2
+	}
+	if x.neg {
+		return -class
+	}
+	return class
+}
+
+// umax32 returns the larger of the two unsigned 32-bit values x and y.
+func umax32(x, y uint32) uint32 {
+	if y >= x {
+		return y
+	}
+	return x
+}
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go
new file mode 100644
index 0000000..7d6bf03
--- /dev/null
+++ b/src/math/big/float_test.go
@@ -0,0 +1,1858 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"flag"
+	"fmt"
+	"math"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// Compile-time check that ErrNaN implements the error interface: the
+// assignment below fails to compile if it does not.
+var _ error = ErrNaN{}
+
+// uint64 is a test helper that returns x as a uint64 and panics if the
+// conversion reported by x.Uint64 is not exact.
+func (x *Float) uint64() uint64 {
+	if v, acc := x.Uint64(); acc == Exact {
+		return v
+	}
+	panic(fmt.Sprintf("%s is not a uint64", x.Text('g', 10)))
+}
+
+// int64 is a test helper that returns x as an int64 and panics if the
+// conversion reported by x.Int64 is not exact.
+func (x *Float) int64() int64 {
+	if v, acc := x.Int64(); acc == Exact {
+		return v
+	}
+	panic(fmt.Sprintf("%s is not an int64", x.Text('g', 10)))
+}
+
+// TestFloatZeroValue checks that an uninitialized Float prints as 0.0, has
+// precision 0, and can be used as receiver or operand of the binary
+// arithmetic operations.
+func TestFloatZeroValue(t *testing.T) {
+	var zv Float
+
+	// zero (uninitialized) value is a ready-to-use 0.0
+	if got := zv.Text('f', 1); got != "0.0" {
+		t.Errorf("zero value = %s; want 0.0", got)
+	}
+
+	// zero value has precision 0
+	if got := zv.Prec(); got != 0 {
+		t.Errorf("prec = %d; want 0", got)
+	}
+
+	// operand turns n into a *Float; n == 0 yields an untouched zero value
+	// so that zero values end up in every position of the operations below.
+	operand := func(n int) *Float {
+		f := new(Float)
+		if n != 0 {
+			f.SetInt64(int64(n))
+		}
+		return f
+	}
+
+	tests := []struct {
+		z, x, y, want int
+		opname        rune
+		op            func(z, x, y *Float) *Float
+	}{
+		{0, 0, 0, 0, '+', (*Float).Add},
+		{0, 1, 2, 3, '+', (*Float).Add},
+		{1, 2, 0, 2, '+', (*Float).Add},
+		{2, 0, 1, 1, '+', (*Float).Add},
+
+		{0, 0, 0, 0, '-', (*Float).Sub},
+		{0, 1, 2, -1, '-', (*Float).Sub},
+		{1, 2, 0, 2, '-', (*Float).Sub},
+		{2, 0, 1, -1, '-', (*Float).Sub},
+
+		{0, 0, 0, 0, '*', (*Float).Mul},
+		{0, 1, 2, 2, '*', (*Float).Mul},
+		{1, 2, 0, 0, '*', (*Float).Mul},
+		{2, 0, 1, 0, '*', (*Float).Mul},
+
+		// {0, 0, 0, 0, '/', (*Float).Quo}, // panics
+		{0, 2, 1, 2, '/', (*Float).Quo},
+		{1, 2, 0, 0, '/', (*Float).Quo}, // = +Inf
+		{2, 0, 1, 0, '/', (*Float).Quo},
+	}
+	for _, tc := range tests {
+		z := operand(tc.z)
+		tc.op(z, operand(tc.x), operand(tc.y))
+
+		// An infinite result is represented by 0 in the table.
+		var got int
+		if !z.IsInf() {
+			got = int(z.int64())
+		}
+		if got != tc.want {
+			t.Errorf("%d %c %d = %d; want %d", tc.x, tc.opname, tc.y, got, tc.want)
+		}
+	}
+
+	// TODO(gri) test how precision is set for zero value results
+}
+
+// makeFloat parses s with ParseFloat (base 0, precision 1000, ToNearestEven)
+// and returns the resulting Float; it panics if s cannot be parsed.
+func makeFloat(s string) *Float {
+	f, _, err := ParseFloat(s, 0, 1000, ToNearestEven)
+	if err == nil {
+		return f
+	}
+	panic(err)
+}
+
+// TestFloatSetPrec checks the precision, value, and accuracy reported after
+// SetPrec for a handful of boundary and regular cases.
+func TestFloatSetPrec(t *testing.T) {
+	tests := []struct {
+		x    string
+		prec uint
+		want string
+		acc  Accuracy
+	}{
+		// prec 0
+		{"0", 0, "0", Exact},
+		{"-0", 0, "-0", Exact},
+		{"-Inf", 0, "-Inf", Exact},
+		{"+Inf", 0, "+Inf", Exact},
+		{"123", 0, "0", Below},
+		{"-123", 0, "-0", Above},
+
+		// prec at upper limit
+		{"0", MaxPrec, "0", Exact},
+		{"-0", MaxPrec, "-0", Exact},
+		{"-Inf", MaxPrec, "-Inf", Exact},
+		{"+Inf", MaxPrec, "+Inf", Exact},
+
+		// just a few regular cases - general rounding is tested elsewhere
+		{"1.5", 1, "2", Above},
+		{"-1.5", 1, "-2", Below},
+		{"123", 1e6, "123", Exact},
+		{"-123", 1e6, "-123", Exact},
+	}
+	for _, tc := range tests {
+		f := makeFloat(tc.x).SetPrec(tc.prec)
+
+		// The expected precision is the requested one, capped at MaxPrec.
+		wantPrec := tc.prec
+		if wantPrec > MaxPrec {
+			wantPrec = MaxPrec
+		}
+		if got := f.Prec(); got != wantPrec {
+			t.Errorf("%s.SetPrec(%d).Prec() == %d; want %d", tc.x, tc.prec, got, wantPrec)
+		}
+
+		got, acc := f.String(), f.Acc()
+		if got != tc.want || acc != tc.acc {
+			t.Errorf("%s.SetPrec(%d) = %s (%s); want %s (%s)", tc.x, tc.prec, got, acc, tc.want, tc.acc)
+		}
+	}
+}
+
+// TestFloatMinPrec checks MinPrec for values that were first set to a
+// precision of 100 bits.
+func TestFloatMinPrec(t *testing.T) {
+	const prec = 100
+	tests := []struct {
+		x    string
+		want uint
+	}{
+		{"0", 0},
+		{"-0", 0},
+		{"+Inf", 0},
+		{"-Inf", 0},
+		{"1", 1},
+		{"2", 1},
+		{"3", 2},
+		{"0x8001", 16},
+		{"0x8001p-1000", 16},
+		{"0x8001p+1000", 16},
+		{"0.1", prec},
+	}
+	for _, tc := range tests {
+		got := makeFloat(tc.x).SetPrec(prec).MinPrec()
+		if got == tc.want {
+			continue
+		}
+		t.Errorf("%s.MinPrec() = %d; want %d", tc.x, got, tc.want)
+	}
+}
+
+// TestFloatSign checks Sign for infinities, zeros of either sign, and
+// ordinary non-zero values.
+func TestFloatSign(t *testing.T) {
+	tests := []struct {
+		x string
+		s int
+	}{
+		{"-Inf", -1},
+		{"-1", -1},
+		{"-0", 0},
+		{"+0", 0},
+		{"+1", +1},
+		{"+Inf", +1},
+	}
+	for _, tc := range tests {
+		if got := makeFloat(tc.x).Sign(); got != tc.s {
+			t.Errorf("%s.Sign() = %d; want %d", tc.x, got, tc.s)
+		}
+	}
+}
+
+// alike reports whether x and y compare equal with Cmp and also carry the
+// same sign bit, so that 0 and -0 are told apart.
+func alike(x, y *Float) bool {
+	if x.Cmp(y) != 0 {
+		return false
+	}
+	return x.Signbit() == y.Signbit()
+}
+
+// alike32 reports whether x and y are equal and have the same sign bit, so
+// that 0 and -0 are told apart. NaNs never compare equal and are not
+// treated specially.
+func alike32(x, y float32) bool {
+	if x != y {
+		return false
+	}
+	return math.Signbit(float64(x)) == math.Signbit(float64(y))
+}
+
+// alike64 reports whether x and y are equal and have the same sign bit, so
+// that 0 and -0 are told apart. NaNs never compare equal and are not
+// treated specially.
+func alike64(x, y float64) bool {
+	if x != y {
+		return false
+	}
+	return math.Signbit(x) == math.Signbit(y)
+}
+
+func TestFloatMantExp(t *testing.T) {
+	for _, test := range []struct {
+		x    string
+		mant string
+		exp  int
+	}{
+		{"0", "0", 0},
+		{"+0", "0", 0},
+		{"-0", "-0", 0},
+		{"Inf", "+Inf", 0},
+		{"+Inf", "+Inf", 0},
+		{"-Inf", "-Inf", 0},
+		{"1.5", "0.75", 1},
+		{"1.024e3", "0.5", 11},
+		{"-0.125", "-0.5", -2},
+	} {
+		x := makeFloat(test.x)
+		mant := makeFloat(test.mant)
+		m := new(Float)
+		e := x.MantExp(m)
+		if !alike(m, mant) || e != test.exp {
+			t.Errorf("%s.MantExp() = %s, %d; want %s, %d", test.x, m.Text('g', 10), e, test.mant, test.exp)
+		}
+	}
+}
+
+// TestFloatMantExpAliasing checks that MantExp works when the mantissa
+// argument is the receiver itself.
+func TestFloatMantExpAliasing(t *testing.T) {
+	x := makeFloat("0.5p10")
+	e := x.MantExp(x)
+	if e != 10 {
+		t.Fatalf("Float.MantExp aliasing error: got %d; want 10", e)
+	}
+	want := makeFloat("0.5")
+	if !alike(x, want) {
+		t.Fatalf("Float.MantExp aliasing error: got %s; want %s", x.Text('g', 10), want.Text('g', 10))
+	}
+}
+
+// TestFloatSetMantExp checks SetMantExp, including exponent underflow and
+// overflow, and verifies that it inverts MantExp.
+func TestFloatSetMantExp(t *testing.T) {
+	tests := []struct {
+		frac string
+		exp  int
+		z    string
+	}{
+		{"0", 0, "0"},
+		{"+0", 0, "0"},
+		{"-0", 0, "-0"},
+		{"Inf", 1234, "+Inf"},
+		{"+Inf", -1234, "+Inf"},
+		{"-Inf", -1234, "-Inf"},
+		{"0", MinExp, "0"},
+		{"0.25", MinExp, "+0"},    // exponent underflow
+		{"-0.25", MinExp, "-0"},   // exponent underflow
+		{"1", MaxExp, "+Inf"},     // exponent overflow
+		{"2", MaxExp - 1, "+Inf"}, // exponent overflow
+		{"0.75", 1, "1.5"},
+		{"0.5", 11, "1024"},
+		{"-0.5", -2, "-0.125"},
+		{"32", 5, "1024"},
+		{"1024", -10, "1"},
+	}
+	for _, tc := range tests {
+		frac := makeFloat(tc.frac)
+		want := makeFloat(tc.z)
+
+		var z Float
+		z.SetMantExp(frac, tc.exp)
+		if !alike(&z, want) {
+			t.Errorf("SetMantExp(%s, %d) = %s; want %s", tc.frac, tc.exp, z.Text('g', 10), tc.z)
+		}
+
+		// test inverse property: splitting want with MantExp and feeding
+		// the parts back into SetMantExp must reproduce want
+		mant := new(Float)
+		exp := want.MantExp(mant)
+		if back := z.SetMantExp(mant, exp); back.Cmp(want) != 0 {
+			t.Errorf("Inverse property not satisfied: got %s; want %s", z.Text('g', 10), tc.z)
+		}
+	}
+}
+
+// TestFloatPredicates checks Signbit, Sign, and IsInf for infinities, zeros
+// of either sign, and ordinary non-zero values.
+func TestFloatPredicates(t *testing.T) {
+	tests := []struct {
+		x            string
+		sign         int
+		signbit, inf bool
+	}{
+		{x: "-Inf", sign: -1, signbit: true, inf: true},
+		{x: "-1", sign: -1, signbit: true},
+		{x: "-0", signbit: true},
+		{x: "0"},
+		{x: "1", sign: 1},
+		{x: "+Inf", sign: 1, inf: true},
+	}
+	for _, tc := range tests {
+		f := makeFloat(tc.x)
+
+		signbit := f.Signbit()
+		if signbit != tc.signbit {
+			t.Errorf("(%s).Signbit() = %v; want %v", tc.x, signbit, tc.signbit)
+		}
+
+		sign := f.Sign()
+		if sign != tc.sign {
+			t.Errorf("(%s).Sign() = %d; want %d", tc.x, sign, tc.sign)
+		}
+
+		isInf := f.IsInf()
+		if isInf != tc.inf {
+			t.Errorf("(%s).IsInf() = %v; want %v", tc.x, isInf, tc.inf)
+		}
+	}
+}
+
+func TestFloatIsInt(t *testing.T) {
+	for _, test := range []string{
+		"0 int",
+		"-0 int",
+		"1 int",
+		"-1 int",
+		"0.5",
+		"1.23",
+		"1.23e1",
+		"1.23e2 int",
+		"0.000000001e+8",
+		"0.000000001e+9 int",
+		"1.2345e200 int",
+		"Inf",
+		"+Inf",
+		"-Inf",
+	} {
+		s := strings.TrimSuffix(test, " int")
+		want := s != test
+		if got := makeFloat(s).IsInt(); got != want {
+			t.Errorf("%s.IsInt() == %t", s, got)
+		}
+	}
+}
+
+// fromBinary parses s as a signed base-2 integer and returns it as an
+// int64; it panics if s is not a valid 64-bit binary number.
+func fromBinary(s string) int64 {
+	v, err := strconv.ParseInt(s, 2, 64)
+	if err == nil {
+		return v
+	}
+	panic(err)
+}
+
+func toBinary(x int64) string {
+	return strconv.FormatInt(x, 2)
+}
+
+// testFloatRound rounds x to prec bits using mode and checks that the
+// result is r, with the matching precision and accuracy. It also checks
+// that the order of SetPrec and SetInt64 does not matter and that rounding
+// an already rounded value changes nothing.
+func testFloatRound(t *testing.T, x, r int64, prec uint, mode RoundingMode) {
+	// verify test data: r must lie on the side of x permitted by mode
+	var plausible bool
+	switch mode {
+	case ToNearestEven, ToNearestAway:
+		plausible = true // nothing to do for now
+	case ToZero:
+		plausible = (x < 0 && r >= x) || (x >= 0 && r <= x)
+	case AwayFromZero:
+		plausible = (x < 0 && r <= x) || (x >= 0 && r >= x)
+	case ToNegativeInf:
+		plausible = r <= x
+	case ToPositiveInf:
+		plausible = r >= x
+	default:
+		panic("unreachable")
+	}
+	if !plausible {
+		t.Fatalf("incorrect test data for prec = %d, %s: x = %s, r = %s", prec, mode, toBinary(x), toBinary(r))
+	}
+
+	// compute expected accuracy
+	wantAcc := Exact
+	if r < x {
+		wantAcc = Below
+	} else if r > x {
+		wantAcc = Above
+	}
+
+	// round
+	rounded := new(Float).SetMode(mode).SetInt64(x).SetPrec(prec)
+
+	// check result
+	gotVal, gotPrec, gotAcc := rounded.int64(), rounded.Prec(), rounded.Acc()
+	if gotVal != r || gotPrec != prec || gotAcc != wantAcc {
+		t.Errorf("round %s (%d bits, %s) incorrect: got %s (%d bits, %s); want %s (%d bits, %s)",
+			toBinary(x), prec, mode,
+			toBinary(gotVal), gotPrec, gotAcc,
+			toBinary(r), prec, wantAcc)
+		return
+	}
+
+	// rounding by SetPrec after SetInt64 using default precision should
+	// be the same as rounding by SetInt64 after setting the precision
+	direct := new(Float).SetMode(mode).SetPrec(prec).SetInt64(x)
+	if !alike(direct, rounded) {
+		t.Errorf("round %s (%d bits, %s) not symmetric: got %s and %s; want %s",
+			toBinary(x), prec, mode,
+			toBinary(direct.int64()),
+			toBinary(gotVal),
+			toBinary(r),
+		)
+		return
+	}
+
+	// repeated rounding should be idempotent
+	again := new(Float).SetMode(mode).SetPrec(prec).Set(rounded)
+	if !alike(again, rounded) {
+		t.Errorf("round %s (%d bits, %s) not idempotent: got %s and %s; want %s",
+			toBinary(x), prec, mode,
+			toBinary(again.int64()),
+			toBinary(gotVal),
+			toBinary(r),
+		)
+	}
+}
+
+// TestFloatRound exercises basic rounding: every table row gives a binary
+// input and its expected value under each rounding mode for the given
+// precision.
+func TestFloatRound(t *testing.T) {
+	tests := []struct {
+		prec                        uint
+		x, zero, neven, naway, away string // input, results rounded to prec bits
+	}{
+		{5, "1000", "1000", "1000", "1000", "1000"},
+		{5, "1001", "1001", "1001", "1001", "1001"},
+		{5, "1010", "1010", "1010", "1010", "1010"},
+		{5, "1011", "1011", "1011", "1011", "1011"},
+		{5, "1100", "1100", "1100", "1100", "1100"},
+		{5, "1101", "1101", "1101", "1101", "1101"},
+		{5, "1110", "1110", "1110", "1110", "1110"},
+		{5, "1111", "1111", "1111", "1111", "1111"},
+
+		{4, "1000", "1000", "1000", "1000", "1000"},
+		{4, "1001", "1001", "1001", "1001", "1001"},
+		{4, "1010", "1010", "1010", "1010", "1010"},
+		{4, "1011", "1011", "1011", "1011", "1011"},
+		{4, "1100", "1100", "1100", "1100", "1100"},
+		{4, "1101", "1101", "1101", "1101", "1101"},
+		{4, "1110", "1110", "1110", "1110", "1110"},
+		{4, "1111", "1111", "1111", "1111", "1111"},
+
+		{3, "1000", "1000", "1000", "1000", "1000"},
+		{3, "1001", "1000", "1000", "1010", "1010"},
+		{3, "1010", "1010", "1010", "1010", "1010"},
+		{3, "1011", "1010", "1100", "1100", "1100"},
+		{3, "1100", "1100", "1100", "1100", "1100"},
+		{3, "1101", "1100", "1100", "1110", "1110"},
+		{3, "1110", "1110", "1110", "1110", "1110"},
+		{3, "1111", "1110", "10000", "10000", "10000"},
+
+		{3, "1000001", "1000000", "1000000", "1000000", "1010000"},
+		{3, "1001001", "1000000", "1010000", "1010000", "1010000"},
+		{3, "1010001", "1010000", "1010000", "1010000", "1100000"},
+		{3, "1011001", "1010000", "1100000", "1100000", "1100000"},
+		{3, "1100001", "1100000", "1100000", "1100000", "1110000"},
+		{3, "1101001", "1100000", "1110000", "1110000", "1110000"},
+		{3, "1110001", "1110000", "1110000", "1110000", "10000000"},
+		{3, "1111001", "1110000", "10000000", "10000000", "10000000"},
+
+		{2, "1000", "1000", "1000", "1000", "1000"},
+		{2, "1001", "1000", "1000", "1000", "1100"},
+		{2, "1010", "1000", "1000", "1100", "1100"},
+		{2, "1011", "1000", "1100", "1100", "1100"},
+		{2, "1100", "1100", "1100", "1100", "1100"},
+		{2, "1101", "1100", "1100", "1100", "10000"},
+		{2, "1110", "1100", "10000", "10000", "10000"},
+		{2, "1111", "1100", "10000", "10000", "10000"},
+
+		{2, "1000001", "1000000", "1000000", "1000000", "1100000"},
+		{2, "1001001", "1000000", "1000000", "1000000", "1100000"},
+		{2, "1010001", "1000000", "1100000", "1100000", "1100000"},
+		{2, "1011001", "1000000", "1100000", "1100000", "1100000"},
+		{2, "1100001", "1100000", "1100000", "1100000", "10000000"},
+		{2, "1101001", "1100000", "1100000", "1100000", "10000000"},
+		{2, "1110001", "1100000", "10000000", "10000000", "10000000"},
+		{2, "1111001", "1100000", "10000000", "10000000", "10000000"},
+
+		{1, "1000", "1000", "1000", "1000", "1000"},
+		{1, "1001", "1000", "1000", "1000", "10000"},
+		{1, "1010", "1000", "1000", "1000", "10000"},
+		{1, "1011", "1000", "1000", "1000", "10000"},
+		{1, "1100", "1000", "10000", "10000", "10000"},
+		{1, "1101", "1000", "10000", "10000", "10000"},
+		{1, "1110", "1000", "10000", "10000", "10000"},
+		{1, "1111", "1000", "10000", "10000", "10000"},
+
+		{1, "1000001", "1000000", "1000000", "1000000", "10000000"},
+		{1, "1001001", "1000000", "1000000", "1000000", "10000000"},
+		{1, "1010001", "1000000", "1000000", "1000000", "10000000"},
+		{1, "1011001", "1000000", "1000000", "1000000", "10000000"},
+		{1, "1100001", "1000000", "10000000", "10000000", "10000000"},
+		{1, "1101001", "1000000", "10000000", "10000000", "10000000"},
+		{1, "1110001", "1000000", "10000000", "10000000", "10000000"},
+		{1, "1111001", "1000000", "10000000", "10000000", "10000000"},
+	}
+	for _, tc := range tests {
+		in := fromBinary(tc.x)
+		towardZero := fromBinary(tc.zero)
+		nearestEven := fromBinary(tc.neven)
+		nearestAway := fromBinary(tc.naway)
+		awayFromZero := fromBinary(tc.away)
+
+		testFloatRound(t, in, towardZero, tc.prec, ToZero)
+		testFloatRound(t, in, nearestEven, tc.prec, ToNearestEven)
+		testFloatRound(t, in, nearestAway, tc.prec, ToNearestAway)
+		testFloatRound(t, in, awayFromZero, tc.prec, AwayFromZero)
+
+		// All inputs in the table are positive, so the directed modes
+		// coincide with ToZero and AwayFromZero ...
+		testFloatRound(t, in, towardZero, tc.prec, ToNegativeInf)
+		testFloatRound(t, in, awayFromZero, tc.prec, ToPositiveInf)
+
+		// ... and trade places for the negated input.
+		testFloatRound(t, -in, -awayFromZero, tc.prec, ToNegativeInf)
+		testFloatRound(t, -in, -towardZero, tc.prec, ToPositiveInf)
+	}
+}
+
+// TestFloatRound24 checks that rounding a float64 to 24 bits gives the
+// same value as the IEEE-754 round-to-nearest conversion of that float64
+// to a float32 (denormal numbers are not covered).
+func TestFloatRound24(t *testing.T) {
+	const base = 1<<26 - 0x10 // 11...110000 (26 bits)
+	for delta := 0; delta <= 0x10; delta++ {
+		in := float64(base + delta)
+		want := float32(in)
+		got, _ := new(Float).SetPrec(24).SetFloat64(in).Float32()
+		if got != want {
+			t.Errorf("Round(%g, 24) = %g; want %g", in, got, want)
+		}
+	}
+}
+
+// TestFloatSetUint64 checks that SetUint64 stores values exactly at the
+// default precision and truncates as expected for ToZero at lower
+// precisions.
+func TestFloatSetUint64(t *testing.T) {
+	values := []uint64{
+		0,
+		1,
+		2,
+		10,
+		100,
+		1<<32 - 1,
+		1 << 32,
+		1<<64 - 1,
+	}
+	for _, want := range values {
+		f := new(Float)
+		f.SetUint64(want)
+		got := f.uint64()
+		if got != want {
+			t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want)
+		}
+	}
+
+	// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
+	const x uint64 = 0x8765432187654321 // 64 bits needed
+	for prec := uint(1); prec <= 64; prec++ {
+		// cut off (round to zero) low 64-prec bits
+		lowBits := uint64(1)<<(64-prec) - 1
+		want := x &^ lowBits
+
+		f := new(Float).SetPrec(prec).SetMode(ToZero).SetUint64(x)
+		if got := f.uint64(); got != want {
+			t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want)
+		}
+	}
+}
+
+// TestFloatSetInt64 checks that SetInt64 stores values of either sign
+// exactly at the default precision and truncates as expected for ToZero at
+// lower precisions.
+func TestFloatSetInt64(t *testing.T) {
+	magnitudes := []int64{
+		0,
+		1,
+		2,
+		10,
+		100,
+		1<<32 - 1,
+		1 << 32,
+		1<<63 - 1,
+	}
+	for _, m := range magnitudes {
+		// test both signs
+		for _, want := range []int64{m, -m} {
+			f := new(Float)
+			f.SetInt64(want)
+			got := f.int64()
+			if got != want {
+				t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want)
+			}
+		}
+	}
+
+	// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
+	const x int64 = 0x7654321076543210 // 63 bits needed
+	for prec := uint(1); prec <= 63; prec++ {
+		// cut off (round to zero) low 63-prec bits
+		lowBits := int64(1)<<(63-prec) - 1
+		want := x &^ lowBits
+
+		f := new(Float).SetPrec(prec).SetMode(ToZero).SetInt64(x)
+		if got := f.int64(); got != want {
+			t.Errorf("got %#x (%s); want %#x", got, f.Text('p', 0), want)
+		}
+	}
+}
+
+// TestFloatSetFloat64 checks that SetFloat64 stores float64 values of
+// either sign exactly at the default precision, truncates as expected for
+// ToZero at lower precisions, and panics with ErrNaN for a NaN argument.
+func TestFloatSetFloat64(t *testing.T) {
+	for _, want := range []float64{
+		0,
+		1,
+		2,
+		12345,
+		1e10,
+		1e100,
+		3.14159265e10,
+		2.718281828e-123,
+		1.0 / 3,
+		math.MaxFloat32,
+		math.MaxFloat64,
+		math.SmallestNonzeroFloat32,
+		math.SmallestNonzeroFloat64,
+		math.Inf(-1),
+		math.Inf(0),
+		-math.Inf(1),
+	} {
+		// test both signs: the second iteration negates the value
+		for i := range [2]int{} {
+			if i&1 != 0 {
+				want = -want
+			}
+			var f Float
+			f.SetFloat64(want)
+			if got, acc := f.Float64(); got != want || acc != Exact {
+				t.Errorf("got %g (%s, %s); want %g (Exact)", got, f.Text('p', 0), acc, want)
+			}
+		}
+	}
+
+	// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
+	const x uint64 = 0x8765432143218 // 13 hex digits with the top bit set: a 52-bit value
+	for prec := uint(1); prec <= 52; prec++ {
+		f := new(Float).SetPrec(prec).SetMode(ToZero).SetFloat64(float64(x))
+		got, _ := f.Float64()
+		want := float64(x &^ (1<<(52-prec) - 1)) // cut off (round to zero) low 52-prec bits
+		if got != want {
+			t.Errorf("got %g (%s); want %g", got, f.Text('p', 0), want)
+		}
+	}
+
+	// test NaN
+	defer func() {
+		// Keep the recovered value itself: the result of a failed type
+		// assertion is the zero ErrNaN, which would hide what was
+		// actually recovered (or that nothing panicked at all).
+		r := recover()
+		if _, ok := r.(ErrNaN); !ok {
+			t.Errorf("got %v; want ErrNaN panic", r)
+		}
+	}()
+	var f Float
+	f.SetFloat64(math.NaN())
+	// should not reach here
+	t.Errorf("got %s; want ErrNaN panic", f.Text('p', 0))
+}
+
+// TestFloatSetInt checks that SetInt picks a precision of at least 64 bits
+// that is large enough for the integer, and that the value is stored
+// exactly.
+func TestFloatSetInt(t *testing.T) {
+	inputs := []string{
+		"0",
+		"1",
+		"-1",
+		"1234567890",
+		"123456789012345678901234567890",
+		"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890",
+	}
+	for _, want := range inputs {
+		var x Int
+		if _, ok := x.SetString(want, 0); !ok {
+			t.Errorf("invalid integer %s", want)
+			continue
+		}
+
+		// expected precision: the bit length of x, but at least 64
+		wantPrec := x.BitLen()
+		if wantPrec < 64 {
+			wantPrec = 64
+		}
+
+		var f Float
+		f.SetInt(&x)
+
+		// check precision
+		if prec := f.Prec(); prec != uint(wantPrec) {
+			t.Errorf("got prec = %d; want %d", prec, wantPrec)
+		}
+
+		// check value
+		if got := f.Text('g', 100); got != want {
+			t.Errorf("got %s (%s); want %s", got, f.Text('p', 0), want)
+		}
+	}
+
+	// TODO(gri) test basic rounding behavior
+}
+
+// TestFloatSetRat checks the precision SetRat picks when none is set, and
+// the value it stores when a precision of 1000 bits was set beforehand.
+func TestFloatSetRat(t *testing.T) {
+	inputs := []string{
+		"0",
+		"1",
+		"-1",
+		"1234567890",
+		"123456789012345678901234567890",
+		"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890",
+		"1.2",
+		"3.14159265",
+		// TODO(gri) expand
+	}
+	for _, want := range inputs {
+		var x Rat
+		if _, ok := x.SetString(want); !ok {
+			t.Errorf("invalid fraction %s", want)
+			continue
+		}
+
+		// expected automatic precision: the larger bit length of
+		// numerator and denominator, but at least 64
+		wantPrec := max(x.Num().BitLen(), x.Denom().BitLen())
+		if wantPrec < 64 {
+			wantPrec = 64
+		}
+
+		var auto, wide Float
+		wide.SetPrec(1000)
+		auto.SetRat(&x)
+		wide.SetRat(&x)
+
+		// check precision when set automatically
+		if prec := auto.Prec(); prec != uint(wantPrec) {
+			t.Errorf("got prec = %d; want %d", prec, wantPrec)
+		}
+
+		if got := wide.Text('g', 100); got != want {
+			t.Errorf("got %s (%s); want %s", got, wide.Text('p', 0), want)
+		}
+	}
+}
+
+// TestFloatSetInf checks that SetInf produces an infinity of the requested
+// sign and leaves the previously set precision unchanged. A single Float
+// is reused for all cases.
+func TestFloatSetInf(t *testing.T) {
+	tests := []struct {
+		signbit bool
+		prec    uint
+		want    string
+	}{
+		{false, 0, "+Inf"},
+		{true, 0, "-Inf"},
+		{false, 10, "+Inf"},
+		{true, 30, "-Inf"},
+	}
+
+	var f Float
+	for _, tc := range tests {
+		x := f.SetPrec(tc.prec).SetInf(tc.signbit)
+		got := x.String()
+		if got == tc.want && x.Prec() == tc.prec {
+			continue
+		}
+		t.Errorf("SetInf(%v) = %s (prec = %d); want %s (prec = %d)", tc.signbit, got, x.Prec(), tc.want, tc.prec)
+	}
+}
+
+// TestFloatUint64 checks the value and accuracy returned by Uint64,
+// including the results for negative, fractional, and out-of-range inputs.
+func TestFloatUint64(t *testing.T) {
+	tests := []struct {
+		x   string
+		out uint64
+		acc Accuracy
+	}{
+		{"-Inf", 0, Above},
+		{"-1", 0, Above},
+		{"-1e-1000", 0, Above},
+		{"-0", 0, Exact},
+		{"0", 0, Exact},
+		{"1e-1000", 0, Below},
+		{"1", 1, Exact},
+		{"1.000000000000000000001", 1, Below},
+		{"12345.0", 12345, Exact},
+		{"12345.000000000000000000001", 12345, Below},
+		{"18446744073709551615", 18446744073709551615, Exact},
+		{"18446744073709551615.000000000000000000001", math.MaxUint64, Below},
+		{"18446744073709551616", math.MaxUint64, Below},
+		{"1e10000", math.MaxUint64, Below},
+		{"+Inf", math.MaxUint64, Below},
+	}
+	for _, tc := range tests {
+		got, acc := makeFloat(tc.x).Uint64()
+		if got == tc.out && acc == tc.acc {
+			continue
+		}
+		t.Errorf("%s: got %d (%s); want %d (%s)", tc.x, got, acc, tc.out, tc.acc)
+	}
+}
+
+// TestFloatInt64 checks the value and accuracy returned by Int64,
+// including the results for fractional and out-of-range inputs of either
+// sign.
+func TestFloatInt64(t *testing.T) {
+	tests := []struct {
+		x   string
+		out int64
+		acc Accuracy
+	}{
+		{"-Inf", math.MinInt64, Above},
+		{"-1e10000", math.MinInt64, Above},
+		{"-9223372036854775809", math.MinInt64, Above},
+		{"-9223372036854775808.000000000000000000001", math.MinInt64, Above},
+		{"-9223372036854775808", -9223372036854775808, Exact},
+		{"-9223372036854775807.000000000000000000001", -9223372036854775807, Above},
+		{"-9223372036854775807", -9223372036854775807, Exact},
+		{"-12345.000000000000000000001", -12345, Above},
+		{"-12345.0", -12345, Exact},
+		{"-1.000000000000000000001", -1, Above},
+		{"-1.5", -1, Above},
+		{"-1", -1, Exact},
+		{"-1e-1000", 0, Above},
+		{"0", 0, Exact},
+		{"1e-1000", 0, Below},
+		{"1", 1, Exact},
+		{"1.000000000000000000001", 1, Below},
+		{"1.5", 1, Below},
+		{"12345.0", 12345, Exact},
+		{"12345.000000000000000000001", 12345, Below},
+		{"9223372036854775807", 9223372036854775807, Exact},
+		{"9223372036854775807.000000000000000000001", math.MaxInt64, Below},
+		{"9223372036854775808", math.MaxInt64, Below},
+		{"1e10000", math.MaxInt64, Below},
+		{"+Inf", math.MaxInt64, Below},
+	}
+	for _, tc := range tests {
+		got, acc := makeFloat(tc.x).Int64()
+		if got == tc.out && acc == tc.acc {
+			continue
+		}
+		t.Errorf("%s: got %d (%s); want %d (%s)", tc.x, got, acc, tc.out, tc.acc)
+	}
+}
+
+// TestFloatFloat32 checks the value and accuracy returned by Float32 for
+// underflow, denormal, normal, overflow, and infinite inputs. Every table
+// entry is tested with both signs; for the negative run the expected value
+// and the expected accuracy are negated. Where strconv.ParseFloat accepts
+// the input, the table entry is also cross-checked against it.
+func TestFloatFloat32(t *testing.T) {
+	for _, test := range []struct {
+		x   string
+		out float32
+		acc Accuracy
+	}{
+		{"0", 0, Exact},
+
+		// underflow to zero
+		{"1e-1000", 0, Below},
+		{"0x0.000002p-127", 0, Below},
+		{"0x.0000010p-126", 0, Below},
+
+		// denormals
+		{"1.401298464e-45", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal
+		{"0x.ffffff8p-149", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal
+		{"0x.0000018p-126", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal
+		{"0x.0000020p-126", math.SmallestNonzeroFloat32, Exact},
+		{"0x.8p-148", math.SmallestNonzeroFloat32, Exact},
+		{"1p-149", math.SmallestNonzeroFloat32, Exact},
+		{"0x.fffffep-126", math.Float32frombits(0x7fffff), Exact}, // largest denormal
+
+		// special denormal cases (see issues 14553, 14651)
+		{"0x0.0000001p-126", math.Float32frombits(0x00000000), Below}, // underflow to zero
+		{"0x0.0000008p-126", math.Float32frombits(0x00000000), Below}, // underflow to zero
+		{"0x0.0000010p-126", math.Float32frombits(0x00000000), Below}, // rounded down to even
+		{"0x0.0000011p-126", math.Float32frombits(0x00000001), Above}, // rounded up to smallest denormal
+		{"0x0.0000018p-126", math.Float32frombits(0x00000001), Above}, // rounded up to smallest denormal
+
+		{"0x1.0000000p-149", math.Float32frombits(0x00000001), Exact}, // smallest denormal
+		{"0x0.0000020p-126", math.Float32frombits(0x00000001), Exact}, // smallest denormal
+		{"0x0.fffffe0p-126", math.Float32frombits(0x007fffff), Exact}, // largest denormal
+		{"0x1.0000000p-126", math.Float32frombits(0x00800000), Exact}, // smallest normal
+
+		{"0x0.8p-149", math.Float32frombits(0x000000000), Below}, // rounded down to even
+		{"0x0.9p-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal
+		{"0x0.ap-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal
+		{"0x0.bp-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal
+		{"0x0.cp-149", math.Float32frombits(0x000000001), Above}, // rounded up to smallest denormal
+
+		{"0x1.0p-149", math.Float32frombits(0x000000001), Exact}, // smallest denormal
+		{"0x1.7p-149", math.Float32frombits(0x000000001), Below},
+		{"0x1.8p-149", math.Float32frombits(0x000000002), Above},
+		{"0x1.9p-149", math.Float32frombits(0x000000002), Above},
+
+		{"0x2.0p-149", math.Float32frombits(0x000000002), Exact},
+		{"0x2.8p-149", math.Float32frombits(0x000000002), Below}, // rounded down to even
+		{"0x2.9p-149", math.Float32frombits(0x000000003), Above},
+
+		{"0x3.0p-149", math.Float32frombits(0x000000003), Exact},
+		{"0x3.7p-149", math.Float32frombits(0x000000003), Below},
+		{"0x3.8p-149", math.Float32frombits(0x000000004), Above}, // rounded up to even
+
+		{"0x4.0p-149", math.Float32frombits(0x000000004), Exact},
+		{"0x4.8p-149", math.Float32frombits(0x000000004), Below}, // rounded down to even
+		{"0x4.9p-149", math.Float32frombits(0x000000005), Above},
+
+		// specific case from issue 14553
+		{"0x7.7p-149", math.Float32frombits(0x000000007), Below},
+		{"0x7.8p-149", math.Float32frombits(0x000000008), Above},
+		{"0x7.9p-149", math.Float32frombits(0x000000008), Above},
+
+		// normals
+		{"0x.ffffffp-126", math.Float32frombits(0x00800000), Above}, // rounded up to smallest normal
+		{"1p-126", math.Float32frombits(0x00800000), Exact},         // smallest normal
+		{"0x1.fffffep-126", math.Float32frombits(0x00ffffff), Exact},
+		{"0x1.ffffffp-126", math.Float32frombits(0x01000000), Above}, // rounded up
+		{"1", 1, Exact},
+		{"1.000000000000000000001", 1, Below},
+		{"12345.0", 12345, Exact},
+		{"12345.000000000000000000001", 12345, Below},
+		{"0x1.fffffe0p127", math.MaxFloat32, Exact},
+		{"0x1.fffffe8p127", math.MaxFloat32, Below},
+
+		// overflow
+		{"0x1.ffffff0p127", float32(math.Inf(+1)), Above},
+		{"0x1p128", float32(math.Inf(+1)), Above},
+		{"1e10000", float32(math.Inf(+1)), Above},
+		{"0x1.ffffff0p2147483646", float32(math.Inf(+1)), Above}, // overflow in rounding
+
+		// inf
+		{"Inf", float32(math.Inf(+1)), Exact},
+	} {
+		for i := 0; i < 2; i++ {
+			// test both signs
+			tx, tout, tacc := test.x, test.out, test.acc
+			if i != 0 {
+				tx = "-" + tx
+				tout = -tout
+				tacc = -tacc
+			}
+
+			// conversion should match strconv where syntax is agreeable
+			if f, err := strconv.ParseFloat(tx, 32); err == nil && !alike32(float32(f), tout) {
+				t.Errorf("%s: got %g; want %g (incorrect test data)", tx, f, tout)
+			}
+
+			x := makeFloat(tx)
+			out, acc := x.Float32()
+			if !alike32(out, tout) || acc != tacc {
+				// Report the sign-adjusted expectation (tout), which is
+				// the value that was actually compared against.
+				t.Errorf("%s: got %g (%#08x, %s); want %g (%#08x, %s)", tx, out, math.Float32bits(out), acc, tout, math.Float32bits(tout), tacc)
+			}
+
+			// test that x.SetFloat64(float64(f)).Float32() == f
+			var x2 Float
+			out2, acc2 := x2.SetFloat64(float64(out)).Float32()
+			if !alike32(out2, out) || acc2 != Exact {
+				t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out)
+			}
+		}
+	}
+}
+
+// TestFloatFloat64 converts each table entry with makeFloat and checks that
+// Float64 returns the expected value and accuracy, for both signs of the input.
+func TestFloatFloat64(t *testing.T) {
+	const smallestNormalFloat64 = 2.2250738585072014e-308 // 1p-1022
+	for _, test := range []struct {
+		x   string
+		out float64
+		acc Accuracy
+	}{
+		{"0", 0, Exact},
+
+		// underflow to zero
+		{"1e-1000", 0, Below},
+		{"0x0.0000000000001p-1023", 0, Below},
+		{"0x0.00000000000008p-1022", 0, Below},
+
+		// denormals
+		{"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal
+		{"0x0.00000000000010p-1022", math.SmallestNonzeroFloat64, Exact}, // smallest denormal
+		{"0x.8p-1073", math.SmallestNonzeroFloat64, Exact},
+		{"1p-1074", math.SmallestNonzeroFloat64, Exact},
+		{"0x.fffffffffffffp-1022", math.Float64frombits(0x000fffffffffffff), Exact}, // largest denormal
+
+		// special denormal cases (see issues 14553, 14651)
+		{"0x0.00000000000001p-1022", math.Float64frombits(0x00000000000000000), Below}, // underflow to zero
+		{"0x0.00000000000004p-1022", math.Float64frombits(0x00000000000000000), Below}, // underflow to zero
+		{"0x0.00000000000008p-1022", math.Float64frombits(0x00000000000000000), Below}, // rounded down to even
+		{"0x0.00000000000009p-1022", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal
+		{"0x0.0000000000000ap-1022", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal
+
+		{"0x0.8p-1074", math.Float64frombits(0x00000000000000000), Below}, // rounded down to even
+		{"0x0.9p-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal
+		{"0x0.ap-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal
+		{"0x0.bp-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal
+		{"0x0.cp-1074", math.Float64frombits(0x00000000000000001), Above}, // rounded up to smallest denormal
+
+		{"0x1.0p-1074", math.Float64frombits(0x00000000000000001), Exact},
+		{"0x1.7p-1074", math.Float64frombits(0x00000000000000001), Below},
+		{"0x1.8p-1074", math.Float64frombits(0x00000000000000002), Above},
+		{"0x1.9p-1074", math.Float64frombits(0x00000000000000002), Above},
+
+		{"0x2.0p-1074", math.Float64frombits(0x00000000000000002), Exact},
+		{"0x2.8p-1074", math.Float64frombits(0x00000000000000002), Below}, // rounded down to even
+		{"0x2.9p-1074", math.Float64frombits(0x00000000000000003), Above},
+
+		{"0x3.0p-1074", math.Float64frombits(0x00000000000000003), Exact},
+		{"0x3.7p-1074", math.Float64frombits(0x00000000000000003), Below},
+		{"0x3.8p-1074", math.Float64frombits(0x00000000000000004), Above}, // rounded up to even
+
+		{"0x4.0p-1074", math.Float64frombits(0x00000000000000004), Exact},
+		{"0x4.8p-1074", math.Float64frombits(0x00000000000000004), Below}, // rounded down to even
+		{"0x4.9p-1074", math.Float64frombits(0x00000000000000005), Above},
+
+		// normals
+		{"0x.fffffffffffff8p-1022", math.Float64frombits(0x0010000000000000), Above}, // rounded up to smallest normal
+		{"1p-1022", math.Float64frombits(0x0010000000000000), Exact},                 // smallest normal
+		{"1", 1, Exact},
+		{"1.000000000000000000001", 1, Below},
+		{"12345.0", 12345, Exact},
+		{"12345.000000000000000000001", 12345, Below},
+		{"0x1.fffffffffffff0p1023", math.MaxFloat64, Exact},
+		{"0x1.fffffffffffff4p1023", math.MaxFloat64, Below},
+
+		// overflow
+		{"0x1.fffffffffffff8p1023", math.Inf(+1), Above},
+		{"0x1p1024", math.Inf(+1), Above},
+		{"1e10000", math.Inf(+1), Above},
+		{"0x1.fffffffffffff8p2147483646", math.Inf(+1), Above}, // overflow in rounding
+		{"Inf", math.Inf(+1), Exact},
+
+		// selected denormalized values that were handled incorrectly in the past
+		{"0x.fffffffffffffp-1022", smallestNormalFloat64 - math.SmallestNonzeroFloat64, Exact},
+		{"4503599627370495p-1074", smallestNormalFloat64 - math.SmallestNonzeroFloat64, Exact},
+
+		// https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
+		{"2.2250738585072011e-308", 2.225073858507201e-308, Below},
+		// https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
+		{"2.2250738585072012e-308", 2.2250738585072014e-308, Above},
+	} {
+		for i := 0; i < 2; i++ {
+			// test both signs
+			tx, tout, tacc := test.x, test.out, test.acc
+			if i != 0 {
+				tx, tout, tacc = "-"+tx, -tout, -tacc
+			}
+
+			// conversion should match strconv where syntax is agreeable
+			if f, err := strconv.ParseFloat(tx, 64); err == nil && !alike64(f, tout) {
+				t.Errorf("%s: got %g; want %g (incorrect test data)", tx, f, tout)
+			}
+
+			out, acc := makeFloat(tx).Float64()
+			if !alike64(out, tout) || acc != tacc {
+				// report the sign-adjusted expectation (tout), not the unsigned table entry
+				t.Errorf("%s: got %g (%#016x, %s); want %g (%#016x, %s)", tx, out, math.Float64bits(out), acc, tout, math.Float64bits(tout), tacc)
+			}
+
+			// test that x.SetFloat64(f).Float64() == f
+			var x2 Float
+			out2, acc2 := x2.SetFloat64(out).Float64()
+			if !alike64(out2, out) || acc2 != Exact {
+				t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out)
+			}
+		}
+	}
+}
+
+// TestFloatInt checks Float.Int for a table of inputs: the decimal string of
+// the returned *Int (or "nil" if no Int is returned) and the Accuracy.
+func TestFloatInt(t *testing.T) {
+	for _, tc := range []struct {
+		x    string
+		want string
+		acc  Accuracy
+	}{
+		{"0", "0", Exact},
+		{"+0", "0", Exact},
+		{"-0", "0", Exact},
+		{"Inf", "nil", Below},
+		{"+Inf", "nil", Below},
+		{"-Inf", "nil", Above},
+		{"1", "1", Exact},
+		{"-1", "-1", Exact},
+		{"1.23", "1", Below},
+		{"-1.23", "-1", Above},
+		{"123e-2", "1", Below},
+		{"123e-3", "0", Below},
+		{"123e-4", "0", Below},
+		{"1e-1000", "0", Below},
+		{"-1e-1000", "0", Above},
+		{"1e+10", "10000000000", Exact},
+		{"1e+100", "10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", Exact},
+	} {
+		res, acc := makeFloat(tc.x).Int(nil)
+		got := "nil"
+		if res != nil {
+			got = res.String()
+		}
+		if acc != tc.acc || got != tc.want {
+			t.Errorf("%s: got %s (%s); want %s (%s)", tc.x, got, acc, tc.want, tc.acc)
+		}
+	}
+
+	// check that supplied *Int is used
+	for _, s := range []string{"0", "1", "-1", "1234"} {
+		dst := new(Int)
+		if res, _ := makeFloat(s).Int(dst); res != dst {
+			t.Errorf("(%s).Int is not using supplied *Int", s)
+		}
+	}
+}
+
+// TestFloatRat checks Float.Rat for a table of inputs (at 64 bits of precision)
+// and, when a *Rat is returned, that SetRat converts it back to the same Float.
+func TestFloatRat(t *testing.T) {
+	for _, tc := range []struct {
+		x, want string
+		acc     Accuracy
+	}{
+		{"0", "0/1", Exact},
+		{"+0", "0/1", Exact},
+		{"-0", "0/1", Exact},
+		{"Inf", "nil", Below},
+		{"+Inf", "nil", Below},
+		{"-Inf", "nil", Above},
+		{"1", "1/1", Exact},
+		{"-1", "-1/1", Exact},
+		{"1.25", "5/4", Exact},
+		{"-1.25", "-5/4", Exact},
+		{"1e10", "10000000000/1", Exact},
+		{"1p10", "1024/1", Exact},
+		{"-1p-10", "-1/1024", Exact},
+		{"3.14159265", "7244019449799623199/2305843009213693952", Exact},
+	} {
+		x := makeFloat(tc.x).SetPrec(64)
+		res, acc := x.Rat(nil)
+		got := "nil"
+		if res != nil {
+			got = res.String()
+		}
+		switch {
+		case got != tc.want:
+			t.Errorf("%s: got %s; want %s", tc.x, got, tc.want)
+			continue
+		case acc != tc.acc:
+			t.Errorf("%s: got %s; want %s", tc.x, acc, tc.acc)
+			continue
+		}
+
+		// inverse conversion
+		if res != nil {
+			if back := new(Float).SetPrec(64).SetRat(res); back.Cmp(x) != 0 {
+				t.Errorf("%s: got %s; want %s", tc.x, back, x)
+			}
+		}
+	}
+
+	// check that supplied *Rat is used
+	for _, s := range []string{"0", "1", "-1", "1234"} {
+		dst := new(Rat)
+		if res, _ := makeFloat(s).Rat(dst); res != dst {
+			t.Errorf("(%s).Rat is not using supplied *Rat", s)
+		}
+	}
+}
+
+// TestFloatAbs checks that Abs of a non-negative value and of its negation both yield the non-negative value.
+func TestFloatAbs(t *testing.T) {
+	for _, s := range []string{
+		"0",
+		"1",
+		"1234",
+		"1.23e-2",
+		"1e-1000",
+		"1e1000",
+		"Inf",
+	} {
+		pos := makeFloat(s)
+		abs := new(Float).Abs(pos)
+		if !alike(abs, pos) {
+			t.Errorf("%s: got %s; want %s", s, abs.Text('g', 10), s)
+		}
+
+		abs.Abs(makeFloat("-" + s))
+		if !alike(abs, pos) {
+			t.Errorf("-%s: got %s; want %s", s, abs.Text('g', 10), s)
+		}
+	}
+}
+
+// TestFloatNeg checks that Neg maps a value to its negation and that applying Neg twice restores it.
+func TestFloatNeg(t *testing.T) {
+	for _, s := range []string{
+		"0",
+		"1",
+		"1234",
+		"1.23e-2",
+		"1e-1000",
+		"1e1000",
+		"Inf",
+	} {
+		pos := makeFloat(s)
+		neg := makeFloat("-" + s)
+		gotNeg := new(Float).Neg(pos)
+		if !alike(gotNeg, neg) {
+			t.Errorf("%s: got %s; want %s", s, gotNeg.Text('g', 10), neg.Text('g', 10))
+		}
+		if gotPos := new(Float).Neg(gotNeg); !alike(gotPos, pos) {
+			t.Errorf("%s: got %s; want %s", s, gotPos.Text('g', 10), pos.Text('g', 10))
+		}
+	}
+}
+
+// TestFloatInc adds 1 to a zero Float n times and expects exactly n, for each precision in precList.
+func TestFloatInc(t *testing.T) {
+	const n = 10
+	for _, prec := range precList {
+		// Compare in uint64 with a bounded shift so that large precisions are not skipped by overflow.
+		if prec < 63 && uint64(1)<<prec < n {
+			continue // prec must be large enough to hold all numbers from 0 to n
+		}
+		x, one := new(Float).SetPrec(prec), new(Float).SetInt64(1)
+		for i := 0; i < n; i++ {
+			x.Add(x, one)
+		}
+		if x.Cmp(new(Float).SetInt64(n)) != 0 {
+			t.Errorf("prec = %d: got %s; want %d", prec, x, n)
+		}
+	}
+}
+
+// precList lists the precisions (the values passed to SetPrec) with which to run various tests.
+var precList = [...]uint{1, 2, 5, 8, 10, 16, 23, 24, 32, 50, 53, 64, 100, 128, 500, 511, 512, 513, 1000, 10000}
+
+// Selected bits with which to run various tests.
+// Each entry is a list of bits representing a floating-point number (see fromBits).
+var bitsList = [...]Bits{
+	{},               // = 0
+	{0},              // = 1
+	{1},              // = 2
+	{-1},             // = 1/2
+	{10},             // = 2**10 == 1024
+	{-10},            // = 2**-10 == 1/1024
+	{100, 10, 1},     // = 2**100 + 2**10 + 2**1
+	{0, -1, -2, -10}, // = 2**0 + 2**-1 + 2**-2 + 2**-10
+	// TODO(gri) add more test cases
+}
+
+// TestFloatAdd exercises Float.Add and Float.Sub over all pairs of bitsList
+// entries. The exact sum is formed "manually" on the Bits representation and
+// rounded with its round method; the Float results must match it for every
+// precision in precList and each of the rounding modes tried.
+func TestFloatAdd(t *testing.T) {
+	for _, xb := range bitsList {
+		for _, yb := range bitsList {
+			// exact operands and exact sum
+			x, y := xb.Float(), yb.Float()
+			sumBits := xb.add(yb)
+			sum := sumBits.Float()
+
+			for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+				for _, prec := range precList {
+					res := new(Float).SetPrec(prec).SetMode(mode)
+
+					// x + y must equal the rounded exact sum
+					res.Add(x, y)
+					if want := sumBits.round(prec, mode); res.Cmp(want) != 0 {
+						t.Errorf("i = %d, prec = %d, %s:\n\t     %s %v\n\t+    %s %v\n\t=    %s\n\twant %s",
+							i, prec, mode, x, xb, y, yb, res, want)
+					}
+
+					// (x + y) - x must equal the rounded y
+					res.Sub(sum, x)
+					if want := yb.round(prec, mode); res.Cmp(want) != 0 {
+						t.Errorf("i = %d, prec = %d, %s:\n\t     %s %v\n\t-    %s %v\n\t=    %s\n\twant %s",
+							i, prec, mode, sum, sumBits, x, xb, res, want)
+					}
+				}
+			}
+		}
+	}
+}
+
+// TestFloatAddRoundZero checks the sign of an exactly-zero Add/Sub result.
+// For non-zero x, both x + (-x) and x - x must be +0 under every rounding
+// mode except ToNegativeInf, for which the result must be -0.
+func TestFloatAddRoundZero(t *testing.T) {
+	for _, mode := range [...]RoundingMode{ToNearestEven, ToNearestAway, ToZero, AwayFromZero, ToPositiveInf, ToNegativeInf} {
+		x := NewFloat(5.0)
+		y := new(Float).Neg(x)
+		wantNeg := mode == ToNegativeInf
+		want := NewFloat(0.0)
+		if wantNeg {
+			want.Neg(want)
+		}
+		got := new(Float).SetMode(mode)
+		// Cmp alone does not distinguish +0 from -0, so the sign is checked separately
+		got.Add(x, y)
+		if got.Cmp(want) != 0 || got.neg != wantNeg {
+			t.Errorf("%s:\n\t     %v\n\t+    %v\n\t=    %v\n\twant %v", mode, x, y, got, want)
+		}
+		got.Sub(x, x)
+		if got.Cmp(want) != 0 || got.neg != wantNeg {
+			t.Errorf("%v:\n\t     %v\n\t-    %v\n\t=    %v\n\twant %v", mode, x, x, got, want)
+		}
+	}
+}
+
+// TestFloatAdd32 verifies that Float.Add/Sub with a 24-bit mantissa
+// produces the same results as float32 addition/subtraction
+// (denormal numbers are not covered).
+func TestFloatAdd32(t *testing.T) {
+	// choose base such that we cross the mantissa precision limit
+	const base = 1<<26 - 0x10 // 11...110000 (26 bits)
+	for d := 0; d <= 0x10; d++ {
+		for i := 0; i < 2; i++ {
+			a, b := float64(base), float64(d)
+			if i == 1 {
+				a, b = b, a // second pass: swap the operands
+			}
+
+			fa := NewFloat(a)
+			fb := NewFloat(b)
+			sum := new(Float).SetPrec(24)
+
+			sum.Add(fa, fb)
+			got, acc := sum.Float32()
+			want := float32(b) + float32(a)
+			if acc != Exact || got != want {
+				t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, a, b, got, acc, want)
+			}
+
+			sum.Sub(sum, fb)
+			got, acc = sum.Float32()
+			want -= float32(b)
+			if acc != Exact || got != want {
+				t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, a+b, b, got, acc, want)
+			}
+		}
+	}
+}
+
+// TestFloatAdd64 verifies that Float.Add/Sub with a 53-bit mantissa
+// produces the same results as float64 addition/subtraction.
+func TestFloatAdd64(t *testing.T) {
+	// choose base such that we cross the mantissa precision limit
+	const base = 1<<55 - 0x10 // 11...110000 (55 bits)
+	for d := 0; d <= 0x10; d++ {
+		for i := 0; i < 2; i++ {
+			a, b := float64(base), float64(d)
+			if i == 1 {
+				a, b = b, a // second pass: swap the operands
+			}
+
+			fa := NewFloat(a)
+			fb := NewFloat(b)
+			sum := new(Float).SetPrec(53)
+
+			sum.Add(fa, fb)
+			got, acc := sum.Float64()
+			want := a + b
+			if acc != Exact || got != want {
+				t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, a, b, got, acc, want)
+			}
+
+			sum.Sub(sum, fb)
+			got, acc = sum.Float64()
+			want -= b
+			if acc != Exact || got != want {
+				t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, a+b, b, got, acc, want)
+			}
+		}
+	}
+}
+
+// TestIssue20490 checks that Sub and Add give the same result when the
+// receiver is also the second operand as when the receiver is a new Float.
+func TestIssue20490(t *testing.T) {
+	for _, tc := range []struct {
+		a, b float64
+	}{
+		{4, 1},
+		{-4, 1},
+		{4, -1},
+		{-4, -1},
+	} {
+		a, b := NewFloat(tc.a), NewFloat(tc.b)
+		diff := new(Float).Sub(a, b)
+		// b is both the receiver and the second operand here
+		b.Sub(a, b)
+		if b.Cmp(diff) != 0 {
+			t.Errorf("got %g - %g = %g; want %g\n", a, NewFloat(tc.b), b, diff)
+		}
+
+		b = NewFloat(tc.b)
+		sum := new(Float).Add(a, b)
+		b.Add(a, b)
+		if b.Cmp(sum) != 0 {
+			t.Errorf("got %g + %g = %g; want %g\n", a, NewFloat(tc.b), b, sum)
+		}
+	}
+}
+
+// TestFloatMul exercises Float.Mul and Float.Quo over all pairs of bitsList
+// entries. The exact product is formed "manually" on the Bits representation
+// and rounded with its round method; the Float results must match it for
+// every precision in precList and each of the rounding modes tried.
+func TestFloatMul(t *testing.T) {
+	for _, xb := range bitsList {
+		for _, yb := range bitsList {
+			// exact operands and exact product
+			x, y := xb.Float(), yb.Float()
+			prodBits := xb.mul(yb)
+			prod := prodBits.Float()
+
+			for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+				for _, prec := range precList {
+					res := new(Float).SetPrec(prec).SetMode(mode)
+
+					// x * y must equal the rounded exact product
+					res.Mul(x, y)
+					if want := prodBits.round(prec, mode); res.Cmp(want) != 0 {
+						t.Errorf("i = %d, prec = %d, %s:\n\t     %v %v\n\t*    %v %v\n\t=    %v\n\twant %v",
+							i, prec, mode, x, xb, y, yb, res, want)
+					}
+
+					// (x * y) / x must equal the rounded y
+					if x.Sign() == 0 {
+						continue // ignore div-0 case (not invertible)
+					}
+					res.Quo(prod, x)
+					if want := yb.round(prec, mode); res.Cmp(want) != 0 {
+						t.Errorf("i = %d, prec = %d, %s:\n\t     %v %v\n\t/    %v %v\n\t=    %v\n\twant %v",
+							i, prec, mode, prod, prodBits, x, xb, res, want)
+					}
+				}
+			}
+		}
+	}
+}
+
+// TestFloatMul64 verifies that Float.Mul/Quo with a 53-bit mantissa
+// produces the same results as float64 multiplication/division.
+func TestFloatMul64(t *testing.T) {
+	for _, tc := range []struct {
+		x, y float64
+	}{
+		{0, 0},
+		{0, 1},
+		{1, 1},
+		{1, 1.5},
+		{1.234, 0.5678},
+		{2.718281828, 3.14159265358979},
+		{2.718281828e10, 3.14159265358979e-32},
+		{1.0 / 3, 1e200},
+	} {
+		for i := 0; i < 8; i++ {
+			// the low three bits of i select sign flips and an operand swap
+			a, b := tc.x, tc.y
+			if i&1 != 0 {
+				a = -a
+			}
+			if i&2 != 0 {
+				b = -b
+			}
+			if i&4 != 0 {
+				a, b = b, a
+			}
+
+			fa := NewFloat(a)
+			fb := NewFloat(b)
+			prod := new(Float).SetPrec(53)
+
+			want := a * b
+			if got, _ := prod.Mul(fa, fb).Float64(); got != want {
+				t.Errorf("%g * %g = %g; want %g", a, b, got, want)
+			}
+
+			if b == 0 {
+				continue // avoid division-by-zero
+			}
+
+			// dividing the product by b again must agree with float64 division
+			want /= b
+			if got, _ := prod.Quo(prod, fb).Float64(); got != want {
+				t.Errorf("%g / %g = %g; want %g", a*b, b, got, want)
+			}
+		}
+	}
+}
+
+// TestIssue6866 evaluates 2 + 1.0/3*-6 and 2 - 1.0/3*+6 at every precision in
+// precList and requires the two results to be equal and to be zero.
+func TestIssue6866(t *testing.T) {
+	for _, prec := range precList {
+		mk := func(v int64) *Float { return new(Float).SetPrec(prec).SetInt64(v) }
+		two, one, three := mk(2), mk(1), mk(3)
+		msix, psix := mk(-6), mk(+6)
+
+		prod := new(Float).SetPrec(prec)
+		z1 := new(Float).SetPrec(prec)
+		z2 := new(Float).SetPrec(prec)
+
+		// z1 = 2 + 1.0/3*-6
+		prod.Quo(one, three)
+		prod.Mul(prod, msix)
+		z1.Add(two, prod)
+
+		// z2 = 2 - 1.0/3*+6
+		prod.Quo(one, three)
+		prod.Mul(prod, psix)
+		z2.Sub(two, prod)
+
+		if z1.Cmp(z2) != 0 {
+			t.Fatalf("prec %d: got z1 = %v != z2 = %v; want z1 == z2\n", prec, z1, z2)
+		}
+		if z1.Sign() != 0 {
+			t.Errorf("prec %d: got z1 = %v; want 0", prec, z1)
+		}
+		if z2.Sign() != 0 {
+			t.Errorf("prec %d: got z2 = %v; want 0", prec, z2)
+		}
+	}
+}
+
+// TestFloatQuo checks Float.Quo by dividing an exact product x = z*y by y and comparing with the rounded z.
+func TestFloatQuo(t *testing.T) {
+	// TODO(gri) make the test vary these precisions
+	preci := 200 // precision of integer part
+	precf := 20  // precision of fractional part
+
+	for i := 0; i < 8; i++ {
+		// compute accurate (not rounded) result z; bits 4, 2, and 1 of i each select one optional term
+		bits := Bits{preci - 1}
+		if i&4 != 0 {
+			bits = append(bits, 0)
+		}
+		if i&2 != 0 {
+			bits = append(bits, -1)
+		}
+		if i&1 != 0 {
+			bits = append(bits, -precf)
+		}
+		z := bits.Float()
+
+		// compute accurate x as z*y
+		y := NewFloat(3.14159265358979323e123)
+
+		x := new(Float).SetPrec(z.Prec() + y.Prec()).SetMode(ToZero)
+		x.Mul(z, y)
+
+		// leave for debugging
+		// fmt.Printf("x = %s\ny = %s\nz = %s\n", x, y, z)
+
+		if got := x.Acc(); got != Exact {
+			t.Errorf("got acc = %s; want exact", got)
+		}
+
+		// round accurate z for a variety of precisions and
+		// modes and compare against result of x / y.
+		for _, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+			for d := -5; d < 5; d++ {
+				prec := uint(preci + d)
+				got := new(Float).SetPrec(prec).SetMode(mode).Quo(x, y)
+				want := bits.round(prec, mode)
+				if got.Cmp(want) != 0 {
+					t.Errorf("i = %d, prec = %d, %s:\n\t     %s\n\t/    %s\n\t=    %s\n\twant %s", i, prec, mode, x, y, got, want)
+				}
+			}
+		}
+	}
+}
+
+var long = flag.Bool("long", false, "run very long tests")
+
+// TestFloatQuoSmoke tests all divisions x/y for values x, y in the range [-n, +n];
+// it serves as a smoke test for basic correctness of division.
+func TestFloatQuoSmoke(t *testing.T) {
+	n := 10
+	if *long {
+		n = 1000
+	}
+
+	const dprec = 3         // max. precision variation
+	const prec = 10 + dprec // enough bits to hold n precisely
+	for x := -n; x <= n; x++ {
+		for y := -n; y <= n; y++ { // inclusive upper bound, matching x and the documented range
+			if y == 0 {
+				continue // skip division by zero
+			}
+
+			a := float64(x)
+			b := float64(y)
+			c := a / b
+
+			// vary operand precision (only ok as long as a, b can be represented correctly)
+			for ad := -dprec; ad <= dprec; ad++ {
+				for bd := -dprec; bd <= dprec; bd++ {
+					A := new(Float).SetPrec(uint(prec + ad)).SetFloat64(a)
+					B := new(Float).SetPrec(uint(prec + bd)).SetFloat64(b)
+					C := new(Float).SetPrec(53).Quo(A, B) // C has float64 mantissa width
+
+					cc, acc := C.Float64()
+					if cc != c {
+						t.Errorf("%g/%g = %s; want %.5g\n", a, b, C.Text('g', 5), c)
+						continue
+					}
+					if acc != Exact {
+						t.Errorf("%g/%g got %s result; want exact result", a, b, acc)
+					}
+				}
+			}
+		}
+	}
+}
+
+// TestFloatArithmeticSpecialValues tests that Float operations produce the
+// correct results for combinations of zero (±0), finite (±1 and ±2.71828),
+// and infinite (±Inf) operands.
+func TestFloatArithmeticSpecialValues(t *testing.T) {
+	zero := 0.0
+	args := []float64{math.Inf(-1), -2.71828, -1, -zero, zero, 1, 2.71828, math.Inf(1)}
+	xx := new(Float)
+	yy := new(Float)
+	got := new(Float)
+	want := new(Float)
+	for i := 0; i < 4; i++ {
+		for _, x := range args {
+			xx.SetFloat64(x)
+			// check conversion is correct
+			// (no need to do this for y, since we see exactly the
+			// same values there)
+			if got, acc := xx.Float64(); got != x || acc != Exact {
+				t.Errorf("Float(%g) == %g (%s)", x, got, acc)
+			}
+			for _, y := range args {
+				yy.SetFloat64(y)
+				var (
+					op string
+					z  float64
+					f  func(z, x, y *Float) *Float
+				)
+				switch i {
+				case 0:
+					op = "+"
+					z = x + y
+					f = (*Float).Add
+				case 1:
+					op = "-"
+					z = x - y
+					f = (*Float).Sub
+				case 2:
+					op = "*"
+					z = x * y
+					f = (*Float).Mul
+				case 3:
+					op = "/"
+					z = x / y
+					f = (*Float).Quo
+				default:
+					panic("unreachable")
+				}
+				var errnan bool // set if execution of f panicked with ErrNaN
+				// protect execution of f
+				func() {
+					defer func() {
+						if p := recover(); p != nil {
+							_ = p.(ErrNaN) // re-panic if not ErrNaN
+							errnan = true
+						}
+					}()
+					f(got, xx, yy)
+				}()
+				if math.IsNaN(z) {
+					if !errnan {
+						t.Errorf("%5g %s %5g = %5s; want ErrNaN panic", x, op, y, got)
+					}
+					continue
+				}
+				want.SetFloat64(z) // z is not NaN here; set before reporting so messages show this case's expected value
+				if errnan {
+					t.Errorf("%5g %s %5g panicked with ErrNan; want %5s", x, op, y, want)
+					continue
+				}
+				if !alike(got, want) {
+					t.Errorf("%5g %s %5g = %5s; want %5s", x, op, y, got, want)
+				}
+			}
+		}
+	}
+}
+
+// TestFloatArithmeticOverflow checks the result and accuracy of Add, Sub, Mul, and Quo near the exponent limits.
+func TestFloatArithmeticOverflow(t *testing.T) {
+	for _, tc := range []struct {
+		prec       uint
+		mode       RoundingMode
+		op         byte
+		x, y, want string
+		acc        Accuracy
+	}{
+		{4, ToNearestEven, '+', "0", "0", "0", Exact},                   // smoke test
+		{4, ToNearestEven, '+', "0x.8p+0", "0x.8p+0", "0x.8p+1", Exact}, // smoke test
+
+		{4, ToNearestEven, '+', "0", "0x.8p2147483647", "0x.8p+2147483647", Exact},
+		{4, ToNearestEven, '+', "0x.8p2147483500", "0x.8p2147483647", "0x.8p+2147483647", Below}, // rounded to zero
+		{4, ToNearestEven, '+', "0x.8p2147483647", "0x.8p2147483647", "+Inf", Above},             // exponent overflow in +
+		{4, ToNearestEven, '+', "-0x.8p2147483647", "-0x.8p2147483647", "-Inf", Below},           // exponent overflow in +
+		{4, ToNearestEven, '-', "-0x.8p2147483647", "0x.8p2147483647", "-Inf", Below},            // exponent overflow in -
+
+		{4, ToZero, '+', "0x.fp2147483647", "0x.8p2147483643", "0x.fp+2147483647", Below}, // rounded to zero
+		{4, ToNearestEven, '+', "0x.fp2147483647", "0x.8p2147483643", "+Inf", Above},      // exponent overflow in rounding
+		{4, AwayFromZero, '+', "0x.fp2147483647", "0x.8p2147483643", "+Inf", Above},       // exponent overflow in rounding
+
+		{4, AwayFromZero, '-', "-0x.fp2147483647", "0x.8p2147483644", "-Inf", Below},        // exponent overflow in rounding
+		{4, ToNearestEven, '-', "-0x.fp2147483647", "0x.8p2147483643", "-Inf", Below},       // exponent overflow in rounding
+		{4, ToZero, '-', "-0x.fp2147483647", "0x.8p2147483643", "-0x.fp+2147483647", Above}, // rounded to zero
+
+		{4, ToNearestEven, '+', "0", "0x.8p-2147483648", "0x.8p-2147483648", Exact},
+		{4, ToNearestEven, '+', "0x.8p-2147483648", "0x.8p-2147483648", "0x.8p-2147483647", Exact},
+
+		{4, ToNearestEven, '*', "1", "0x.8p2147483647", "0x.8p+2147483647", Exact},
+		{4, ToNearestEven, '*', "2", "0x.8p2147483647", "+Inf", Above},  // exponent overflow in *
+		{4, ToNearestEven, '*', "-2", "0x.8p2147483647", "-Inf", Below}, // exponent overflow in *
+
+		{4, ToNearestEven, '/', "0.5", "0x.8p2147483647", "0x.8p-2147483646", Exact},
+		{4, ToNearestEven, '/', "0x.8p+0", "0x.8p2147483647", "0x.8p-2147483646", Exact},
+		{4, ToNearestEven, '/', "0x.8p-1", "0x.8p2147483647", "0x.8p-2147483647", Exact},
+		{4, ToNearestEven, '/', "0x.8p-2", "0x.8p2147483647", "0x.8p-2147483648", Exact},
+		{4, ToNearestEven, '/', "0x.8p-3", "0x.8p2147483647", "0", Below}, // exponent underflow in /
+	} {
+		x := makeFloat(tc.x)
+		y := makeFloat(tc.y)
+		z := new(Float).SetPrec(tc.prec).SetMode(tc.mode)
+		switch tc.op {
+		case '+':
+			z.Add(x, y)
+		case '-':
+			z.Sub(x, y)
+		case '*':
+			z.Mul(x, y)
+		case '/':
+			z.Quo(x, y)
+		default:
+			panic("unreachable")
+		}
+		got, acc := z.Text('p', 0), z.Acc()
+		if got != tc.want || acc != tc.acc {
+			t.Errorf("prec = %d (%s): %s %c %s = %s (%s); want %s (%s)",
+				tc.prec, tc.mode, x.Text('p', 0), tc.op, y.Text('p', 0), got, acc, tc.want, tc.acc)
+		}
+	}
+}
+
+// TODO(gri) Add tests that check correctness in the presence of aliasing.
+
+// TestFloatArithmeticRounding checks that rounding is applied after the sign
+// of a result has been set. Rounding under ToNegativeInf and ToPositiveInf
+// depends on the sign of the value being rounded, so the table uses specific
+// values that are known to fail if rounding is "factored" out before the
+// result sign is set.
+func TestFloatArithmeticRounding(t *testing.T) {
+	for _, tc := range []struct {
+		mode       RoundingMode
+		prec       uint
+		x, y, want int64
+		op         byte
+	}{
+		{ToZero, 3, -0x8, -0x1, -0x8, '+'},
+		{AwayFromZero, 3, -0x8, -0x1, -0xa, '+'},
+		{ToNegativeInf, 3, -0x8, -0x1, -0xa, '+'},
+
+		{ToZero, 3, -0x8, 0x1, -0x8, '-'},
+		{AwayFromZero, 3, -0x8, 0x1, -0xa, '-'},
+		{ToNegativeInf, 3, -0x8, 0x1, -0xa, '-'},
+
+		{ToZero, 3, -0x9, 0x1, -0x8, '*'},
+		{AwayFromZero, 3, -0x9, 0x1, -0xa, '*'},
+		{ToNegativeInf, 3, -0x9, 0x1, -0xa, '*'},
+
+		{ToZero, 3, -0x9, 0x1, -0x8, '/'},
+		{AwayFromZero, 3, -0x9, 0x1, -0xa, '/'},
+		{ToNegativeInf, 3, -0x9, 0x1, -0xa, '/'},
+	} {
+		x := new(Float).SetInt64(tc.x)
+		y := new(Float).SetInt64(tc.y)
+		z := new(Float).SetPrec(tc.prec).SetMode(tc.mode)
+		// apply the operation under test; z carries the precision and rounding mode
+		switch tc.op {
+		case '+':
+			z.Add(x, y)
+		case '-':
+			z.Sub(x, y)
+		case '*':
+			z.Mul(x, y)
+		case '/':
+			z.Quo(x, y)
+		default:
+			panic("unreachable")
+		}
+		got, acc := z.Int64()
+		if acc != Exact || got != tc.want {
+			t.Errorf("%s, %d bits: %d %c %d = %d (%s); want %d (Exact)",
+				tc.mode, tc.prec, tc.x, tc.op, tc.y, got, acc, tc.want)
+		}
+	}
+}
+
+// TestFloatCmpSpecialValues tests that Cmp produces the correct results for
+// combinations of zero (±0), finite (±1 and ±2.71828), and infinite (±Inf)
+// operands. The expected result is derived by comparing the float64
+// arguments directly with < and >.
+func TestFloatCmpSpecialValues(t *testing.T) {
+	zero := 0.0
+	args := []float64{math.Inf(-1), -2.71828, -1, -zero, zero, 1, 2.71828, math.Inf(1)}
+	xx := new(Float)
+	yy := new(Float)
+	for _, x := range args {
+		xx.SetFloat64(x)
+		// check conversion is correct
+		// (no need to do this for y, since we see exactly the
+		// same values there)
+		if got, acc := xx.Float64(); got != x || acc != Exact {
+			t.Errorf("Float(%g) == %g (%s)", x, got, acc)
+		}
+		for _, y := range args {
+			yy.SetFloat64(y)
+			got := xx.Cmp(yy)
+			want := 0
+			switch {
+			case x < y:
+				want = -1
+			case x > y:
+				want = +1
+			}
+			if got != want {
+				t.Errorf("(%g).Cmp(%g) = %v; want %v", x, y, got, want)
+			}
+		}
+	}
+}
+
+// BenchmarkFloatAdd measures z.Add(x, y) with x = 1/3 and y = 1/6 rounded
+// to each of several precisions; one sub-benchmark is run per precision.
+func BenchmarkFloatAdd(b *testing.B) {
+	x, y, z := new(Float), new(Float), new(Float)
+
+	for _, prec := range []uint{10, 1e2, 1e3, 1e4, 1e5} {
+		x.SetPrec(prec).SetRat(NewRat(1, 3))
+		y.SetPrec(prec).SetRat(NewRat(1, 6))
+		z.SetPrec(prec)
+
+		b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				z.Add(x, y)
+			}
+		})
+	}
+}
+
+// BenchmarkFloatSub measures z.Sub(x, y) with x = 1/3 and y = 1/6 rounded
+// to each of several precisions; one sub-benchmark is run per precision.
+func BenchmarkFloatSub(b *testing.B) {
+	x, y, z := new(Float), new(Float), new(Float)
+
+	for _, prec := range []uint{10, 1e2, 1e3, 1e4, 1e5} {
+		x.SetPrec(prec).SetRat(NewRat(1, 3))
+		y.SetPrec(prec).SetRat(NewRat(1, 6))
+		z.SetPrec(prec)
+
+		b.Run(fmt.Sprintf("%v", prec), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				z.Sub(x, y)
+			}
+		})
+	}
+}
diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go
new file mode 100644
index 0000000..6501185
--- /dev/null
+++ b/src/math/big/floatconv.go
@@ -0,0 +1,302 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements string-to-Float conversion functions.
+
+package big
+
+import (
+	"fmt"
+	"io"
+	"strings"
+)
+
+var floatZero Float // declared without initializer; NOTE(review): no use is visible in this file — confirm where the package reads it
+
+// SetString parses s as Parse does with a base argument of 0 and sets z to
+// the result. It returns z and true on success. All of s (not just a prefix)
+// must form a valid number; otherwise SetString returns nil and false, and
+// the value of z is undefined.
+func (z *Float) SetString(s string) (*Float, bool) {
+	f, _, err := z.Parse(s, 0)
+	if err != nil {
+		return nil, false
+	}
+	return f, true
+}
+
+// scan is like Parse but reads the longest possible prefix representing a valid
+// floating point number from an io.ByteScanner rather than a string. It serves
+// as the implementation of Parse. It does not recognize ±Inf and does not expect
+// EOF at the end. On success f is z and err is nil; on error f is nil.
+func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) {
+	prec := z.prec
+	if prec == 0 {
+		prec = 64
+	}
+
+	// Mark z as zero up front: a reasonable value in case of an error.
+	z.form = zero
+
+	// sign
+	z.neg, err = scanSign(r)
+	if err != nil {
+		return
+	}
+
+	// mantissa
+	var fcount int // fractional digit count; valid if <= 0
+	z.mant, b, fcount, err = z.mant.scan(r, base, true)
+	if err != nil {
+		return
+	}
+
+	// exponent
+	var exp int64
+	var ebase int
+	exp, ebase, err = scanExponent(r, true, base == 0)
+	if err != nil {
+		return
+	}
+
+	// special-case 0
+	if len(z.mant) == 0 {
+		z.prec = prec
+		z.acc = Exact
+		z.form = zero
+		f = z
+		return
+	}
+	// len(z.mant) > 0
+
+	// The mantissa may have a radix point (fcount <= 0) and there
+	// may be a nonzero exponent exp. The radix point amounts to a
+	// division by b**(-fcount). An exponent means multiplication by
+	// ebase**exp. Finally, mantissa normalization (shift left) requires
+	// a correcting multiplication by 2**(-shiftcount). Multiplications
+	// are commutative, so we can apply them in any order as long as there
+	// is no loss of precision. We only have powers of 2 and 10, and
+	// we split powers of 10 into the product of the same powers of
+	// 2 and 5. This reduces the size of the multiplication factor
+	// needed for base-10 exponents.
+
+	// normalize mantissa and determine initial exponent contributions
+	exp2 := int64(len(z.mant))*_W - fnorm(z.mant)
+	exp5 := int64(0)
+
+	// determine binary or decimal exponent contribution of radix point
+	if fcount < 0 {
+		// The mantissa has a radix point ddd.dddd; and
+		// -fcount is the number of digits to the right
+		// of '.'. Adjust relevant exponent accordingly.
+		d := int64(fcount)
+		switch b {
+		case 10:
+			exp5 = d
+			fallthrough // 10**e == 5**e * 2**e
+		case 2:
+			exp2 += d
+		case 8:
+			exp2 += d * 3 // octal digits are 3 bits each
+		case 16:
+			exp2 += d * 4 // hexadecimal digits are 4 bits each
+		default:
+			panic("unexpected mantissa base")
+		}
+		// fcount consumed - not needed anymore
+	}
+
+	// take actual exponent into account
+	switch ebase {
+	case 10:
+		exp5 += exp
+		fallthrough // see fallthrough above
+	case 2:
+		exp2 += exp
+	default:
+		panic("unexpected exponent base")
+	}
+	// exp consumed - not needed anymore
+
+	// apply 2**exp2; exp2 is stored as an int32 and must lie in [MinExp, MaxExp]
+	if MinExp <= exp2 && exp2 <= MaxExp {
+		z.prec = prec
+		z.form = finite
+		z.exp = int32(exp2)
+		f = z
+	} else {
+		err = fmt.Errorf("exponent overflow")
+		return
+	}
+
+	if exp5 == 0 {
+		// no decimal exponent contribution
+		z.round(0)
+		return
+	}
+	// exp5 != 0
+
+	// apply 5**exp5: divide by 5**(-exp5) if exp5 < 0, else multiply by 5**exp5
+	p := new(Float).SetPrec(z.Prec() + 64) // use more bits for p -- TODO(gri) what is the right number?
+	if exp5 < 0 {
+		z.Quo(z, p.pow5(uint64(-exp5)))
+	} else {
+		z.Mul(z, p.pow5(uint64(exp5)))
+	}
+
+	return
+}
+
+// pow5tab[i] is 5**i. These powers of 5 fit into a uint64.
+//
+//	for p, q := uint64(0), uint64(1); p < q; p, q = q, q*5 {
+//		fmt.Println(q)
+//	}
+var pow5tab = [...]uint64{
+	1,
+	5,
+	25,
+	125,
+	625,
+	3125,
+	15625,
+	78125,
+	390625,
+	1953125,
+	9765625,
+	48828125,
+	244140625,
+	1220703125,
+	6103515625,
+	30517578125,
+	152587890625,
+	762939453125,
+	3814697265625,
+	19073486328125,
+	95367431640625,
+	476837158203125,
+	2384185791015625,
+	11920928955078125,
+	59604644775390625,
+	298023223876953125,
+	1490116119384765625,
+	7450580596923828125,
+}
+
+// pow5 computes 5**n, stores the result in z, and returns z.
+// Small exponents come straight from pow5tab; larger ones are built
+// from the largest table entry by binary exponentiation.
+func (z *Float) pow5(n uint64) *Float {
+	const last = uint64(len(pow5tab) - 1) // largest exponent in pow5tab
+	if n <= last {
+		return z.SetUint64(pow5tab[n])
+	}
+
+	// n > last: start at 5**last and multiply in the remaining 5**(n-last).
+	z.SetUint64(pow5tab[last])
+	rest := n - last
+
+	// The running square carries 64 more bits than z.
+	// TODO(gri) what is the right number?
+	extra := z.Prec() + 64
+	sq := new(Float).SetPrec(extra).SetUint64(5)
+	for ; rest > 0; rest >>= 1 {
+		if rest&1 == 1 {
+			z.Mul(z, sq)
+		}
+		sq.Mul(sq, sq)
+	}
+
+	return z
+}
+
+// Parse parses s which must contain a text representation of a floating-
+// point number with a mantissa in the given conversion base (the exponent
+// is always a decimal number), or a string representing an infinite value.
+//
+// For base 0, an underscore character “_” may appear between a base
+// prefix and an adjacent digit, and between successive digits; such
+// underscores do not change the value of the number, or the returned
+// digit count. Incorrect placement of underscores is reported as an
+// error if there are no other errors. If base != 0, underscores are
+// not recognized and thus terminate scanning like any other character
+// that is not a valid radix point or digit.
+//
+// It sets z to the (possibly rounded) value of the corresponding floating-
+// point value, and returns z, the actual base b, and an error err, if any.
+// The entire string (not just a prefix) must be consumed for success.
+// If z's precision is 0, it is changed to 64 before rounding takes effect.
+// The number must be of the form:
+//
+//	number    = [ sign ] ( float | "inf" | "Inf" ) .
+//	sign      = "+" | "-" .
+//	float     = ( mantissa | prefix pmantissa ) [ exponent ] .
+//	prefix    = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
+//	mantissa  = digits "." [ digits ] | digits | "." digits .
+//	pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
+//	exponent  = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
+//	digits    = digit { [ "_" ] digit } .
+//	digit     = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
+//
+// The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base
+// argument will lead to a run-time panic.
+//
+// For base 0, the number prefix determines the actual base: A prefix of
+// “0b” or “0B” selects base 2, “0o” or “0O” selects base 8, and
+// “0x” or “0X” selects base 16. Otherwise, the actual base is 10 and
+// no prefix is accepted. The octal prefix "0" is not supported (a leading
+// "0" is simply considered a "0").
+//
+// A "p" or "P" exponent indicates a base 2 (rather than base 10) exponent;
+// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the
+// maximum float64 value. For hexadecimal mantissae, the exponent character
+// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator
+// cannot be distinguished from a mantissa digit).
+//
+// If an error is reported, including for trailing input after a valid
+// number, the returned *Float f is nil and z is valid but not defined.
+func (z *Float) Parse(s string, base int) (f *Float, b int, err error) {
+	// scan doesn't handle ±Inf
+	if len(s) == 3 && (s == "Inf" || s == "inf") {
+		f = z.SetInf(false)
+		return
+	}
+	if len(s) == 4 && (s[0] == '+' || s[0] == '-') && (s[1:] == "Inf" || s[1:] == "inf") {
+		f = z.SetInf(s[0] == '-')
+		return
+	}
+
+	r := strings.NewReader(s)
+	if f, b, err = z.scan(r, base); err != nil {
+		return
+	}
+
+	// entire string must have been consumed; otherwise report an error with f == nil
+	if ch, err2 := r.ReadByte(); err2 == nil {
+		f, err = nil, fmt.Errorf("expected end of string, found %q", ch)
+	} else if err2 != io.EOF {
+		f, err = nil, err2
+	}
+
+	return
+}
+
+// ParseFloat is like f.Parse(s, base) for a new Float f with precision prec and rounding mode mode.
+func ParseFloat(s string, base int, prec uint, mode RoundingMode) (f *Float, b int, err error) {
+	z := new(Float).SetPrec(prec).SetMode(mode)
+	return z.Parse(s, base)
+}
+
+var _ fmt.Scanner = (*Float)(nil) // *Float must implement fmt.Scanner
+
+// Scan is a support routine for fmt.Scanner. It skips leading space in s and
+// sets z to the number scanned from s. The verb ch is not examined; verbs that
+// fmt.Scan supports for floating point values are 'b' (binary), 'e', 'E', 'f', 'F', 'g' and 'G'.
+// Scan doesn't handle ±Inf.
+func (z *Float) Scan(s fmt.ScanState, ch rune) error {
+	s.SkipSpace()
+	var src io.ByteScanner = byteReader{s}
+	_, _, err := z.scan(src, 0)
+	return err
+}
diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go
new file mode 100644
index 0000000..a1cc38a
--- /dev/null
+++ b/src/math/big/floatconv_test.go
@@ -0,0 +1,825 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"math/bits"
+	"strconv"
+	"testing"
+)
+
+var zero_ float64
+
+func TestFloatSetFloat64String(t *testing.T) {
+	inf := math.Inf(0)
+	nan := math.NaN()
+
+	for _, test := range []struct {
+		s string
+		x float64 // NaNs represent invalid inputs
+	}{
+		// basics
+		{"0", 0},
+		{"-0", -zero_},
+		{"+0", 0},
+		{"1", 1},
+		{"-1", -1},
+		{"+1", 1},
+		{"1.234", 1.234},
+		{"-1.234", -1.234},
+		{"+1.234", 1.234},
+		{".1", 0.1},
+		{"1.", 1},
+		{"+1.", 1},
+
+		// various zeros
+		{"0e100", 0},
+		{"-0e+100", -zero_},
+		{"+0e-100", 0},
+		{"0E100", 0},
+		{"-0E+100", -zero_},
+		{"+0E-100", 0},
+
+		// various decimal exponent formats
+		{"1.e10", 1e10},
+		{"1e+10", 1e10},
+		{"+1e-10", 1e-10},
+		{"1E10", 1e10},
+		{"1.E+10", 1e10},
+		{"+1E-10", 1e-10},
+
+		// infinities
+		{"Inf", inf},
+		{"+Inf", inf},
+		{"-Inf", -inf},
+		{"inf", inf},
+		{"+inf", inf},
+		{"-inf", -inf},
+
+		// invalid numbers
+		{"", nan},
+		{"-", nan},
+		{"0x", nan},
+		{"0e", nan},
+		{"1.2ef", nan},
+		{"2..3", nan},
+		{"123..", nan},
+		{"infinity", nan},
+		{"foobar", nan},
+
+		// invalid underscores
+		{"_", nan},
+		{"0_", nan},
+		{"1__0", nan},
+		{"123_.", nan},
+		{"123._", nan},
+		{"123._4", nan},
+		{"1_2.3_4_", nan},
+		{"_.123", nan},
+		{"_123.456", nan},
+		{"10._0", nan},
+		{"10.0e_0", nan},
+		{"10.0e0_", nan},
+		{"0P-0__0", nan},
+
+		// misc decimal values
+		{"3.14159265", 3.14159265},
+		{"-687436.79457e-245", -687436.79457e-245},
+		{"-687436.79457E245", -687436.79457e245},
+		{".0000000000000000000000000000000000000001", 1e-40},
+		{"+10000000000000000000000000000000000000000e-0", 1e40},
+
+		// decimal mantissa, binary exponent
+		{"0p0", 0},
+		{"-0p0", -zero_},
+		{"1p10", 1 << 10},
+		{"1p+10", 1 << 10},
+		{"+1p-10", 1.0 / (1 << 10)},
+		{"1024p-12", 0.25},
+		{"-1p10", -1024},
+		{"1.5p1", 3},
+
+		// binary mantissa, decimal exponent
+		{"0b0", 0},
+		{"-0b0", -zero_},
+		{"0b0e+10", 0},
+		{"-0b0e-10", -zero_},
+		{"0b1010", 10},
+		{"0B1010E2", 1000},
+		{"0b.1", 0.5},
+		{"0b.001", 0.125},
+		{"0b.001e3", 125},
+
+		// binary mantissa, binary exponent
+		{"0b0p+10", 0},
+		{"-0b0p-10", -zero_},
+		{"0b.1010p4", 10},
+		{"0b1p-1", 0.5},
+		{"0b001p-3", 0.125},
+		{"0b.001p3", 1},
+		{"0b0.01p2", 1},
+		{"0b0.01P+2", 1},
+
+		// octal mantissa, decimal exponent
+		{"0o0", 0},
+		{"-0o0", -zero_},
+		{"0o0e+10", 0},
+		{"-0o0e-10", -zero_},
+		{"0o12", 10},
+		{"0O12E2", 1000},
+		{"0o.4", 0.5},
+		{"0o.01", 0.015625},
+		{"0o.01e3", 15.625},
+
+		// octal mantissa, binary exponent
+		{"0o0p+10", 0},
+		{"-0o0p-10", -zero_},
+		{"0o.12p6", 10},
+		{"0o4p-3", 0.5},
+		{"0o0014p-6", 0.1875},
+		{"0o.001p9", 1},
+		{"0o0.01p7", 2},
+		{"0O0.01P+2", 0.0625},
+
+		// hexadecimal mantissa and exponent
+		{"0x0", 0},
+		{"-0x0", -zero_},
+		{"0x0p+10", 0},
+		{"-0x0p-10", -zero_},
+		{"0xff", 255},
+		{"0X.8p1", 1},
+		{"-0X0.00008p16", -0.5},
+		{"-0X0.00008P+16", -0.5},
+		{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64},
+		{"0x1.fffffffffffffp1023", math.MaxFloat64},
+
+		// underscores
+		{"0_0", 0},
+		{"1_000.", 1000},
+		{"1_2_3.4_5_6", 123.456},
+		{"1.0e0_0", 1},
+		{"1p+1_0", 1024},
+		{"0b_1000", 0x8},
+		{"0b_1011_1101", 0xbd},
+		{"0x_f0_0d_1eP+0_8", 0xf00d1e00},
+	} {
+		var x Float
+		x.SetPrec(53)
+		_, ok := x.SetString(test.s)
+		if math.IsNaN(test.x) {
+			// test.s is invalid
+			if ok {
+				t.Errorf("%s: want parse error", test.s)
+			}
+			continue
+		}
+		// test.s is valid
+		if !ok {
+			t.Errorf("%s: got parse error", test.s)
+			continue
+		}
+		f, _ := x.Float64()
+		want := new(Float).SetFloat64(test.x)
+		if x.Cmp(want) != 0 || x.Signbit() != want.Signbit() {
+			t.Errorf("%s: got %v (%v); want %v", test.s, &x, f, test.x)
+		}
+	}
+}
+
+func fdiv(a, b float64) float64 { return a / b } // a / b via a function call; test tables use it to "avoid constant arithmetic"
+
+const (
+	below1e23 = 99999999999999974834176  // x value in the TestFloat64Text table; NOTE(review): presumably the float64 just below 1e23 — confirm
+	above1e23 = 100000000000000008388608 // x value in the TestFloat64Text table; NOTE(review): presumably the float64 just above 1e23 — confirm
+)
+
+func TestFloat64Text(t *testing.T) {
+	for _, test := range []struct {
+		x      float64
+		format byte
+		prec   int
+		want   string
+	}{
+		{0, 'f', 0, "0"},
+		{math.Copysign(0, -1), 'f', 0, "-0"},
+		{1, 'f', 0, "1"},
+		{-1, 'f', 0, "-1"},
+
+		{0.001, 'e', 0, "1e-03"},
+		{0.459, 'e', 0, "5e-01"},
+		{1.459, 'e', 0, "1e+00"},
+		{2.459, 'e', 1, "2.5e+00"},
+		{3.459, 'e', 2, "3.46e+00"},
+		{4.459, 'e', 3, "4.459e+00"},
+		{5.459, 'e', 4, "5.4590e+00"},
+
+		{0.001, 'f', 0, "0"},
+		{0.459, 'f', 0, "0"},
+		{1.459, 'f', 0, "1"},
+		{2.459, 'f', 1, "2.5"},
+		{3.459, 'f', 2, "3.46"},
+		{4.459, 'f', 3, "4.459"},
+		{5.459, 'f', 4, "5.4590"},
+
+		{0, 'b', 0, "0"},
+		{math.Copysign(0, -1), 'b', 0, "-0"},
+		{1.0, 'b', 0, "4503599627370496p-52"},
+		{-1.0, 'b', 0, "-4503599627370496p-52"},
+		{4503599627370496, 'b', 0, "4503599627370496p+0"},
+
+		{0, 'p', 0, "0"},
+		{math.Copysign(0, -1), 'p', 0, "-0"},
+		{1024.0, 'p', 0, "0x.8p+11"},
+		{-1024.0, 'p', 0, "-0x.8p+11"},
+
+		// all test cases below from strconv/ftoa_test.go
+		{1, 'e', 5, "1.00000e+00"},
+		{1, 'f', 5, "1.00000"},
+		{1, 'g', 5, "1"},
+		{1, 'g', -1, "1"},
+		{20, 'g', -1, "20"},
+		{1234567.8, 'g', -1, "1.2345678e+06"},
+		{200000, 'g', -1, "200000"},
+		{2000000, 'g', -1, "2e+06"},
+
+		// g conversion and zero suppression
+		{400, 'g', 2, "4e+02"},
+		{40, 'g', 2, "40"},
+		{4, 'g', 2, "4"},
+		{.4, 'g', 2, "0.4"},
+		{.04, 'g', 2, "0.04"},
+		{.004, 'g', 2, "0.004"},
+		{.0004, 'g', 2, "0.0004"},
+		{.00004, 'g', 2, "4e-05"},
+		{.000004, 'g', 2, "4e-06"},
+
+		{0, 'e', 5, "0.00000e+00"},
+		{0, 'f', 5, "0.00000"},
+		{0, 'g', 5, "0"},
+		{0, 'g', -1, "0"},
+
+		{-1, 'e', 5, "-1.00000e+00"},
+		{-1, 'f', 5, "-1.00000"},
+		{-1, 'g', 5, "-1"},
+		{-1, 'g', -1, "-1"},
+
+		{12, 'e', 5, "1.20000e+01"},
+		{12, 'f', 5, "12.00000"},
+		{12, 'g', 5, "12"},
+		{12, 'g', -1, "12"},
+
+		{123456700, 'e', 5, "1.23457e+08"},
+		{123456700, 'f', 5, "123456700.00000"},
+		{123456700, 'g', 5, "1.2346e+08"},
+		{123456700, 'g', -1, "1.234567e+08"},
+
+		{1.2345e6, 'e', 5, "1.23450e+06"},
+		{1.2345e6, 'f', 5, "1234500.00000"},
+		{1.2345e6, 'g', 5, "1.2345e+06"},
+
+		{1e23, 'e', 17, "9.99999999999999916e+22"},
+		{1e23, 'f', 17, "99999999999999991611392.00000000000000000"},
+		{1e23, 'g', 17, "9.9999999999999992e+22"},
+
+		{1e23, 'e', -1, "1e+23"},
+		{1e23, 'f', -1, "100000000000000000000000"},
+		{1e23, 'g', -1, "1e+23"},
+
+		{below1e23, 'e', 17, "9.99999999999999748e+22"},
+		{below1e23, 'f', 17, "99999999999999974834176.00000000000000000"},
+		{below1e23, 'g', 17, "9.9999999999999975e+22"},
+
+		{below1e23, 'e', -1, "9.999999999999997e+22"},
+		{below1e23, 'f', -1, "99999999999999970000000"},
+		{below1e23, 'g', -1, "9.999999999999997e+22"},
+
+		{above1e23, 'e', 17, "1.00000000000000008e+23"},
+		{above1e23, 'f', 17, "100000000000000008388608.00000000000000000"},
+		{above1e23, 'g', 17, "1.0000000000000001e+23"},
+
+		{above1e23, 'e', -1, "1.0000000000000001e+23"},
+		{above1e23, 'f', -1, "100000000000000010000000"},
+		{above1e23, 'g', -1, "1.0000000000000001e+23"},
+
+		{5e-304 / 1e20, 'g', -1, "5e-324"},
+		{-5e-304 / 1e20, 'g', -1, "-5e-324"},
+		{fdiv(5e-304, 1e20), 'g', -1, "5e-324"},   // avoid constant arithmetic
+		{fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"}, // avoid constant arithmetic
+
+		{32, 'g', -1, "32"},
+		{32, 'g', 0, "3e+01"},
+
+		{100, 'x', -1, "0x1.9p+06"},
+
+		// {math.NaN(), 'g', -1, "NaN"},  // Float doesn't support NaNs
+		// {-math.NaN(), 'g', -1, "NaN"}, // Float doesn't support NaNs
+		{math.Inf(0), 'g', -1, "+Inf"},
+		{math.Inf(-1), 'g', -1, "-Inf"},
+		{-math.Inf(0), 'g', -1, "-Inf"},
+
+		{-1, 'b', -1, "-4503599627370496p-52"},
+
+		// fixed bugs
+		{0.9, 'f', 1, "0.9"},
+		{0.09, 'f', 1, "0.1"},
+		{0.0999, 'f', 1, "0.1"},
+		{0.05, 'f', 1, "0.1"},
+		{0.05, 'f', 0, "0"},
+		{0.5, 'f', 1, "0.5"},
+		{0.5, 'f', 0, "0"},
+		{1.5, 'f', 0, "2"},
+
+		// https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
+		{2.2250738585072012e-308, 'g', -1, "2.2250738585072014e-308"},
+		// https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
+		{2.2250738585072011e-308, 'g', -1, "2.225073858507201e-308"},
+
+		// Issue 2625.
+		{383260575764816448, 'f', 0, "383260575764816448"},
+		{383260575764816448, 'g', -1, "3.8326057576481645e+17"},
+
+		// Issue 15918.
+		{1, 'f', -10, "1"},
+		{1, 'f', -11, "1"},
+		{1, 'f', -12, "1"},
+	} {
+		// The test cases are from the strconv package which tests float64 values.
+		// When formatting values with prec = -1 (shortest representation),
+		// the actually available mantissa precision matters.
+		// For denormalized values, that precision is < 53 (SetFloat64 default).
+		// Compute and set the actual precision explicitly.
+		f := new(Float).SetPrec(actualPrec(test.x)).SetFloat64(test.x)
+		got := f.Text(test.format, test.prec)
+		if got != test.want {
+			t.Errorf("%v: got %s; want %s", test, got, test.want)
+			continue
+		}
+
+		if test.format == 'b' && test.x == 0 {
+			continue // 'b' format in strconv.Float requires knowledge of bias for 0.0
+		}
+		if test.format == 'p' {
+			continue // 'p' format not supported in strconv.Format
+		}
+
+		// verify that Float format matches strconv format
+		want := strconv.FormatFloat(test.x, test.format, test.prec, 64)
+		if got != want {
+			t.Errorf("%v: got %s; want %s (strconv)", test, got, want)
+		}
+	}
+}
+
+// actualPrec reports the number of mantissa bits x actually uses: 53 unless x is denormalized.
+func actualPrec(x float64) uint {
+	raw := math.Float64bits(x)
+	if x == 0 || raw&(0x7ff<<52) != 0 {
+		return 53 // zero, or a nonzero exponent field: x is not denormalized
+	}
+	return uint(bits.Len64(raw&(1<<52-1))) // denormalized: width of the fraction field's value
+}
+
+func TestFloatText(t *testing.T) {
+	const defaultRound = ^RoundingMode(0)
+
+	for _, test := range []struct {
+		x      string
+		round  RoundingMode
+		prec   uint
+		format byte
+		digits int
+		want   string
+	}{
+		{"0", defaultRound, 10, 'f', 0, "0"},
+		{"-0", defaultRound, 10, 'f', 0, "-0"},
+		{"1", defaultRound, 10, 'f', 0, "1"},
+		{"-1", defaultRound, 10, 'f', 0, "-1"},
+
+		{"1.459", defaultRound, 100, 'e', 0, "1e+00"},
+		{"2.459", defaultRound, 100, 'e', 1, "2.5e+00"},
+		{"3.459", defaultRound, 100, 'e', 2, "3.46e+00"},
+		{"4.459", defaultRound, 100, 'e', 3, "4.459e+00"},
+		{"5.459", defaultRound, 100, 'e', 4, "5.4590e+00"},
+
+		{"1.459", defaultRound, 100, 'E', 0, "1E+00"},
+		{"2.459", defaultRound, 100, 'E', 1, "2.5E+00"},
+		{"3.459", defaultRound, 100, 'E', 2, "3.46E+00"},
+		{"4.459", defaultRound, 100, 'E', 3, "4.459E+00"},
+		{"5.459", defaultRound, 100, 'E', 4, "5.4590E+00"},
+
+		{"1.459", defaultRound, 100, 'f', 0, "1"},
+		{"2.459", defaultRound, 100, 'f', 1, "2.5"},
+		{"3.459", defaultRound, 100, 'f', 2, "3.46"},
+		{"4.459", defaultRound, 100, 'f', 3, "4.459"},
+		{"5.459", defaultRound, 100, 'f', 4, "5.4590"},
+
+		{"1.459", defaultRound, 100, 'g', 0, "1"},
+		{"2.459", defaultRound, 100, 'g', 1, "2"},
+		{"3.459", defaultRound, 100, 'g', 2, "3.5"},
+		{"4.459", defaultRound, 100, 'g', 3, "4.46"},
+		{"5.459", defaultRound, 100, 'g', 4, "5.459"},
+
+		{"1459", defaultRound, 53, 'g', 0, "1e+03"},
+		{"2459", defaultRound, 53, 'g', 1, "2e+03"},
+		{"3459", defaultRound, 53, 'g', 2, "3.5e+03"},
+		{"4459", defaultRound, 53, 'g', 3, "4.46e+03"},
+		{"5459", defaultRound, 53, 'g', 4, "5459"},
+
+		{"1459", defaultRound, 53, 'G', 0, "1E+03"},
+		{"2459", defaultRound, 53, 'G', 1, "2E+03"},
+		{"3459", defaultRound, 53, 'G', 2, "3.5E+03"},
+		{"4459", defaultRound, 53, 'G', 3, "4.46E+03"},
+		{"5459", defaultRound, 53, 'G', 4, "5459"},
+
+		{"3", defaultRound, 10, 'e', 40, "3.0000000000000000000000000000000000000000e+00"},
+		{"3", defaultRound, 10, 'f', 40, "3.0000000000000000000000000000000000000000"},
+		{"3", defaultRound, 10, 'g', 40, "3"},
+
+		{"3e40", defaultRound, 100, 'e', 40, "3.0000000000000000000000000000000000000000e+40"},
+		{"3e40", defaultRound, 100, 'f', 4, "30000000000000000000000000000000000000000.0000"},
+		{"3e40", defaultRound, 100, 'g', 40, "3e+40"},
+
+		// make sure "stupid" exponents don't stall the machine
+		{"1e1000000", defaultRound, 64, 'p', 0, "0x.88b3a28a05eade3ap+3321929"},
+		{"1e646456992", defaultRound, 64, 'p', 0, "0x.e883a0c5c8c7c42ap+2147483644"},
+		{"1e646456993", defaultRound, 64, 'p', 0, "+Inf"},
+		{"1e1000000000", defaultRound, 64, 'p', 0, "+Inf"},
+		{"1e-1000000", defaultRound, 64, 'p', 0, "0x.efb4542cc8ca418ap-3321928"},
+		{"1e-646456993", defaultRound, 64, 'p', 0, "0x.e17c8956983d9d59p-2147483647"},
+		{"1e-646456994", defaultRound, 64, 'p', 0, "0"},
+		{"1e-1000000000", defaultRound, 64, 'p', 0, "0"},
+
+		// minimum and maximum values
+		{"1p2147483646", defaultRound, 64, 'p', 0, "0x.8p+2147483647"},
+		{"0x.8p2147483647", defaultRound, 64, 'p', 0, "0x.8p+2147483647"},
+		{"0x.8p-2147483647", defaultRound, 64, 'p', 0, "0x.8p-2147483647"},
+		{"1p-2147483649", defaultRound, 64, 'p', 0, "0x.8p-2147483648"},
+
+		// TODO(gri) need tests for actual large Floats
+
+		{"0", defaultRound, 53, 'b', 0, "0"},
+		{"-0", defaultRound, 53, 'b', 0, "-0"},
+		{"1.0", defaultRound, 53, 'b', 0, "4503599627370496p-52"},
+		{"-1.0", defaultRound, 53, 'b', 0, "-4503599627370496p-52"},
+		{"4503599627370496", defaultRound, 53, 'b', 0, "4503599627370496p+0"},
+
+		// issue 9939
+		{"3", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"},
+		{"03", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"},
+		{"3.", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"},
+		{"3.0", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"},
+		{"3.00", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"},
+		{"3.000", defaultRound, 350, 'b', 0, "1720123961992553633708115671476565205597423741876210842803191629540192157066363606052513914832594264915968p-348"},
+
+		{"3", defaultRound, 350, 'p', 0, "0x.cp+2"},
+		{"03", defaultRound, 350, 'p', 0, "0x.cp+2"},
+		{"3.", defaultRound, 350, 'p', 0, "0x.cp+2"},
+		{"3.0", defaultRound, 350, 'p', 0, "0x.cp+2"},
+		{"3.00", defaultRound, 350, 'p', 0, "0x.cp+2"},
+		{"3.000", defaultRound, 350, 'p', 0, "0x.cp+2"},
+
+		{"0", defaultRound, 64, 'p', 0, "0"},
+		{"-0", defaultRound, 64, 'p', 0, "-0"},
+		{"1024.0", defaultRound, 64, 'p', 0, "0x.8p+11"},
+		{"-1024.0", defaultRound, 64, 'p', 0, "-0x.8p+11"},
+
+		{"0", defaultRound, 64, 'x', -1, "0x0p+00"},
+		{"0", defaultRound, 64, 'x', 0, "0x0p+00"},
+		{"0", defaultRound, 64, 'x', 1, "0x0.0p+00"},
+		{"0", defaultRound, 64, 'x', 5, "0x0.00000p+00"},
+		{"3.25", defaultRound, 64, 'x', 0, "0x1p+02"},
+		{"-3.25", defaultRound, 64, 'x', 0, "-0x1p+02"},
+		{"3.25", defaultRound, 64, 'x', 1, "0x1.ap+01"},
+		{"-3.25", defaultRound, 64, 'x', 1, "-0x1.ap+01"},
+		{"3.25", defaultRound, 64, 'x', -1, "0x1.ap+01"},
+		{"-3.25", defaultRound, 64, 'x', -1, "-0x1.ap+01"},
+		{"1024.0", defaultRound, 64, 'x', 0, "0x1p+10"},
+		{"-1024.0", defaultRound, 64, 'x', 0, "-0x1p+10"},
+		{"1024.0", defaultRound, 64, 'x', 5, "0x1.00000p+10"},
+		{"8191.0", defaultRound, 53, 'x', -1, "0x1.fffp+12"},
+		{"8191.5", defaultRound, 53, 'x', -1, "0x1.fff8p+12"},
+		{"8191.53125", defaultRound, 53, 'x', -1, "0x1.fff88p+12"},
+		{"8191.53125", defaultRound, 53, 'x', 4, "0x1.fff8p+12"},
+		{"8191.53125", defaultRound, 53, 'x', 3, "0x1.000p+13"},
+		{"8191.53125", defaultRound, 53, 'x', 0, "0x1p+13"},
+		{"8191.533203125", defaultRound, 53, 'x', -1, "0x1.fff888p+12"},
+		{"8191.533203125", defaultRound, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.533203125", defaultRound, 53, 'x', 4, "0x1.fff9p+12"},
+
+		{"8191.53125", defaultRound, 53, 'x', -1, "0x1.fff88p+12"},
+		{"8191.53125", ToNearestEven, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.53125", ToNearestAway, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.53125", ToZero, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.53125", AwayFromZero, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.53125", ToNegativeInf, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.53125", ToPositiveInf, 53, 'x', 5, "0x1.fff88p+12"},
+
+		{"8191.53125", defaultRound, 53, 'x', 4, "0x1.fff8p+12"},
+		{"8191.53125", defaultRound, 53, 'x', 3, "0x1.000p+13"},
+		{"8191.53125", defaultRound, 53, 'x', 0, "0x1p+13"},
+		{"8191.533203125", defaultRound, 53, 'x', -1, "0x1.fff888p+12"},
+		{"8191.533203125", defaultRound, 53, 'x', 6, "0x1.fff888p+12"},
+		{"8191.533203125", defaultRound, 53, 'x', 5, "0x1.fff88p+12"},
+		{"8191.533203125", defaultRound, 53, 'x', 4, "0x1.fff9p+12"},
+
+		{"8191.53125", ToNearestEven, 53, 'x', 4, "0x1.fff8p+12"},
+		{"8191.53125", ToNearestAway, 53, 'x', 4, "0x1.fff9p+12"},
+		{"8191.53125", ToZero, 53, 'x', 4, "0x1.fff8p+12"},
+		{"8191.53125", ToZero, 53, 'x', 2, "0x1.ffp+12"},
+		{"8191.53125", AwayFromZero, 53, 'x', 4, "0x1.fff9p+12"},
+		{"8191.53125", ToNegativeInf, 53, 'x', 4, "0x1.fff8p+12"},
+		{"-8191.53125", ToNegativeInf, 53, 'x', 4, "-0x1.fff9p+12"},
+		{"8191.53125", ToPositiveInf, 53, 'x', 4, "0x1.fff9p+12"},
+		{"-8191.53125", ToPositiveInf, 53, 'x', 4, "-0x1.fff8p+12"},
+
+		// issue 34343
+		{"0x.8p-2147483648", ToNearestEven, 4, 'p', -1, "0x.8p-2147483648"},
+		{"0x.8p-2147483648", ToNearestEven, 4, 'x', -1, "0x1p-2147483649"},
+	} {
+		f, _, err := ParseFloat(test.x, 0, test.prec, ToNearestEven)
+		if err != nil {
+			t.Errorf("%v: %s", test, err)
+			continue
+		}
+		if test.round != defaultRound {
+			f.SetMode(test.round)
+		}
+
+		got := f.Text(test.format, test.digits)
+		if got != test.want {
+			t.Errorf("%v: got %s; want %s", test, got, test.want)
+		}
+
+		// compare with strconv.FormatFloat output if possible
+		// ('p' format is not supported by strconv.FormatFloat,
+		// and its output for 0.0 prints a biased exponent value
+		// as in 0p-1074 which makes no sense to emulate here)
+		if test.prec == 53 && test.format != 'p' && f.Sign() != 0 && (test.round == ToNearestEven || test.round == defaultRound) {
+			f64, acc := f.Float64()
+			if acc != Exact {
+				t.Errorf("%v: expected exact conversion to float64", test)
+				continue
+			}
+			got := strconv.FormatFloat(f64, test.format, test.digits, 64)
+			if got != test.want {
+				t.Errorf("%v: got %s; want %s", test, got, test.want)
+			}
+		}
+	}
+}
+
+func TestFloatFormat(t *testing.T) {
+	for _, test := range []struct {
+		format string
+		value  any // float32, float64, or string (== 512bit *Float)
+		want   string
+	}{
+		// from fmt/fmt_test.go
+		{"%+.3e", 0.0, "+0.000e+00"},
+		{"%+.3e", 1.0, "+1.000e+00"},
+		{"%+.3f", -1.0, "-1.000"},
+		{"%+.3F", -1.0, "-1.000"},
+		{"%+.3F", float32(-1.0), "-1.000"},
+		{"%+07.2f", 1.0, "+001.00"},
+		{"%+07.2f", -1.0, "-001.00"},
+		{"%+10.2f", +1.0, "     +1.00"},
+		{"%+10.2f", -1.0, "     -1.00"},
+		{"% .3E", -1.0, "-1.000E+00"},
+		{"% .3e", 1.0, " 1.000e+00"},
+		{"%+.3g", 0.0, "+0"},
+		{"%+.3g", 1.0, "+1"},
+		{"%+.3g", -1.0, "-1"},
+		{"% .3g", -1.0, "-1"},
+		{"% .3g", 1.0, " 1"},
+		{"%b", float32(1.0), "8388608p-23"},
+		{"%b", 1.0, "4503599627370496p-52"},
+
+		// from fmt/fmt_test.go: old test/fmt_test.go
+		{"%e", 1.0, "1.000000e+00"},
+		{"%e", 1234.5678e3, "1.234568e+06"},
+		{"%e", 1234.5678e-8, "1.234568e-05"},
+		{"%e", -7.0, "-7.000000e+00"},
+		{"%e", -1e-9, "-1.000000e-09"},
+		{"%f", 1234.5678e3, "1234567.800000"},
+		{"%f", 1234.5678e-8, "0.000012"},
+		{"%f", -7.0, "-7.000000"},
+		{"%f", -1e-9, "-0.000000"},
+		{"%g", 1234.5678e3, "1.2345678e+06"},
+		{"%g", float32(1234.5678e3), "1.2345678e+06"},
+		{"%g", 1234.5678e-8, "1.2345678e-05"},
+		{"%g", -7.0, "-7"},
+		{"%g", -1e-9, "-1e-09"},
+		{"%g", float32(-1e-9), "-1e-09"},
+		{"%E", 1.0, "1.000000E+00"},
+		{"%E", 1234.5678e3, "1.234568E+06"},
+		{"%E", 1234.5678e-8, "1.234568E-05"},
+		{"%E", -7.0, "-7.000000E+00"},
+		{"%E", -1e-9, "-1.000000E-09"},
+		{"%G", 1234.5678e3, "1.2345678E+06"},
+		{"%G", float32(1234.5678e3), "1.2345678E+06"},
+		{"%G", 1234.5678e-8, "1.2345678E-05"},
+		{"%G", -7.0, "-7"},
+		{"%G", -1e-9, "-1E-09"},
+		{"%G", float32(-1e-9), "-1E-09"},
+
+		{"%20.6e", 1.2345e3, "        1.234500e+03"},
+		{"%20.6e", 1.2345e-3, "        1.234500e-03"},
+		{"%20e", 1.2345e3, "        1.234500e+03"},
+		{"%20e", 1.2345e-3, "        1.234500e-03"},
+		{"%20.8e", 1.2345e3, "      1.23450000e+03"},
+		{"%20f", 1.23456789e3, "         1234.567890"},
+		{"%20f", 1.23456789e-3, "            0.001235"},
+		{"%20f", 12345678901.23456789, "  12345678901.234568"},
+		{"%-20f", 1.23456789e3, "1234.567890         "},
+		{"%20.8f", 1.23456789e3, "       1234.56789000"},
+		{"%20.8f", 1.23456789e-3, "          0.00123457"},
+		{"%g", 1.23456789e3, "1234.56789"},
+		{"%g", 1.23456789e-3, "0.00123456789"},
+		{"%g", 1.23456789e20, "1.23456789e+20"},
+		{"%20e", math.Inf(1), "                +Inf"},
+		{"%-20f", math.Inf(-1), "-Inf                "},
+
+		// from fmt/fmt_test.go: comparison of padding rules with C printf
+		{"%.2f", 1.0, "1.00"},
+		{"%.2f", -1.0, "-1.00"},
+		{"% .2f", 1.0, " 1.00"},
+		{"% .2f", -1.0, "-1.00"},
+		{"%+.2f", 1.0, "+1.00"},
+		{"%+.2f", -1.0, "-1.00"},
+		{"%7.2f", 1.0, "   1.00"},
+		{"%7.2f", -1.0, "  -1.00"},
+		{"% 7.2f", 1.0, "   1.00"},
+		{"% 7.2f", -1.0, "  -1.00"},
+		{"%+7.2f", 1.0, "  +1.00"},
+		{"%+7.2f", -1.0, "  -1.00"},
+		{"%07.2f", 1.0, "0001.00"},
+		{"%07.2f", -1.0, "-001.00"},
+		{"% 07.2f", 1.0, " 001.00"},
+		{"% 07.2f", -1.0, "-001.00"},
+		{"%+07.2f", 1.0, "+001.00"},
+		{"%+07.2f", -1.0, "-001.00"},
+
+		// from fmt/fmt_test.go: zero padding does not apply to infinities
+		{"%020f", math.Inf(-1), "                -Inf"},
+		{"%020f", math.Inf(+1), "                +Inf"},
+		{"% 020f", math.Inf(-1), "                -Inf"},
+		{"% 020f", math.Inf(+1), "                 Inf"},
+		{"%+020f", math.Inf(-1), "                -Inf"},
+		{"%+020f", math.Inf(+1), "                +Inf"},
+		{"%20f", -1.0, "           -1.000000"},
+
+		// handle %v like %g
+		{"%v", 0.0, "0"},
+		{"%v", -7.0, "-7"},
+		{"%v", -1e-9, "-1e-09"},
+		{"%v", float32(-1e-9), "-1e-09"},
+		{"%010v", 0.0, "0000000000"},
+
+		// *Float cases
+		{"%.20f", "1e-20", "0.00000000000000000001"},
+		{"%.20f", "-1e-20", "-0.00000000000000000001"},
+		{"%30.20f", "-1e-20", "       -0.00000000000000000001"},
+		{"%030.20f", "-1e-20", "-00000000.00000000000000000001"},
+		{"%030.20f", "+1e-20", "000000000.00000000000000000001"},
+		{"% 030.20f", "+1e-20", " 00000000.00000000000000000001"},
+
+		// erroneous formats
+		{"%s", 1.0, "%!s(*big.Float=1)"},
+	} {
+		value := new(Float)
+		switch v := test.value.(type) {
+		case float32:
+			value.SetPrec(24).SetFloat64(float64(v))
+		case float64:
+			value.SetPrec(53).SetFloat64(v)
+		case string:
+			value.SetPrec(512).Parse(v, 0)
+		default:
+			t.Fatalf("unsupported test value: %v (%T)", v, v)
+		}
+
+		if got := fmt.Sprintf(test.format, value); got != test.want {
+			t.Errorf("%v: got %q; want %q", test, got, test.want)
+		}
+	}
+}
+
+// BenchmarkParseFloatSmallExp times Parse on decimal literals with exponents of magnitude at most 50.
+func BenchmarkParseFloatSmallExp(b *testing.B) {
+	for n := b.N; n > 0; n-- {
+		for _, lit := range []string{
+			"1e0",
+			"1e-1",
+			"1e-2",
+			"1e-3",
+			"1e-4",
+			"1e-5",
+			"1e-10",
+			"1e-20",
+			"1e-50",
+			"1e1",
+			"1e2",
+			"1e3",
+			"1e4",
+			"1e5",
+			"1e10",
+			"1e20",
+			"1e50",
+		} {
+			var x Float
+			if _, _, err := x.Parse(lit, 0); err != nil {
+				b.Fatalf("%s: %v", lit, err)
+			}
+		}
+	}
+}
+
+// BenchmarkParseFloatLargeExp times Parse on decimal literals with exponents of magnitude up to 10000.
+func BenchmarkParseFloatLargeExp(b *testing.B) {
+	for n := b.N; n > 0; n-- {
+		for _, lit := range []string{
+			"1e0",
+			"1e-10",
+			"1e-20",
+			"1e-30",
+			"1e-40",
+			"1e-50",
+			"1e-100",
+			"1e-500",
+			"1e-1000",
+			"1e-5000",
+			"1e-10000",
+			"1e10",
+			"1e20",
+			"1e30",
+			"1e40",
+			"1e50",
+			"1e100",
+			"1e500",
+			"1e1000",
+			"1e5000",
+			"1e10000",
+		} {
+			var x Float
+			if _, _, err := x.Parse(lit, 0); err != nil {
+				b.Fatalf("%s: %v", lit, err)
+			}
+		}
+	}
+}
+
+// TestFloatScan feeds each input to fmt.Fscanf with a *Float operand and checks
+// the error, the scanned value (via String), and the count of unread bytes.
+func TestFloatScan(t *testing.T) {
+	var floatScanTests = []struct {
+		input     string
+		format    string
+		output    string
+		remaining int
+		wantErr   bool
+	}{
+		0: {"10.0", "%f", "10", 0, false},
+		1: {"23.98+2.0", "%v", "23.98", 4, false},
+		2: {"-1+1", "%v", "-1", 2, false},
+		3: {" 00000", "%v", "0", 0, false},
+		4: {"-123456p-78", "%b", "-4.084816388e-19", 0, false},
+		5: {"+123", "%b", "123", 0, false},
+		6: {"-1.234e+56", "%e", "-1.234e+56", 0, false},
+		7: {"-1.234E-56", "%E", "-1.234e-56", 0, false},
+		8: {"-1.234e+567", "%g", "-1.234e+567", 0, false},
+		9: {"+1234567891011.234", "%G", "1.234567891e+12", 0, false},
+
+		// Scan doesn't handle ±Inf (output and remaining are not checked for error cases).
+		10: {"Inf", "%v", "", 3, true},
+		11: {"+Inf", "%v", "", 3, true},
+		12: {"-Inf", "%v", "", 3, true},
+	}
+
+	var buf bytes.Buffer
+	for i, test := range floatScanTests {
+		x := new(Float)
+		buf.Reset()
+		buf.WriteString(test.input)
+		_, err := fmt.Fscanf(&buf, test.format, x)
+		if test.wantErr {
+			if err == nil {
+				t.Errorf("#%d want non-nil err", i)
+			}
+			continue
+		}
+		if err != nil {
+			t.Errorf("#%d error: %s", i, err)
+		}
+		if x.String() != test.output {
+			t.Errorf("#%d got %s; want %s", i, x.String(), test.output)
+		}
+		if buf.Len() != test.remaining {
+			t.Errorf("#%d got %d bytes remaining; want %d", i, buf.Len(), test.remaining)
+		}
+	}
+}
diff --git a/src/math/big/floatexample_test.go b/src/math/big/floatexample_test.go
new file mode 100644
index 0000000..0c6668c
--- /dev/null
+++ b/src/math/big/floatexample_test.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big_test
+
+import (
+	"fmt"
+	"math"
+	"math/big"
+)
+
+func ExampleFloat_Add() {
+	// Add operands of different precision; the sum is rounded
+	// to the precision that was chosen for the result z.
+	x := new(big.Float).SetInt64(1000)          // x is automatically set to 64bit precision
+	y := new(big.Float).SetFloat64(2.718281828) // y is automatically set to 53bit precision
+	z := new(big.Float).SetPrec(32)
+	z.Add(x, y)
+	fmt.Printf("x = %.10g (%s, prec = %d, acc = %s)\n", x, x.Text('p', 0), x.Prec(), x.Acc())
+	fmt.Printf("y = %.10g (%s, prec = %d, acc = %s)\n", y, y.Text('p', 0), y.Prec(), y.Acc())
+	fmt.Printf("z = %.10g (%s, prec = %d, acc = %s)\n", z, z.Text('p', 0), z.Prec(), z.Acc())
+	// Output:
+	// x = 1000 (0x.fap+10, prec = 64, acc = Exact)
+	// y = 2.718281828 (0x.adf85458248cd8p+2, prec = 53, acc = Exact)
+	// z = 1002.718282 (0x.faadf854p+10, prec = 32, acc = Below)
+}
+
+func ExampleFloat_shift() {
+	// A Float is "shifted" by adding the shift count to its binary exponent.
+	for shift := -5; shift <= 5; shift++ {
+		half := big.NewFloat(0.5)
+		exp := half.MantExp(nil) // current binary exponent of half
+		fmt.Println(half.SetMantExp(half, exp+shift))
+	}
+	// Output:
+	// 0.015625
+	// 0.03125
+	// 0.0625
+	// 0.125
+	// 0.25
+	// 0.5
+	// 1
+	// 2
+	// 4
+	// 8
+	// 16
+}
+
+func ExampleFloat_Cmp() {
+	posInf := math.Inf(+1)
+	zero := 0.0 // a variable, so that -zero below is a real negative zero
+
+	values := []float64{-posInf, -1.2, -zero, 0, +1.2, +posInf}
+
+	fmt.Println("   x     y  cmp")
+	fmt.Println("---------------")
+	for i := range values {
+		lhs := big.NewFloat(values[i])
+		for j := range values {
+			rhs := big.NewFloat(values[j])
+			fmt.Printf("%4g  %4g  %3d\n", lhs, rhs, lhs.Cmp(rhs))
+		}
+		fmt.Println()
+	}
+
+	// Output:
+	//    x     y  cmp
+	// ---------------
+	// -Inf  -Inf    0
+	// -Inf  -1.2   -1
+	// -Inf    -0   -1
+	// -Inf     0   -1
+	// -Inf   1.2   -1
+	// -Inf  +Inf   -1
+	//
+	// -1.2  -Inf    1
+	// -1.2  -1.2    0
+	// -1.2    -0   -1
+	// -1.2     0   -1
+	// -1.2   1.2   -1
+	// -1.2  +Inf   -1
+	//
+	//   -0  -Inf    1
+	//   -0  -1.2    1
+	//   -0    -0    0
+	//   -0     0    0
+	//   -0   1.2   -1
+	//   -0  +Inf   -1
+	//
+	//    0  -Inf    1
+	//    0  -1.2    1
+	//    0    -0    0
+	//    0     0    0
+	//    0   1.2   -1
+	//    0  +Inf   -1
+	//
+	//  1.2  -Inf    1
+	//  1.2  -1.2    1
+	//  1.2    -0    1
+	//  1.2     0    1
+	//  1.2   1.2    0
+	//  1.2  +Inf   -1
+	//
+	// +Inf  -Inf    1
+	// +Inf  -1.2    1
+	// +Inf    -0    1
+	// +Inf     0    1
+	// +Inf   1.2    1
+	// +Inf  +Inf    0
+}
+
+func ExampleRoundingMode() {
+	samples := []float64{2.6, 2.5, 2.1, -2.1, -2.5, -2.6}
+	first, last := big.ToNearestEven, big.ToPositiveInf
+
+	fmt.Print("   x")
+	for m := first; m <= last; m++ {
+		fmt.Printf("  %s", m)
+	}
+	fmt.Println()
+
+	for _, v := range samples {
+		fmt.Printf("%4g", v)
+		for m := first; m <= last; m++ {
+			// the samples need 2 mantissa bits; a binary precision of 2 rounds them to integer values
+			rounded := new(big.Float).SetPrec(2).SetMode(m).SetFloat64(v)
+			fmt.Printf("  %*g", len(m.String()), rounded)
+		}
+		fmt.Println()
+	}
+
+	// Output:
+	//    x  ToNearestEven  ToNearestAway  ToZero  AwayFromZero  ToNegativeInf  ToPositiveInf
+	//  2.6              3              3       2             3              2              3
+	//  2.5              2              3       2             3              2              3
+	//  2.1              2              2       2             3              2              3
+	// -2.1             -2             -2      -2            -3             -3             -2
+	// -2.5             -2             -3      -2            -3             -3             -2
+	// -2.6             -3             -3      -2            -3             -3             -2
+}
diff --git a/src/math/big/floatmarsh.go b/src/math/big/floatmarsh.go
new file mode 100644
index 0000000..2a78c69
--- /dev/null
+++ b/src/math/big/floatmarsh.go
@@ -0,0 +1,131 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements encoding/decoding of Floats.
+
+package big
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+)
+
+// floatGobVersion is the gob codec version, stored as the first encoded byte. Permits backward-compatible changes to the encoding.
+const floatGobVersion byte = 1
+
+// GobEncode implements the gob.GobEncoder interface.
+// The Float value and all its attributes (precision, rounding mode,
+// accuracy) are marshaled. A nil receiver encodes as a nil byte slice.
+func (x *Float) GobEncode() ([]byte, error) {
+	if x == nil {
+		return nil, nil
+	}
+
+	// determine max. space (bytes) required for encoding
+	sz := 1 + 1 + 4 // version + mode|acc|form|neg (3+2+2+1bit) + prec
+	n := 0          // number of mantissa words
+	if x.form == finite {
+		// add space for mantissa and exponent
+		n = int((x.prec + (_W - 1)) / _W) // required mantissa length in words for given precision
+		// actual mantissa slice could be shorter (trailing 0's) or longer (unused bits):
+		// - if shorter, only encode the words present
+		// - if longer, cut off unused words when encoding in bytes
+		//   (in practice, this should never happen since rounding
+		//   takes care of it, but be safe and do it always)
+		if len(x.mant) < n {
+			n = len(x.mant)
+		}
+		// len(x.mant) >= n
+		sz += 4 + n*_S // exp + mant
+	}
+	buf := make([]byte, sz)
+	// layout: byte 0 version, byte 1 flags, bytes 2-5 prec, then (finite only) bytes 6-9 exp and the mantissa
+	buf[0] = floatGobVersion
+	b := byte(x.mode&7)<<5 | byte((x.acc+1)&3)<<3 | byte(x.form&3)<<1 // acc is stored biased by +1
+	if x.neg {
+		b |= 1
+	}
+	buf[1] = b
+	binary.BigEndian.PutUint32(buf[2:], x.prec)
+
+	if x.form == finite {
+		binary.BigEndian.PutUint32(buf[6:], uint32(x.exp))
+		x.mant[len(x.mant)-n:].bytes(buf[10:]) // cut off unused trailing words
+	}
+
+	return buf, nil
+}
+
+// GobDecode implements the gob.GobDecoder interface. An empty buf resets z
+// to the zero Float. Otherwise the result is rounded per the precision and
+// rounding mode of z unless z's precision is 0, in which case z is set
+// exactly to the decoded value.
+func (z *Float) GobDecode(buf []byte) error {
+	if len(buf) == 0 {
+		// Other side sent a nil or default value.
+		*z = Float{}
+		return nil
+	}
+	if len(buf) < 6 {
+		return errors.New("Float.GobDecode: buffer too small")
+	}
+
+	if buf[0] != floatGobVersion {
+		return fmt.Errorf("Float.GobDecode: encoding version %d not supported", buf[0])
+	}
+	// save z's own precision and mode; the decoded attributes overwrite them below
+	oldPrec := z.prec
+	oldMode := z.mode
+	// unpack the flags byte: mode (3 bits) | acc+1 (2 bits) | form (2 bits) | neg (1 bit)
+	b := buf[1]
+	z.mode = RoundingMode((b >> 5) & 7)
+	z.acc = Accuracy((b>>3)&3) - 1
+	z.form = form((b >> 1) & 3)
+	z.neg = b&1 != 0
+	z.prec = binary.BigEndian.Uint32(buf[2:])
+	// NOTE: from here on z has been partially overwritten, also on the error returns below
+	if z.form == finite {
+		if len(buf) < 10 {
+			return errors.New("Float.GobDecode: buffer too small for finite form float")
+		}
+		z.exp = int32(binary.BigEndian.Uint32(buf[6:]))
+		z.mant = z.mant.setBytes(buf[10:])
+	}
+
+	if oldPrec != 0 {
+		z.mode = oldMode
+		z.SetPrec(uint(oldPrec))
+	}
+
+	if msg := z.validate0(); msg != "" {
+		return errors.New("Float.GobDecode: " + msg)
+	}
+
+	return nil
+}
+
+// MarshalText implements the encoding.TextMarshaler interface.
+// Only the value of the Float is written (in full precision, 'g' format);
+// precision, accuracy and the other attributes are not. A nil x gives "<nil>".
+func (x *Float) MarshalText() (text []byte, err error) {
+	if x != nil {
+		text = x.Append(nil, 'g', -1)
+		return text, nil
+	}
+	return []byte("<nil>"), nil
+}
+
+// UnmarshalText implements the encoding.TextUnmarshaler interface.
+// The result is rounded per the precision and rounding mode of z.
+// If z's precision is 0, it is changed to 64 before rounding takes
+// effect.
+func (z *Float) UnmarshalText(text []byte) error {
+	// TODO(gri): get rid of the []byte/string conversion
+	// A parse failure is reported together with the offending text; the
+	// cause is embedded in the message via %v rather than wrapped.
+	if _, _, err := z.Parse(string(text), 0); err != nil {
+		return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Float (%v)", text, err)
+	}
+	return nil
+}
diff --git a/src/math/big/floatmarsh_test.go b/src/math/big/floatmarsh_test.go
new file mode 100644
index 0000000..20def68
--- /dev/null
+++ b/src/math/big/floatmarsh_test.go
@@ -0,0 +1,173 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"encoding/gob"
+	"encoding/json"
+	"io"
+	"strings"
+	"testing"
+)
+
+var floatVals = []string{ // unsigned inputs; the encoding tests prepend "", "+" or "-"
+	"0",
+	"1",
+	"0.1",
+	"2.71828",
+	"1234567890",
+	"3.14e1234",
+	"3.14e-1234",
+	"0.738957395793475734757349579759957975985497e100",
+	"0.73895739579347546656564656573475734957975995797598589749859834759476745986795497e100",
+	"inf",
+	"Inf",
+}
+
+// TestFloatGobEncoding round-trips Floats through gob and compares value, precision, mode and accuracy.
+func TestFloatGobEncoding(t *testing.T) {
+	var stream bytes.Buffer
+	encoder := gob.NewEncoder(&stream)
+	decoder := gob.NewDecoder(&stream)
+	for _, digits := range floatVals {
+		for _, sign := range []string{"", "+", "-"} {
+			for _, prec := range []uint{0, 1, 2, 10, 53, 64, 100, 1000} {
+				for _, mode := range []RoundingMode{ToNearestEven, ToNearestAway, ToZero, AwayFromZero, ToNegativeInf, ToPositiveInf} {
+					stream.Reset() // empty buffer for each test case (in case of failures)
+					x := sign + digits
+					var sent Float
+					_, _, err := sent.SetPrec(prec).SetMode(mode).Parse(x, 0)
+					if err != nil {
+						t.Errorf("parsing of %s (%dbits, %v) failed (invalid test case): %v", x, prec, mode, err)
+						continue
+					}
+
+					// If sent was set to prec == 0, sent.Parse(x, 0) assumes precision 64. Correct it.
+					if prec == 0 {
+						sent.SetPrec(0)
+					}
+
+					if err := encoder.Encode(&sent); err != nil {
+						t.Errorf("encoding of %v (%dbits, %v) failed: %v", &sent, prec, mode, err)
+						continue
+					}
+
+					var got Float
+					if err := decoder.Decode(&got); err != nil {
+						t.Errorf("decoding of %v (%dbits, %v) failed: %v", &sent, prec, mode, err)
+						continue
+					}
+
+					if got.Cmp(&sent) != 0 {
+						t.Errorf("transmission of %s failed: got %s want %s", x, got.String(), sent.String())
+						continue
+					}
+
+					if got.Prec() != prec {
+						t.Errorf("transmission of %s's prec failed: got %d want %d", x, got.Prec(), prec)
+					}
+
+					if got.Mode() != mode {
+						t.Errorf("transmission of %s's mode failed: got %s want %s", x, got.Mode(), mode)
+					}
+
+					if got.Acc() != sent.Acc() {
+						t.Errorf("transmission of %s's accuracy failed: got %s want %s", x, got.Acc(), sent.Acc())
+					}
+				}
+			}
+		}
+	}
+}
+
+// TestFloatCorruptGob checks that truncated or corrupted gob streams are rejected with an error.
+func TestFloatCorruptGob(t *testing.T) {
+	var buf bytes.Buffer
+	tx := NewFloat(4.0 / 3).SetPrec(1000).SetMode(ToPositiveInf) // 4.0, because the constant 4 / 3 is integer division (== 1)
+	if err := gob.NewEncoder(&buf).Encode(tx); err != nil {
+		t.Fatal(err)
+	}
+	b := buf.Bytes()
+	var rx Float
+	if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err != nil {
+		t.Fatal(err)
+	}
+	// A truncated stream must fail with io.ErrUnexpectedEOF.
+	if err := gob.NewDecoder(bytes.NewReader(b[:10])).Decode(&rx); err != io.ErrUnexpectedEOF {
+		t.Errorf("got %v want %v", err, io.ErrUnexpectedEOF)
+	}
+	// A stream with an overwritten byte must not decode successfully.
+	b[1] = 0
+	if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err == nil {
+		t.Fatal("got nil want version error")
+	}
+}
+
+// TestFloatJSONEncoding round-trips Floats through encoding/json at several precisions.
+func TestFloatJSONEncoding(t *testing.T) {
+	for _, digits := range floatVals {
+		for _, sign := range []string{"", "+", "-"} {
+			for _, prec := range []uint{0, 1, 2, 10, 53, 64, 100, 1000} {
+				if testing.Short() && prec > 53 {
+					continue
+				}
+				x := sign + digits
+				var sent Float
+				if _, _, err := sent.SetPrec(prec).Parse(x, 0); err != nil {
+					t.Errorf("parsing of %s (prec = %d) failed (invalid test case): %v", x, prec, err)
+					continue
+				}
+				data, err := json.Marshal(&sent)
+				if err != nil {
+					t.Errorf("marshaling of %v (prec = %d) failed: %v", &sent, prec, err)
+					continue
+				}
+				var got Float
+				got.SetPrec(prec)
+				if err := json.Unmarshal(data, &got); err != nil {
+					t.Errorf("unmarshaling of %v (prec = %d) failed: %v", &sent, prec, err)
+					continue
+				}
+				if got.Cmp(&sent) != 0 {
+					t.Errorf("JSON encoding of %v (prec = %d) failed: got %v want %v", &sent, prec, &got, &sent)
+				}
+			}
+		}
+	}
+}
+
+func TestFloatGobDecodeShortBuffer(t *testing.T) {
+	// Inputs that are too short to hold a complete encoding must produce an error.
+	for _, short := range [][]byte{
+		{0x1, 0x0, 0x0, 0x0},
+		{0x1, 0xfa, 0x0, 0x0, 0x0, 0x0},
+	} {
+		if err := NewFloat(0).GobDecode(short); err == nil {
+			t.Error("expected GobDecode to return error for malformed input")
+		}
+	}
+}
+
+func TestFloatGobDecodeInvalid(t *testing.T) {
+	cases := []struct {
+		encoded []byte
+		prefix  string // expected start of the error message
+	}{
+		{
+			encoded: []byte{0x1, 0x2a, 0x20, 0x20, 0x20, 0x20, 0x0, 0x20, 0x20, 0x20, 0x0, 0x20, 0x20, 0x20, 0x20, 0x0, 0x0, 0x0, 0x0, 0xc},
+			prefix:  "Float.GobDecode: msb not set in last word",
+		},
+		{
+			encoded: []byte{1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+			prefix:  "Float.GobDecode: nonzero finite number with empty mantissa",
+		},
+	}
+	for _, c := range cases {
+		if err := NewFloat(0).GobDecode(c.encoded); err == nil || !strings.HasPrefix(err.Error(), c.prefix) {
+			t.Errorf("expected GobDecode error prefix: %s, got: %v", c.prefix, err)
+		}
+	}
+}
diff --git a/src/math/big/ftoa.go b/src/math/big/ftoa.go
new file mode 100644
index 0000000..5506e6e
--- /dev/null
+++ b/src/math/big/ftoa.go
@@ -0,0 +1,536 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements Float-to-string conversion functions.
+// It is closely following the corresponding implementation
+// in strconv/ftoa.go, but modified and simplified for Float.
+
+package big
+
+import (
+	"bytes"
+	"fmt"
+	"strconv"
+)
+
+// Text returns the floating-point number x as a string, formatted according
+// to the given format and precision prec. The format is one of:
+//
+//	'e'	-d.dddde±dd, decimal exponent, at least two (possibly 0) exponent digits
+//	'E'	-d.ddddE±dd, decimal exponent, at least two (possibly 0) exponent digits
+//	'f'	-ddddd.dddd, no exponent
+//	'g'	like 'e' for large exponents, like 'f' otherwise
+//	'G'	like 'E' for large exponents, like 'f' otherwise
+//	'x'	-0xd.dddddp±dd, hexadecimal mantissa, decimal power of two exponent
+//	'p'	-0x.dddp±dd, hexadecimal mantissa, decimal power of two exponent (non-standard)
+//	'b'	-ddddddp±dd, decimal mantissa, decimal power of two exponent (non-standard)
+//
+// For the power-of-two exponent formats, the mantissa is printed in normalized form:
+//
+//	'x'	hexadecimal mantissa in [1, 2), or 0
+//	'p'	hexadecimal mantissa in [½, 1), or 0
+//	'b'	decimal integer mantissa using x.Prec() bits, or 0
+//
+// Note that the 'x' form is the one used by most other languages and libraries.
+//
+// For any other format character, Text returns a "%" followed by that
+// unrecognized character.
+//
+// The precision prec controls the number of digits (excluding the exponent)
+// printed by the 'e', 'E', 'f', 'g', 'G', and 'x' formats.
+// For 'e', 'E', 'f', and 'x', it is the number of digits after the decimal point.
+// For 'g' and 'G' it is the total number of digits. A negative precision selects
+// the smallest number of decimal digits necessary to identify the value x uniquely
+// using x.Prec() mantissa bits.
+// The prec value is ignored for the 'b' and 'p' formats.
+func (x *Float) Text(format byte, prec int) string {
+	size := 10 // TODO(gri) determine a good/better value here
+	if prec > 0 {
+		size += prec
+	}
+	return string(x.Append(make([]byte, 0, size), format, prec))
+}
+
+// String formats x like x.Text('g', 10), i.e. in 'g' format with 10 digits.
+// (String must be called explicitly, Float.Format does not support %s verb.)
+func (x *Float) String() string {
+	return x.Text('g', 10)
+}
+
+// Append appends to buf the string form of the floating-point number x, as generated by
+// x.Text(fmt, prec), and returns the extended buffer. (Here fmt is the format byte, not the package.)
+func (x *Float) Append(buf []byte, fmt byte, prec int) []byte {
+	// sign (removed again at the end if fmt turns out to be unknown)
+	if x.neg {
+		buf = append(buf, '-')
+	}
+
+	// Inf: always printed with an explicit sign, whatever fmt and prec are
+	if x.form == inf {
+		if !x.neg {
+			buf = append(buf, '+')
+		}
+		return append(buf, "Inf"...)
+	}
+
+	// pick off easy formats
+	switch fmt {
+	case 'b':
+		return x.fmtB(buf)
+	case 'p':
+		return x.fmtP(buf)
+	case 'x':
+		return x.fmtX(buf, prec)
+	}
+
+	// Algorithm:
+	//   1) convert Float to multiprecision decimal
+	//   2) round to desired precision
+	//   3) read digits out and format
+
+	// 1) convert Float to multiprecision decimal
+	var d decimal // == 0.0
+	if x.form == finite {
+		// x != 0
+		d.init(x.mant, int(x.exp)-x.mant.bitLen())
+	}
+
+	// 2) round to desired precision
+	shortest := false
+	if prec < 0 {
+		shortest = true
+		roundShortest(&d, x)
+		// Precision for shortest representation mode.
+		switch fmt {
+		case 'e', 'E':
+			prec = len(d.mant) - 1
+		case 'f':
+			prec = max(len(d.mant)-d.exp, 0)
+		case 'g', 'G':
+			prec = len(d.mant)
+		}
+	} else {
+		// round appropriately
+		switch fmt {
+		case 'e', 'E':
+			// one digit before and number of digits after decimal point
+			d.round(1 + prec)
+		case 'f':
+			// number of digits before and after decimal point
+			d.round(d.exp + prec)
+		case 'g', 'G':
+			if prec == 0 {
+				prec = 1
+			}
+			d.round(prec)
+		}
+	}
+
+	// 3) read digits out and format
+	switch fmt {
+	case 'e', 'E':
+		return fmtE(buf, fmt, prec, d)
+	case 'f':
+		return fmtF(buf, prec, d)
+	case 'g', 'G':
+		// trim trailing fractional zeros in %e format
+		eprec := prec
+		if eprec > len(d.mant) && len(d.mant) >= d.exp {
+			eprec = len(d.mant)
+		}
+		// %e is used if the exponent from the conversion
+		// is less than -4 or greater than or equal to the precision.
+		// If precision was the shortest possible, use eprec = 6 for
+		// this decision.
+		if shortest {
+			eprec = 6
+		}
+		exp := d.exp - 1
+		if exp < -4 || exp >= eprec {
+			if prec > len(d.mant) {
+				prec = len(d.mant)
+			}
+			return fmtE(buf, fmt+'e'-'g', prec-1, d) // fmt+'e'-'g' maps 'g' to 'e' and 'G' to 'E'
+		}
+		if prec > d.exp {
+			prec = len(d.mant)
+		}
+		return fmtF(buf, max(prec-d.exp, 0), d)
+	}
+
+	// unknown format: the result is '%' followed by the format character, without a sign
+	if x.neg {
+		buf = buf[:len(buf)-1] // sign was added prematurely - remove it again
+	}
+	return append(buf, '%', fmt)
+}
+
+func roundShortest(d *decimal, x *Float) { // rounds d to the fewest digits that still round to x at x's precision
+	// if the mantissa is zero, the number is zero - stop now
+	if len(d.mant) == 0 {
+		return
+	}
+
+	// Approach: All numbers in the interval [x - 1/2ulp, x + 1/2ulp]
+	// (possibly exclusive) round to x for the given precision of x.
+	// Compute the lower and upper bound in decimal form and find the
+	// shortest decimal number d such that lower <= d <= upper.
+
+	// TODO(gri) strconv/ftoa.go describes a shortcut in some cases.
+	// See if we can use it (in adjusted form) here as well.
+
+	// 1) Compute normalized mantissa mant and exponent exp for x such
+	// that the lsb of mant corresponds to 1/2 ulp for the precision of
+	// x (i.e., for mant we want x.prec + 1 bits).
+	mant := nat(nil).set(x.mant)
+	exp := int(x.exp) - mant.bitLen()
+	s := mant.bitLen() - int(x.prec+1)
+	switch {
+	case s < 0:
+		mant = mant.shl(mant, uint(-s))
+	case s > 0:
+		mant = mant.shr(mant, uint(+s))
+	}
+	exp += s
+	// x = mant * 2**exp with lsb(mant) == 1/2 ulp of x.prec
+
+	// 2) Compute lower bound by subtracting 1/2 ulp.
+	var lower decimal
+	var tmp nat
+	lower.init(tmp.sub(mant, natOne), exp)
+
+	// 3) Compute upper bound by adding 1/2 ulp.
+	var upper decimal
+	upper.init(tmp.add(mant, natOne), exp)
+
+	// The upper and lower bounds are possible outputs only if
+	// the original mantissa is even, so that ToNearestEven rounding
+	// would round to the original mantissa and not the neighbors.
+	inclusive := mant[0]&2 == 0 // test bit 1 since original mantissa was shifted by 1
+
+	// Now we can figure out the minimum number of digits required.
+	// Walk along until d has distinguished itself from upper and lower.
+	for i, m := range d.mant {
+		l := lower.at(i)
+		u := upper.at(i)
+
+		// Okay to round down (truncate) if lower has a different digit
+		// or if lower is inclusive and is exactly the result of rounding
+		// down (i.e., and we have reached the final digit of lower).
+		okdown := l != m || inclusive && i+1 == len(lower.mant)
+
+		// Okay to round up if upper has a different digit and either upper
+		// is inclusive or upper is bigger than the result of rounding up.
+		okup := m != u && (inclusive || m+1 < u || i+1 < len(upper.mant))
+
+		// If it's okay to do either, then round to the nearest one.
+		// If it's okay to do only one, do it.
+		switch {
+		case okdown && okup:
+			d.round(i + 1)
+			return
+		case okdown:
+			d.roundDown(i + 1)
+			return
+		case okup:
+			d.roundUp(i + 1)
+			return
+		}
+	}
+}
+
+// fmtE appends d to buf in the %e layout d.ddddde±dd, using fmt ('e' or 'E')
+// as the exponent character and printing prec digits after the decimal
+// point, and returns the extended buffer.
+func fmtE(buf []byte, fmt byte, prec int, d decimal) []byte {
+	// first digit: '0' stands in for an empty mantissa
+	lead := byte('0')
+	if len(d.mant) > 0 {
+		lead = d.mant[0]
+	}
+	buf = append(buf, lead)
+
+	// .moredigits: mantissa digits first, then zeros up to prec digits
+	if prec > 0 {
+		buf = append(buf, '.')
+		next := 1
+		if end := min(len(d.mant), prec+1); end > next {
+			buf = append(buf, d.mant[next:end]...)
+			next = end
+		}
+		for ; next <= prec; next++ {
+			buf = append(buf, '0')
+		}
+	}
+
+	// e±: an empty mantissa is printed with exponent 0
+	var exp int64
+	if len(d.mant) > 0 {
+		exp = int64(d.exp) - 1 // -1 because first digit was printed before '.'
+	}
+	// the sign is emitted explicitly; exp holds the magnitude from here on
+	sign := byte('+')
+	if exp < 0 {
+		sign = '-'
+		exp = -exp
+	}
+	buf = append(buf, fmt, sign)
+
+	// dd...d
+	if exp < 10 {
+		buf = append(buf, '0') // at least 2 exponent digits
+	}
+	return strconv.AppendInt(buf, exp, 10)
+}
+
+// fmtF appends d to buf in the %f layout ddddddd.ddddd with prec fraction digits.
+func fmtF(buf []byte, prec int, d decimal) []byte {
+	// integer part: the digits left of the decimal point, or a lone '0'
+	if d.exp <= 0 {
+		buf = append(buf, '0')
+	} else {
+		n := min(len(d.mant), d.exp)
+		buf = append(buf, d.mant[:n]...)
+		for pad := d.exp - n; pad > 0; pad-- {
+			buf = append(buf, '0') // positions beyond the mantissa are zeros
+		}
+	}
+
+	// fraction: nothing at all (not even the '.') unless prec > 0
+	if prec <= 0 {
+		return buf
+	}
+	buf = append(buf, '.')
+	for i := 0; i < prec; i++ {
+		buf = append(buf, d.at(d.exp+i))
+	}
+	return buf
+}
+
+// fmtB appends the string of x in the format mantissa "p" exponent
+// with a decimal mantissa and a binary exponent, or "0" if x is zero,
+// and returns the extended buffer.
+// The mantissa is normalized such that it uses x.Prec() bits in binary
+// representation.
+// The sign of x is ignored, and x must not be an Inf.
+// (The caller handles Inf before invoking fmtB.)
+func (x *Float) fmtB(buf []byte) []byte {
+	if x.form == zero {
+		return append(buf, '0')
+	}
+
+	if debugFloat && x.form != finite {
+		panic("non-finite float")
+	}
+	// x != 0
+
+	// adjust mantissa to use exactly x.prec bits
+	m := x.mant
+	switch w := uint32(len(x.mant)) * _W; {
+	case w < x.prec:
+		m = nat(nil).shl(m, uint(x.prec-w))
+	case w > x.prec:
+		m = nat(nil).shr(m, uint(w-x.prec))
+	}
+
+	buf = append(buf, m.utoa(10)...)
+	buf = append(buf, 'p')
+	e := int64(x.exp) - int64(x.prec)
+	if e >= 0 {
+		buf = append(buf, '+')
+	}
+	return strconv.AppendInt(buf, e, 10)
+}
+
+// fmtX appends the string of x in the format "0x1." mantissa "p" exponent
+// with a hexadecimal mantissa and a binary exponent, or "0x0p+00" (with prec
+// fractional zero digits if prec > 0) if x is zero, and returns the extended buffer.
+// A non-zero mantissa is normalized such that 1.0 <= mantissa < 2.0.
+// The sign of x is ignored, and x must not be an Inf.
+// (The caller handles Inf before invoking fmtX.)
+func (x *Float) fmtX(buf []byte, prec int) []byte {
+	if x.form == zero {
+		buf = append(buf, "0x0"...)
+		if prec > 0 {
+			buf = append(buf, '.')
+			for i := 0; i < prec; i++ {
+				buf = append(buf, '0')
+			}
+		}
+		buf = append(buf, "p+00"...)
+		return buf
+	}
+
+	if debugFloat && x.form != finite {
+		panic("non-finite float")
+	}
+
+	// round mantissa to n bits
+	var n uint
+	if prec < 0 {
+		n = 1 + (x.MinPrec()-1+3)/4*4 // round MinPrec up to 1 mod 4
+	} else {
+		n = 1 + 4*uint(prec)
+	}
+	// n%4 == 1
+	x = new(Float).SetPrec(n).SetMode(x.mode).Set(x)
+
+	// adjust mantissa to use exactly n bits
+	m := x.mant
+	switch w := uint(len(x.mant)) * _W; {
+	case w < n:
+		m = nat(nil).shl(m, n-w)
+	case w > n:
+		m = nat(nil).shr(m, w-n)
+	}
+	exp64 := int64(x.exp) - 1 // avoid wrap-around
+
+	hm := m.utoa(16)
+	if debugFloat && hm[0] != '1' {
+		panic("incorrect mantissa: " + string(hm))
+	}
+	buf = append(buf, "0x1"...)
+	if len(hm) > 1 {
+		buf = append(buf, '.')
+		buf = append(buf, hm[1:]...)
+	}
+
+	buf = append(buf, 'p')
+	if exp64 >= 0 {
+		buf = append(buf, '+')
+	} else {
+		exp64 = -exp64
+		buf = append(buf, '-')
+	}
+	// Force at least two exponent digits, to match fmt.
+	if exp64 < 10 {
+		buf = append(buf, '0')
+	}
+	return strconv.AppendInt(buf, exp64, 10)
+}
+
+// fmtP appends the string of x in the format "0x." mantissa "p" exponent
+// with a hexadecimal mantissa and a binary exponent, or "0" if x is zero,
+// and returns the extended buffer.
+// The mantissa is normalized such that 0.5 <= 0.mantissa < 1.0.
+// The sign of x is ignored, and x must not be an Inf.
+// (The caller handles Inf before invoking fmtP.)
+func (x *Float) fmtP(buf []byte) []byte {
+	if x.form == zero {
+		return append(buf, '0')
+	}
+
+	if debugFloat && x.form != finite {
+		panic("non-finite float")
+	}
+	// x != 0
+
+	// remove trailing 0 words early
+	// (no need to convert to hex 0's and trim later);
+	// these words sit at the low indices of the slice,
+	// so they are dropped from the front
+	m := x.mant
+	for len(m) > 0 && m[0] == 0 {
+		m = m[1:]
+	}
+
+	buf = append(buf, "0x."...)
+	buf = append(buf, bytes.TrimRight(m.utoa(16), "0")...)
+	buf = append(buf, 'p')
+	if x.exp >= 0 {
+		buf = append(buf, '+')
+	}
+	return strconv.AppendInt(buf, int64(x.exp), 10)
+}
+
+func min(x, y int) int { // min returns the smaller of x and y
+	if y < x {
+		x = y
+	}
+	return x
+}
+
+var _ fmt.Formatter = &floatZero // compile-time check: *Float must implement fmt.Formatter
+
+// Format implements fmt.Formatter. It accepts all the regular
+// formats for floating-point numbers ('b', 'e', 'E', 'f', 'F',
+// 'g', 'G', 'x') as well as 'p' and 'v'. See (*Float).Text for the
+// interpretation of 'p'. The 'v' format is handled like 'g'.
+// Format also supports specification of the minimum precision
+// in digits, the output field width, as well as the format flags
+// '+' and ' ' for sign control, '0' for space or zero padding,
+// and '-' for left or right justification. See the fmt package
+// for details.
+func (x *Float) Format(s fmt.State, format rune) {
+	prec, hasPrec := s.Precision()
+	if !hasPrec {
+		prec = 6 // default precision for 'e', 'f'
+	}
+
+	switch format {
+	case 'e', 'E', 'f', 'b', 'p', 'x':
+		// nothing to do
+	case 'F':
+		// (*Float).Text doesn't support 'F'; handle like 'f'
+		format = 'f'
+	case 'v':
+		// handle like 'g'
+		format = 'g'
+		fallthrough
+	case 'g', 'G':
+		if !hasPrec {
+			prec = -1 // default precision for 'g', 'G'
+		}
+	default:
+		fmt.Fprintf(s, "%%!%c(*big.Float=%s)", format, x.String())
+		return
+	}
+	var buf []byte
+	buf = x.Append(buf, byte(format), prec)
+	if len(buf) == 0 {
+		buf = []byte("?") // should never happen, but don't crash
+	}
+	// len(buf) > 0
+
+	var sign string
+	switch {
+	case buf[0] == '-':
+		sign = "-"
+		buf = buf[1:]
+	case buf[0] == '+':
+		// +Inf; the ' ' flag replaces its '+' with a space
+		sign = "+"
+		if s.Flag(' ') {
+			sign = " "
+		}
+		buf = buf[1:]
+	case s.Flag('+'):
+		sign = "+"
+	case s.Flag(' '):
+		sign = " "
+	}
+
+	var padding int
+	if width, hasWidth := s.Width(); hasWidth && width > len(sign)+len(buf) {
+		padding = width - len(sign) - len(buf)
+	}
+
+	switch {
+	case s.Flag('0') && !x.IsInf():
+		// 0-padding on left, between the sign and the digits (not applied to Inf)
+		writeMultiple(s, sign, 1)
+		writeMultiple(s, "0", padding)
+		s.Write(buf)
+	case s.Flag('-'):
+		// padding on right
+		writeMultiple(s, sign, 1)
+		s.Write(buf)
+		writeMultiple(s, " ", padding)
+	default:
+		// padding on left
+		writeMultiple(s, " ", padding)
+		writeMultiple(s, sign, 1)
+		s.Write(buf)
+	}
+}
diff --git a/src/math/big/gcd_test.go b/src/math/big/gcd_test.go
new file mode 100644
index 0000000..3cca2ec
--- /dev/null
+++ b/src/math/big/gcd_test.go
@@ -0,0 +1,64 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements a GCD benchmark.
+// Usage: go test math/big -test.bench GCD
+
+package big
+
+import (
+	"math/rand"
+	"testing"
+)
+
+// randInt returns a pseudo-random Int in the range [1<<(size-1), (1<<size) - 1]
+func randInt(r *rand.Rand, size uint) *Int {
+	lo := new(Int).Lsh(intOne, size-1) // lo = 1<<(size-1)
+	v := new(Int).Rand(r, lo)
+	return v.Add(v, lo) // make sure result >= 1<<(size-1)
+}
+
+func runGCD(b *testing.B, aSize, bSize uint) {
+	if isRaceBuilder && (aSize > 1000 || bSize > 1000) {
+		b.Skip("skipping on race builder")
+	}
+	// Two sub-benchmarks: GCD called with nil x and y, and GCD called
+	// with non-nil x and y (see runGCDExt).
+	without := func(b *testing.B) { runGCDExt(b, aSize, bSize, false) }
+	with := func(b *testing.B) { runGCDExt(b, aSize, bSize, true) }
+	b.Run("WithoutXY", without)
+	b.Run("WithXY", with)
+}
+
+func runGCDExt(b *testing.B, aSize, bSize uint, calcXY bool) {
+	// Operand setup is excluded from the measured time.
+	b.StopTimer()
+	rng := rand.New(rand.NewSource(1234))
+	aa := randInt(rng, aSize)
+	bb := randInt(rng, bSize)
+	var x, y *Int // stay nil unless calcXY is set
+	if calcXY {
+		x, y = new(Int), new(Int)
+	}
+	b.StartTimer()
+	for n := b.N; n > 0; n-- {
+		new(Int).GCD(x, y, aa, bb)
+	}
+}
+
+func BenchmarkGCD10x10(b *testing.B)         { runGCD(b, 10, 10) } // names give the two operand sizes passed to randInt
+func BenchmarkGCD10x100(b *testing.B)        { runGCD(b, 10, 100) }
+func BenchmarkGCD10x1000(b *testing.B)       { runGCD(b, 10, 1000) }
+func BenchmarkGCD10x10000(b *testing.B)      { runGCD(b, 10, 10000) }
+func BenchmarkGCD10x100000(b *testing.B)     { runGCD(b, 10, 100000) }
+func BenchmarkGCD100x100(b *testing.B)       { runGCD(b, 100, 100) }
+func BenchmarkGCD100x1000(b *testing.B)      { runGCD(b, 100, 1000) }
+func BenchmarkGCD100x10000(b *testing.B)     { runGCD(b, 100, 10000) }
+func BenchmarkGCD100x100000(b *testing.B)    { runGCD(b, 100, 100000) }
+func BenchmarkGCD1000x1000(b *testing.B)     { runGCD(b, 1000, 1000) }
+func BenchmarkGCD1000x10000(b *testing.B)    { runGCD(b, 1000, 10000) }
+func BenchmarkGCD1000x100000(b *testing.B)   { runGCD(b, 1000, 100000) }
+func BenchmarkGCD10000x10000(b *testing.B)   { runGCD(b, 10000, 10000) }
+func BenchmarkGCD10000x100000(b *testing.B)  { runGCD(b, 10000, 100000) }
+func BenchmarkGCD100000x100000(b *testing.B) { runGCD(b, 100000, 100000) }
diff --git a/src/math/big/hilbert_test.go b/src/math/big/hilbert_test.go
new file mode 100644
index 0000000..1a84341
--- /dev/null
+++ b/src/math/big/hilbert_test.go
@@ -0,0 +1,160 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A little test program and benchmark for rational arithmetics.
+// Computes a Hilbert matrix, its inverse, multiplies them
+// and verifies that the product is the identity matrix.
+
+package big
+
+import (
+	"fmt"
+	"testing"
+)
+
+type matrix struct {
+	n, m int    // number of rows (n) and columns (m)
+	a    []*Rat // elements; (i, j) is stored at index i*m+j
+}
+
+func (a *matrix) at(i, j int) *Rat { // at returns the element in row i, column j
+	if i < 0 || i >= a.n || j < 0 || j >= a.m {
+		panic("index out of range")
+	}
+	return a.a[i*a.m+j]
+}
+
+func (a *matrix) set(i, j int, x *Rat) { // set stores x as the element in row i, column j
+	if i < 0 || i >= a.n || j < 0 || j >= a.m {
+		panic("index out of range")
+	}
+	a.a[i*a.m+j] = x
+}
+
+func newMatrix(n, m int) *matrix { // newMatrix allocates an n x m matrix
+	if n < 0 || m < 0 {
+		panic("illegal matrix")
+	}
+	return &matrix{
+		n: n,
+		m: m,
+		a: make([]*Rat, n*m), // every entry starts out nil
+	}
+}
+
+func newUnit(n int) *matrix { // newUnit returns the n x n identity matrix
+	unit := newMatrix(n, n)
+	for row := 0; row < n; row++ {
+		for col := 0; col < n; col++ {
+			entry := NewRat(0, 1)
+			if row == col {
+				entry.SetInt64(1) // ones on the main diagonal
+			}
+			unit.set(row, col, entry)
+		}
+	}
+	return unit
+}
+
+func newHilbert(n int) *matrix { // newHilbert returns the n x n matrix with entries 1/(i+j+1)
+	h := newMatrix(n, n)
+	for row := 0; row < n; row++ {
+		for col := 0; col < n; col++ {
+			h.set(row, col, NewRat(1, int64(row+col+1)))
+		}
+	}
+	return h
+}
+
+func newInverseHilbert(n int) *matrix {
+	inv := newMatrix(n, n)
+	for i := 0; i < n; i++ {
+		for j := 0; j < n; j++ {
+			entry := new(Rat).SetInt64(int64(i + j + 1))
+			c1 := new(Rat).SetInt(new(Int).Binomial(int64(n+i), int64(n-j-1)))
+			c2 := new(Rat).SetInt(new(Int).Binomial(int64(n+j), int64(n-i-1)))
+			c3 := new(Rat).SetInt(new(Int).Binomial(int64(i+j), int64(i)))
+
+			// entry = (i+j+1) * c1 * c2 * c3 * c3
+			for _, factor := range []*Rat{c1, c2, c3, c3} {
+				entry.Mul(entry, factor)
+			}
+			// the entry is negated whenever i+j is odd
+			if (i+j)&1 != 0 {
+				entry.Neg(entry)
+			}
+
+			inv.set(i, j, entry)
+		}
+	}
+	return inv
+}
+
+func (a *matrix) mul(b *matrix) *matrix { // mul returns the matrix product a*b as a new matrix
+	if a.m != b.n {
+		panic("illegal matrix multiply")
+	}
+	prod := newMatrix(a.n, b.m)
+	for row := 0; row < prod.n; row++ {
+		for col := 0; col < prod.m; col++ {
+			sum := NewRat(0, 1)
+			for k := 0; k < a.m; k++ {
+				sum.Add(sum, new(Rat).Mul(a.at(row, k), b.at(k, col)))
+			}
+			prod.set(row, col, sum)
+		}
+	}
+	return prod
+}
+
+func (a *matrix) eql(b *matrix) bool { // eql reports whether a and b have equal shape and equal elements
+	if !(a.n == b.n && a.m == b.m) {
+		return false
+	}
+	for row := 0; row < a.n; row++ {
+		for col := 0; col < a.m; col++ {
+			if a.at(row, col).Cmp(b.at(row, col)) != 0 {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+func (a *matrix) String() string { // String lists one row per line, each element preceded by a tab
+	var out string
+	for row := 0; row < a.n; row++ {
+		for col := 0; col < a.m; col++ {
+			out += fmt.Sprintf("\t%s", a.at(row, col))
+		}
+		out += "\n"
+	}
+	return out
+}
+
+func doHilbert(t *testing.T, n int) { // checks that Hilbert(n) times its inverse is the identity
+	h, inv := newHilbert(n), newInverseHilbert(n)
+	want := newUnit(n)
+	got := h.mul(inv)
+	if got.eql(want) {
+		return
+	}
+	if t == nil { // no *testing.T to report to (benchmark use)
+		panic("Hilbert failed")
+	}
+	t.Errorf("a   = %s\n", h)
+	t.Errorf("b   = %s\n", inv)
+	t.Errorf("a*b = %s\n", got)
+	t.Errorf("I   = %s\n", want)
+}
+
+func TestHilbert(t *testing.T) {
+	doHilbert(t, 10) // 10 x 10 matrices
+}
+
+func BenchmarkHilbert(b *testing.B) {
+	for n := b.N; n > 0; n-- {
+		doHilbert(nil, 10) // nil t: a failure panics instead of being reported
+	}
+}
diff --git a/src/math/big/int.go b/src/math/big/int.go
new file mode 100644
index 0000000..2cc3d7b
--- /dev/null
+++ b/src/math/big/int.go
@@ -0,0 +1,1321 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements signed multi-precision integers.
+
+package big
+
+import (
+	"fmt"
+	"io"
+	"math/rand"
+	"strings"
+)
+
+// An Int represents a signed multi-precision integer.
+// The zero value for an Int represents the value 0.
+//
+// Operations always take pointer arguments (*Int) rather
+// than Int values, and each unique Int value requires
+// its own unique *Int pointer. To "copy" an Int value,
+// an existing (or newly allocated) Int must be set to
+// a new value using the Int.Set method; shallow copies
+// of Ints are not supported and may lead to errors.
+//
+// Note that methods may leak the Int's value through timing side-channels.
+// Because of this and because of the scope and complexity of the
+// implementation, Int is not well-suited to implement cryptographic operations.
+// The standard library avoids exposing non-trivial Int methods to
+// attacker-controlled inputs and the determination of whether a bug in math/big
+// is considered a security vulnerability might depend on the impact on the
+// standard library.
+type Int struct {
+	neg bool // sign: true for negative values; the methods in this file keep it false for 0
+	abs nat  // absolute value of the integer; empty for 0
+}
+
+var intOne = &Int{false, natOne} // the value 1; only ever passed as an operand in this file, never as a receiver
+
+// Sign returns:
+//
+//	-1 if x <  0
+//	 0 if x == 0
+//	+1 if x >  0
+func (x *Int) Sign() int {
+	// This function is used in cryptographic operations. It must not leak
+	// anything but the Int's sign and bit size through side-channels. Any
+	// changes must be reviewed by a security expert.
+	if len(x.abs) == 0 { // an empty magnitude means x == 0, whatever x.neg holds
+		return 0
+	}
+	if x.neg {
+		return -1
+	}
+	return 1
+}
+
+// SetInt64 assigns the value x to z and returns z.
+func (z *Int) SetInt64(x int64) *Int {
+	// Take the magnitude in uint64: negating there is correct for every
+	// negative x, including the most negative int64 (magnitude 1<<63).
+	mag := uint64(x)
+	if x < 0 {
+		mag = -mag
+	}
+	z.abs, z.neg = z.abs.setUint64(mag), x < 0
+	return z
+}
+
+// SetUint64 assigns the value x to z and returns z.
+func (z *Int) SetUint64(x uint64) *Int {
+	// An unsigned value is never negative.
+	z.abs, z.neg = z.abs.setUint64(x), false
+	return z
+}
+
+// NewInt allocates and returns a new Int set to x.
+func NewInt(x int64) *Int {
+	// This code is arranged to be inlineable and produce
+	// zero allocations when inlined. See issue 29951.
+	u := uint64(x)
+	if x < 0 {
+		u = -u // u = |x|; negating in uint64 also handles the most negative int64
+	}
+	var abs []Word
+	if x == 0 { // zero: abs stays nil
+	} else if _W == 32 && u>>32 != 0 {
+		abs = []Word{Word(u), Word(u >> 32)}
+	} else {
+		abs = []Word{Word(u)}
+	}
+	return &Int{neg: x < 0, abs: abs}
+}
+
+// Set copies the value of x into z and returns z.
+func (z *Int) Set(x *Int) *Int {
+	if z == x {
+		return z // self-assignment: nothing to copy
+	}
+	z.abs, z.neg = z.abs.set(x.abs), x.neg
+	return z
+}
+
+// Bits provides raw (unchecked but fast) access to x by returning its
+// absolute value as a little-endian Word slice. The result and x share
+// the same underlying array.
+// Bits is intended to support implementation of missing low-level Int
+// functionality outside this package; it should be avoided otherwise.
+func (x *Int) Bits() []Word {
+	// This function is used in cryptographic operations. It must not leak
+	// anything but the Int's sign and bit size through side-channels. Any
+	// changes must be reviewed by a security expert.
+	return x.abs // returned without copying; the sign of x is not part of the result
+}
+
+// SetBits provides raw (unchecked but fast) access to z by setting its
+// value to abs, interpreted as a little-endian Word slice, and returning
+// z. The result and abs share the same underlying array.
+// SetBits is intended to support implementation of missing low-level Int
+// functionality outside this package; it should be avoided otherwise.
+func (z *Int) SetBits(abs []Word) *Int {
+	// A value built from raw words is always non-negative.
+	z.abs, z.neg = nat(abs).norm(), false
+	return z
+}
+
+// Abs sets z to |x| (the absolute value of x) and returns z.
+func (z *Int) Abs(x *Int) *Int {
+	// Copy the value, then drop the sign.
+	z.Set(x).neg = false
+	return z
+}
+
+// Neg sets z to -x and returns z.
+func (z *Int) Neg(x *Int) *Int {
+	z.Set(x)
+	z.neg = !(z.neg || len(z.abs) == 0) // flip the sign; 0 has no sign
+	return z
+}
+
+// Add sets z to the sum x+y and returns z.
+func (z *Int) Add(x, y *Int) *Int {
+	neg := x.neg
+	switch {
+	case x.neg == y.neg:
+		// same sign: add the magnitudes and keep the common sign
+		//   x + y == x + y
+		//   (-x) + (-y) == -(x + y)
+		z.abs = z.abs.add(x.abs, y.abs)
+	case x.abs.cmp(y.abs) >= 0:
+		// opposite signs, |x| >= |y|: the result takes the sign of x
+		z.abs = z.abs.sub(x.abs, y.abs)
+	default:
+		// opposite signs, |x| < |y|: the result takes the sign of y
+		neg = !neg
+		z.abs = z.abs.sub(y.abs, x.abs)
+	}
+	z.neg = len(z.abs) > 0 && neg // 0 has no sign
+	return z
+}
+
+// Sub sets z to the difference x-y and returns z.
+func (z *Int) Sub(x, y *Int) *Int {
+	neg := x.neg
+	switch {
+	case x.neg != y.neg:
+		// opposite signs: add the magnitudes and keep the sign of x
+		//   x - (-y) == x + y
+		//   (-x) - y == -(x + y)
+		z.abs = z.abs.add(x.abs, y.abs)
+	case x.abs.cmp(y.abs) >= 0:
+		// same sign, |x| >= |y|: the result keeps the sign of x
+		z.abs = z.abs.sub(x.abs, y.abs)
+	default:
+		// same sign, |x| < |y|: the result has the opposite sign
+		neg = !neg
+		z.abs = z.abs.sub(y.abs, x.abs)
+	}
+	z.neg = len(z.abs) > 0 && neg // 0 has no sign
+	return z
+}
+
+// Mul sets z to the product x*y and returns z.
+func (z *Int) Mul(x, y *Int) *Int {
+	// The magnitudes multiply; the product is negative exactly when
+	// the operand signs differ and the product is nonzero.
+	if x != y {
+		differ := x.neg != y.neg
+		z.abs = z.abs.mul(x.abs, y.abs)
+		z.neg = differ && len(z.abs) > 0 // 0 has no sign
+		return z
+	}
+	// x and y are the same Int: use the dedicated squaring routine;
+	// a square is never negative.
+	z.abs, z.neg = z.abs.sqr(x.abs), false
+	return z
+}
+
+// MulRange sets z to the product of all integers
+// in the range [a, b] inclusively and returns z.
+// If a > b (empty range), the result is 1.
+func (z *Int) MulRange(a, b int64) *Int {
+	if a > b {
+		return z.SetInt64(1) // empty range
+	}
+	if a <= 0 && 0 <= b {
+		return z.SetInt64(0) // range includes 0
+	}
+	// Here a <= b and every factor has the same sign.
+
+	lo, hi, neg := uint64(a), uint64(b), false
+	if a < 0 {
+		// All b-a+1 factors are negative: multiply the magnitudes -b..-a
+		// and make the product negative when the factor count is odd.
+		lo, hi, neg = uint64(-b), uint64(-a), (b-a)%2 == 0
+	}
+
+	z.abs, z.neg = z.abs.mulRange(lo, hi), neg
+	return z
+}
+
+// Binomial sets z to the binomial coefficient C(n, k) and returns z.
+func (z *Int) Binomial(n, k int64) *Int {
+	if k > n {
+		return z.SetInt64(0)
+	}
+	// reduce the number of multiplications by reducing k
+	if k > n-k {
+		k = n - k // C(n, k) == C(n, n-k)
+	}
+	// C(n, k) == n * (n-1) * ... * (n-k+1) / (k * (k-1) * ... * 1)
+	//         == n * (n-1) * ... * (n-k+1) / (1 * (1+1) * ... * k)
+	//
+	// Using the multiplicative formula produces smaller values
+	// at each step, requiring fewer allocations and computations:
+	//
+	// z = 1
+	// for i := 0; i < k; i = i+1 {
+	//     z *= n-i
+	//     z /= i+1
+	// }
+	//
+	// Each division is exact, because after the step for i
+	// the accumulator holds C(n, i+1), which is an integer.
+	//
+	// Past the k > n check above, any i with 0 <= i < k satisfies
+	// 0 < n-i and i+1 <= k, so both operands fit in an int64 and
+	// the counter itself can remain a machine integer. For k <= 0
+	// the loop body never runs and z stays 1.
+	//
+	// The loop below is that pseudo-code, with n-i and i+1 loaded
+	// into reusable Int operands on every iteration.
+	var factor, divisor Int
+	z.SetInt64(1)
+	for i := int64(0); i < k; i++ {
+		factor.SetInt64(n - i)
+		z.Mul(z, &factor)
+		divisor.SetInt64(i + 1)
+		z.Quo(z, &divisor)
+	}
+	return z
+}
+
+// Quo sets z to the quotient x/y for y != 0 and returns z.
+// If y == 0, a division-by-zero run-time panic occurs.
+// Quo implements truncated division (like Go); see QuoRem for more details.
+func (z *Int) Quo(x, y *Int) *Int {
+	z.abs, _ = z.abs.div(nil, x.abs, y.abs) // the remainder is discarded
+	z.neg = x.neg != y.neg && len(z.abs) > 0
+	return z
+}
+
+// Rem sets z to the remainder x%y for y != 0 and returns z.
+// If y == 0, a division-by-zero run-time panic occurs.
+// Rem implements truncated modulus (like Go); see QuoRem for more details.
+func (z *Int) Rem(x, y *Int) *Int {
+	_, z.abs = nat(nil).div(z.abs, x.abs, y.abs) // the quotient is discarded
+	z.neg = x.neg && len(z.abs) > 0
+	return z
+}
+
+// QuoRem sets z to the quotient x/y and r to the remainder x%y, and returns
+// the pair (z, r) for y != 0. If y == 0, a division-by-zero run-time panic occurs.
+//
+// QuoRem implements T-division and modulus (like Go):
+//
+//	q = x/y      with the result truncated to zero
+//	r = x - y*q
+//
+// (See Daan Leijen, “Division and Modulus for Computer Scientists”.) See DivMod for Euclidean division and modulus (unlike Go).
+func (z *Int) QuoRem(x, y, r *Int) (*Int, *Int) {
+	xNeg, yNeg := x.neg, y.neg // captured before any sign is written: z or r may be x or y
+	z.abs, r.abs = z.abs.div(r.abs, x.abs, y.abs)
+	z.neg = len(z.abs) > 0 && xNeg != yNeg // 0 has no sign (same rule for r below)
+	r.neg = len(r.abs) > 0 && xNeg
+	return z, r
+}
+
+// Div sets z to the quotient x/y for y != 0 and returns z.
+// If y == 0, a division-by-zero run-time panic occurs.
+// Div implements Euclidean division (unlike Go); see DivMod for more details.
+func (z *Int) Div(x, y *Int) *Int {
+	yNeg := y.neg // z may be an alias for y
+	var r Int
+	z.QuoRem(x, y, &r)
+	if !r.neg {
+		return z // remainder is non-negative: the truncated quotient is already Euclidean
+	}
+	// Negative remainder: move the quotient one step away from zero.
+	if yNeg {
+		return z.Add(z, intOne)
+	}
+	return z.Sub(z, intOne)
+}
+
+// Mod sets z to the modulus x%y for y != 0 and returns z.
+// If y == 0, a division-by-zero run-time panic occurs.
+// Mod implements Euclidean modulus (unlike Go); see DivMod for more details.
+func (z *Int) Mod(x, y *Int) *Int {
+	divisor := y
+	if z == y || alias(z.abs, y.abs) {
+		divisor = new(Int).Set(y) // z is about to be overwritten: keep a private copy of y
+	}
+	var q Int
+	q.QuoRem(x, y, z)
+	if !z.neg {
+		return z
+	}
+	// negative remainder: add |y| to bring it into the Euclidean range
+	if divisor.neg {
+		return z.Sub(z, divisor)
+	}
+	return z.Add(z, divisor)
+}
+
+// DivMod sets z to the quotient x div y and m to the modulus x mod y
+// and returns the pair (z, m) for y != 0.
+// If y == 0, a division-by-zero run-time panic occurs.
+//
+// DivMod implements Euclidean division and modulus (unlike Go):
+//
+//	q = x div y  such that
+//	m = x - y*q  with 0 <= m < |y|
+//
+// (See Raymond T. Boute, “The Euclidean definition of the functions
+// div and mod”. ACM Transactions on Programming Languages and
+// Systems (TOPLAS), 14(2):127-144, New York, NY, USA, 4/1992. ACM press.)
+// See QuoRem for T-division and modulus (like Go).
+func (z *Int) DivMod(x, y, m *Int) (*Int, *Int) {
+	divisor := y // save y
+	if z == y || alias(z.abs, y.abs) {
+		divisor = new(Int).Set(y)
+	}
+	z.QuoRem(x, y, m)
+	if !m.neg {
+		return z, m // the truncated result is already Euclidean
+	}
+	if divisor.neg {
+		z.Add(z, intOne)
+		m.Sub(m, divisor)
+	} else {
+		z.Sub(z, intOne)
+		m.Add(m, divisor)
+	}
+	return z, m
+}
+
+// Cmp compares x and y and returns:
+//
+//	-1 if x <  y
+//	 0 if x == y
+//	+1 if x >  y
+func (x *Int) Cmp(y *Int) (r int) {
+	if x == y {
+		return 0 // same Int
+	}
+	if x.neg != y.neg {
+		// signs differ: the negative operand is the smaller one
+		if x.neg {
+			return -1
+		}
+		return 1
+	}
+	// same sign: compare magnitudes; the order flips for negative values
+	//   x cmp y == x cmp y
+	//   (-x) cmp (-y) == -(x cmp y)
+	r = x.abs.cmp(y.abs)
+	if x.neg {
+		r = -r
+	}
+	return r
+}
+
+// CmpAbs compares the absolute values of x and y and returns:
+//
+//	-1 if |x| <  |y|
+//	 0 if |x| == |y|
+//	+1 if |x| >  |y|
+func (x *Int) CmpAbs(y *Int) int {
+	return x.abs.cmp(y.abs) // neither sign is consulted
+}
+
+// low32 returns the least significant 32 bits of x.
+func low32(x nat) uint32 {
+	if len(x) > 0 {
+		return uint32(x[0])
+	}
+	return 0
+}
+
+// low64 returns the least significant 64 bits of x.
+func low64(x nat) uint64 {
+	switch {
+	case len(x) == 0:
+		return 0
+	case _W == 32 && len(x) > 1:
+		// 32-bit words: the second word supplies bits 32..63
+		return uint64(x[1])<<32 | uint64(x[0])
+	}
+	return uint64(x[0])
+}
+
+// Int64 returns the int64 representation of x.
+// If x cannot be represented in an int64, the result is undefined.
+func (x *Int) Int64() int64 {
+	mag := int64(low64(x.abs))
+	if !x.neg {
+		return mag
+	}
+	return -mag
+}
+
+// Uint64 returns the uint64 representation of x.
+// If x cannot be represented in a uint64, the result is undefined.
+func (x *Int) Uint64() uint64 {
+	return low64(x.abs) // low 64 bits of |x|; the sign of x is not consulted
+}
+
+// IsInt64 reports whether x can be represented as an int64.
+func (x *Int) IsInt64() bool {
+	if len(x.abs) > 64/_W {
+		return false // the magnitude needs more than 64 bits
+	}
+	w := int64(low64(x.abs))
+	return w >= 0 || x.neg && w == -w // a negative w equals -w only for |x| == 1<<63, which fits only as a negative value
+}
+
+// IsUint64 reports whether x can be represented as a uint64.
+func (x *Int) IsUint64() bool {
+	return len(x.abs) <= 64/_W && !x.neg
+}
+
+// Float64 returns the float64 value nearest x,
+// and an indication of any rounding that occurred.
+func (x *Int) Float64() (float64, Accuracy) {
+	bits := x.abs.bitLen() // NB: still uses slow crypto impl!
+	if bits == 0 {
+		return 0.0, Exact
+	}
+
+	// Fast path: no more than 53 significant bits, so the conversion is exact.
+	exact := bits <= 53 || bits < 64 && bits-int(x.abs.trailingZeroBits()) <= 53
+	if !exact {
+		return new(Float).SetInt(x).Float64()
+	}
+	f := float64(low64(x.abs))
+	if x.neg {
+		f = -f
+	}
+	return f, Exact
+}
+
+// SetString sets z to the value of s, interpreted in the given base,
+// and returns z and a boolean indicating success. The entire string
+// (not just a prefix) must be valid for success. If SetString fails,
+// the value of z is undefined but the returned value is nil.
+//
+// The base argument must be 0 or a value between 2 and MaxBase.
+// For base 0, the number prefix determines the actual base: A prefix of
+// “0b” or “0B” selects base 2, “0”, “0o” or “0O” selects base 8,
+// and “0x” or “0X” selects base 16. Otherwise, the selected base is 10
+// and no prefix is accepted.
+//
+// For bases <= 36, lower and upper case letters are considered the same:
+// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
+// For bases > 36, the upper case letters 'A' to 'Z' represent the digit
+// values 36 to 61.
+//
+// For base 0, an underscore character “_” may appear between a base
+// prefix and an adjacent digit, and between successive digits; such
+// underscores do not change the value of the number.
+// Incorrect placement of underscores is reported as an error if there
+// are no other errors. If base != 0, underscores are not recognized
+// and act like any other character that is not a valid digit.
+func (z *Int) SetString(s string, base int) (*Int, bool) {
+	return z.setFromScanner(strings.NewReader(s), base) // the *strings.Reader serves as the io.ByteScanner
+}
+
+// setFromScanner implements SetString given an io.ByteScanner.
+// For documentation see comments of SetString.
+func (z *Int) setFromScanner(r io.ByteScanner, base int) (*Int, bool) {
+	_, _, err := z.scan(r, base)
+	if err != nil {
+		return nil, false
+	}
+	if _, err = r.ReadByte(); err != io.EOF {
+		return nil, false // leftover input: the entire content must have been consumed
+	}
+	return z, true // err == io.EOF => scan consumed all content of r
+}
+
+// SetBytes interprets buf as the bytes of a big-endian unsigned
+// integer, sets z to that value, and returns z.
+func (z *Int) SetBytes(buf []byte) *Int {
+	// The bytes encode an unsigned value, so the sign is cleared.
+	z.abs, z.neg = z.abs.setBytes(buf), false
+	return z
+}
+
+// Bytes returns the absolute value of x as a big-endian byte slice.
+//
+// To use a fixed length slice, or a preallocated one, use FillBytes.
+func (x *Int) Bytes() []byte {
+	// This function is used in cryptographic operations. It must not leak
+	// anything but the Int's sign and bit size through side-channels. Any
+	// changes must be reviewed by a security expert.
+	buf := make([]byte, len(x.abs)*_S) // NOTE(review): assumes _S is the Word size in bytes — defined elsewhere, confirm
+	return buf[x.abs.bytes(buf):]
+}
+
+// FillBytes sets buf to the absolute value of x, storing it as a zero-extended
+// big-endian byte slice, and returns buf.
+//
+// If the absolute value of x doesn't fit in buf, FillBytes will panic.
+func (x *Int) FillBytes(buf []byte) []byte {
+	// Clear whole buffer. (This gets optimized into a memclr.)
+	for i := range buf {
+		buf[i] = 0
+	}
+	x.abs.bytes(buf) // writes |x| into the cleared buffer; the returned offset is not needed here
+	return buf
+}
+
+// BitLen returns the length of the absolute value of x in bits.
+// The bit length of 0 is 0.
+func (x *Int) BitLen() int {
+	// This function is used in cryptographic operations. It must not leak
+	// anything but the Int's sign and bit size through side-channels. Any
+	// changes must be reviewed by a security expert.
+	return x.abs.bitLen() // the sign of x is not consulted
+}
+
+// TrailingZeroBits returns the number of consecutive least significant zero
+// bits of |x|.
+func (x *Int) TrailingZeroBits() uint {
+	return x.abs.trailingZeroBits() // computed on the magnitude only
+}
+
+// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z.
+// If m == nil or m == 0, z = x**y unless y <= 0 then z = 1. If m != 0, y < 0,
+// and x and m are not relatively prime, z is unchanged and nil is returned.
+//
+// Modular exponentiation of inputs of a particular size is not a
+// cryptographically constant-time operation.
+func (z *Int) Exp(x, y, m *Int) *Int {
+	return z.exp(x, y, m, false) // slow=false; exp hands the flag on to nat.expNN
+}
+
+func (z *Int) expSlow(x, y, m *Int) *Int {
+	return z.exp(x, y, m, true) // same as Exp, but with slow=true handed on to nat.expNN
+}
+
+func (z *Int) exp(x, y, m *Int, slow bool) *Int {
+	// See Knuth, volume 2, section 4.6.3.
+	xWords, xNeg := x.abs, x.neg
+	if y.neg {
+		if m == nil || len(m.abs) == 0 {
+			return z.SetInt64(1)
+		}
+		// for y < 0: x**y mod m == (x**(-1))**|y| mod m
+		inverse := new(Int).ModInverse(x, m)
+		if inverse == nil {
+			return nil
+		}
+		// The inverse is already reduced to [0, |m|): the sign of x must not be applied again below.
+		xWords, xNeg = inverse.abs, false
+	}
+	yWords := y.abs
+
+	var mWords nat
+	if m != nil {
+		if z == m || alias(z.abs, m.abs) {
+			m = new(Int).Set(m)
+		}
+		mWords = m.abs // m.abs may be nil for m == 0
+	}
+
+	z.abs = z.abs.expNN(xWords, yWords, mWords, slow)
+	z.neg = len(z.abs) > 0 && xNeg && len(yWords) > 0 && yWords[0]&1 == 1 // 0 has no sign
+	if z.neg && len(mWords) > 0 {
+		// make modulus result positive
+		z.abs = z.abs.sub(mWords, z.abs) // z == x**y mod |m| && 0 <= z < |m|
+		z.neg = false
+	}
+	return z
+}
+
+// GCD sets z to the greatest common divisor of a and b and returns z.
+// If x or y are not nil, GCD sets their value such that z = a*x + b*y.
+//
+// a and b may be positive, zero or negative. (Before Go 1.14 both had
+// to be > 0.) Regardless of the signs of a and b, z is always >= 0.
+//
+// If a == b == 0, GCD sets z = x = y = 0.
+//
+// If a == 0 and b != 0, GCD sets z = |b|, x = 0, y = sign(b) * 1.
+//
+// If a != 0 and b == 0, GCD sets z = |a|, x = sign(a) * 1, y = 0.
+func (z *Int) GCD(x, y, a, b *Int) *Int {
+	if len(a.abs) != 0 && len(b.abs) != 0 {
+		return z.lehmerGCD(x, y, a, b)
+	}
+	// At least one of a and b is zero. Everything needed from them is
+	// recorded before any result is written, in case z, x or y alias a or b.
+	aZero, bZero, negA, negB := len(a.abs) == 0, len(b.abs) == 0, a.neg, b.neg
+	if aZero {
+		z.Set(b)
+	} else {
+		z.Set(a)
+	}
+	z.neg = false
+	// The cofactor of a zero operand is 0; that of a nonzero operand is its sign.
+	if x != nil {
+		if aZero {
+			x.SetUint64(0)
+		} else {
+			x.SetUint64(1).neg = negA
+		}
+	}
+	if y != nil {
+		if bZero {
+			y.SetUint64(0)
+		} else {
+			y.SetUint64(1).neg = negB
+		}
+	}
+	return z
+}
+
+// lehmerSimulate attempts to simulate several Euclidean update steps
+// using the leading digits of A and B.  It returns u0, u1, v0, v1
+// such that A and B can be updated as:
+//
+//	A = u0*A + v0*B
+//	B = u1*A + v1*B
+//
+// Requirements: A >= B and len(B.abs) >= 2
+// Since we are calculating with full words to avoid overflow,
+// we use 'even' to track the sign of the cosequences.
+// For even iterations: u0, v1 >= 0 && u1, v0 <= 0
+// For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
+func lehmerSimulate(A, B *Int) (u0, u1, v0, v1 Word, even bool) {
+	// initialize the digits
+	var a1, a2, u2, v2 Word
+
+	m := len(B.abs) // m >= 2
+	n := len(A.abs) // n >= m >= 2
+
+	// extract the top Word of bits from A and B
+	h := nlz(A.abs[n-1]) // presumably the leading-zero count of A's top word (nlz is defined elsewhere)
+	a1 = A.abs[n-1]<<h | A.abs[n-2]>>(_W-h)
+	// B may have implicit zero words in the high bits if the lengths differ
+	switch {
+	case n == m:
+		a2 = B.abs[n-1]<<h | B.abs[n-2]>>(_W-h)
+	case n == m+1:
+		a2 = B.abs[n-2] >> (_W - h)
+	default:
+		a2 = 0 // B is at least two words shorter than A
+	}
+
+	// The Words below hold magnitudes only; 'even' tracks the signs
+	// as described in the function comment above.
+	// v0 is still 0 on return unless at least two steps were simulated;
+	// the caller treats v0 == 0 as "fall back to a full Euclidean step".
+	// The first iteration starts with k=1 (odd).
+	even = false
+	// variables to track the cosequences
+	u0, u1, u2 = 0, 1, 0
+	v0, v1, v2 = 0, 0, 1
+
+	// Calculate the quotient and cosequences using Collins' stopping condition.
+	// Note that overflow of a Word is not possible when computing the remainder
+	// sequence and cosequences since the cosequence size is bounded by the input size.
+	// See section 4.2 of Jebelean for details.
+	for a2 >= v2 && a1-a2 >= v1+v2 {
+		q, r := a1/a2, a1%a2
+		a1, a2 = a2, r
+		u0, u1, u2 = u1, u2, u1+q*u2
+		v0, v1, v2 = v1, v2, v1+q*v2
+		even = !even
+	}
+	return
+}
+
+// lehmerUpdate updates the inputs A and B such that:
+//
+//	A = u0*A + v0*B
+//	B = u1*A + v1*B
+//
+// where the signs of u0, u1, v0, v1 are given by even
+// For even == true: u0, v1 >= 0 && u1, v0 <= 0
+// For even == false: u0, v1 <= 0 && u1, v0 >= 0
+// q, r, s, t are temporary variables to avoid allocations in the multiplication.
+func lehmerUpdate(A, B, q, r, s, t *Int, u0, u1, v0, v1 Word, even bool) {
+
+	// Load the coefficient magnitudes and attach the signs implied by even.
+	t.abs = t.abs.setWord(u0)
+	s.abs = s.abs.setWord(v0)
+	t.neg = !even
+	s.neg = even
+
+	t.Mul(A, t) // t = u0*A (signed)
+	s.Mul(B, s) // s = v0*B (signed)
+
+	r.abs = r.abs.setWord(u1)
+	q.abs = q.abs.setWord(v1)
+	r.neg = even
+	q.neg = !even
+
+	r.Mul(A, r) // r = u1*A (signed)
+	q.Mul(B, q) // q = v1*B (signed)
+
+	// All four products were formed from the old A and B; only now are they overwritten.
+	A.Add(t, s)
+	B.Add(r, q)
+}
+
+// euclidUpdate performs a single step of the Euclidean GCD algorithm
+// if extended is true, it also updates the cosequence Ua, Ub.
+func euclidUpdate(A, B, Ua, Ub, q, r, s, t *Int, extended bool) {
+	q, r = q.QuoRem(A, B, r)
+
+	*A, *B, *r = *B, *r, *A // rotate the Int structs: A <- B, B <- A mod B, r <- old A (reused as scratch)
+
+	if extended {
+		// Ua, Ub = Ub, Ua - q*Ub
+		t.Set(Ub)
+		s.Mul(Ub, q)
+		Ub.Sub(Ua, s)
+		Ua.Set(t)
+	}
+}
+
+// lehmerGCD sets z to the greatest common divisor of a and b,
+// which both must be != 0, and returns z.
+// If x or y are not nil, their values are set such that z = a*x + b*y.
+// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L.
+// This implementation uses the improved condition by Collins requiring only one
+// quotient and avoiding the possibility of single Word overflow.
+// See Jebelean, "Improving the multiprecision Euclidean algorithm",
+// Design and Implementation of Symbolic Computation Systems, pp 45-58.
+// The cosequences are updated according to Algorithm 10.45 from
+// Cohen et al. "Handbook of Elliptic and Hyperelliptic Curve Cryptography" pp 192.
+func (z *Int) lehmerGCD(x, y, a, b *Int) *Int {
+	var A, B, Ua, Ub *Int
+
+	// Work on private copies of |a| and |b|; a and b themselves are read again below.
+	A = new(Int).Abs(a)
+	B = new(Int).Abs(b)
+
+	extended := x != nil || y != nil
+
+	if extended {
+		// Ua (Ub) tracks how many times input a has been accumulated into A (B).
+		Ua = new(Int).SetInt64(1)
+		Ub = new(Int)
+	}
+
+	// temp variables for multiprecision update
+	q := new(Int)
+	r := new(Int)
+	s := new(Int)
+	t := new(Int)
+
+	// ensure A >= B
+	if A.abs.cmp(B.abs) < 0 {
+		A, B = B, A
+		Ub, Ua = Ua, Ub // each cosequence stays with its operand
+	}
+
+	// loop invariant A >= B
+	for len(B.abs) > 1 {
+		// Attempt to calculate in single-precision using leading words of A and B.
+		u0, u1, v0, v1, even := lehmerSimulate(A, B)
+
+		// multiprecision Step
+		if v0 != 0 {
+			// Simulate the effect of the single-precision steps using the cosequences.
+			// A = u0*A + v0*B
+			// B = u1*A + v1*B
+			lehmerUpdate(A, B, q, r, s, t, u0, u1, v0, v1, even)
+
+			if extended {
+				// Ua = u0*Ua + v0*Ub
+				// Ub = u1*Ua + v1*Ub
+				lehmerUpdate(Ua, Ub, q, r, s, t, u0, u1, v0, v1, even)
+			}
+
+		} else {
+			// Single-digit calculations failed to simulate any quotients.
+			// Do a standard Euclidean step.
+			euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended)
+		}
+	}
+
+	if len(B.abs) > 0 {
+		// extended Euclidean algorithm base case if B is a single Word
+		if len(A.abs) > 1 {
+			// A is longer than a single Word, so one update is needed.
+			euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended)
+		}
+		if len(B.abs) > 0 {
+			// A and B are both a single Word.
+			aWord, bWord := A.abs[0], B.abs[0]
+			if extended {
+				// Finish in Word arithmetic; as in lehmerSimulate, the Words are
+				// magnitudes and even carries the sign information.
+				var ua, ub, va, vb Word
+				ua, ub = 1, 0
+				va, vb = 0, 1
+				even := true
+				for bWord != 0 {
+					q, r := aWord/bWord, aWord%bWord
+					aWord, bWord = bWord, r
+					ua, ub = ub, ua+q*ub
+					va, vb = vb, va+q*vb
+					even = !even
+				}
+
+				t.abs = t.abs.setWord(ua)
+				s.abs = s.abs.setWord(va)
+				t.neg = !even
+				s.neg = even
+
+				t.Mul(Ua, t)
+				s.Mul(Ub, s)
+
+				Ua.Add(t, s)
+			} else {
+				for bWord != 0 {
+					aWord, bWord = bWord, aWord%bWord
+				}
+			}
+			A.abs[0] = aWord // the single word of A now holds the GCD
+		}
+	}
+	negA := a.neg
+	if y != nil {
+		// avoid aliasing b needed in the division below
+		if y == b {
+			B.Set(b)
+		} else {
+			B = b
+		}
+		// y = (z - a*x)/b
+		y.Mul(a, Ua) // y can safely alias a
+		if negA {
+			y.neg = !y.neg
+		}
+		y.Sub(A, y)
+		y.Div(y, B)
+	}
+
+	if x != nil {
+		*x = *Ua // struct copy; Ua is not referenced after this point
+		if negA {
+			x.neg = !x.neg
+		}
+	}
+
+	*z = *A // struct copy; A is not referenced after this point
+
+	return z
+}
+
+// Rand sets z to a pseudo-random number in [0, n) and returns z.
+//
+// As this uses the math/rand package, it must not be used for
+// security-sensitive work. Use crypto/rand.Int instead.
+func (z *Int) Rand(rnd *rand.Rand, n *Int) *Int {
+	// The sign of n is examined before z.neg is written, because z and n might alias.
+	if !n.neg && len(n.abs) > 0 {
+		z.neg = false
+		z.abs = z.abs.random(rnd, n.abs, n.abs.bitLen())
+		return z
+	}
+	// n <= 0: there is nothing to draw from, so the result is 0
+	z.neg, z.abs = false, nil
+	return z
+}
+
+// ModInverse sets z to the multiplicative inverse of g in the ring ℤ/nℤ
+// and returns z. If g and n are not relatively prime, g has no multiplicative
+// inverse in the ring ℤ/nℤ.  In this case, z is unchanged and the return value
+// is nil. If n == 0, a division-by-zero run-time panic occurs.
+func (z *Int) ModInverse(g, n *Int) *Int {
+	// Hand GCD non-negative operands: replace n by |n| and reduce a
+	// negative g modulo n. The caller's Ints are left untouched.
+	if n.neg {
+		var absN Int
+		n = absN.Neg(n)
+	}
+	if g.neg {
+		var redG Int
+		g = redG.Mod(g, n)
+	}
+
+	var gcd, inv Int
+	gcd.GCD(&inv, nil, g, n)
+
+	// if and only if gcd==1, g and n are relatively prime
+	if gcd.Cmp(intOne) != 0 {
+		return nil
+	}
+
+	// inv satisfies g*inv + n*y = 1 for some y, therefore inv is the inverse
+	// element, but it may be negative, so convert to the range 0 <= z < |n|
+	if !inv.neg {
+		return z.Set(&inv)
+	}
+	return z.Add(&inv, n)
+}
+
+func (z nat) modInverse(g, n nat) nat {
+	// TODO(rsc): ModInverse should be implemented in terms of this function.
+	return (&Int{abs: z}).ModInverse(&Int{abs: g}, &Int{abs: n}).abs // NOTE(review): ModInverse returns nil when g and n are not relatively prime; .abs would then dereference nil
+}
+
+// Jacobi returns the Jacobi symbol (x/y), either +1, -1, or 0.
+// The y argument must be an odd integer.
+func Jacobi(x, y *Int) int {
+	if len(y.abs) == 0 || y.abs[0]&1 == 0 {
+		panic(fmt.Sprintf("big: invalid 2nd argument to Int.Jacobi: need odd integer but got %s", y.String()))
+	}
+
+	// We use the formulation described in chapter 2, section 2.4,
+	// "The Yacas Book of Algorithms":
+	// http://yacas.sourceforge.net/Algo.book.pdf
+
+	// a and b are private working copies of x and y; c is scratch. j accumulates the sign.
+	var a, b, c Int
+	a.Set(x)
+	b.Set(y)
+	j := 1
+
+	if b.neg { // negative denominator: continue with |y|, flipping the sign only if x is negative too
+		if a.neg {
+			j = -1
+		}
+		b.neg = false
+	}
+
+	for {
+		if b.Cmp(intOne) == 0 {
+			return j
+		}
+		if len(a.abs) == 0 {
+			return 0
+		}
+		a.Mod(&a, &b)
+		if len(a.abs) == 0 {
+			return 0
+		}
+		// a > 0
+
+		// handle factors of 2 in 'a'
+		s := a.abs.trailingZeroBits()
+		if s&1 != 0 {
+			bmod8 := b.abs[0] & 7
+			if bmod8 == 3 || bmod8 == 5 { // an odd power of 2 flips the sign exactly when b is 3 or 5 mod 8
+				j = -j
+			}
+		}
+		c.Rsh(&a, s) // a = 2^s*c
+
+		// swap numerator and denominator
+		if b.abs[0]&3 == 3 && c.abs[0]&3 == 3 { // the swap flips the sign when both are 3 mod 4
+			j = -j
+		}
+		a.Set(&b)
+		b.Set(&c)
+	}
+}
+
+// modSqrt3Mod4Prime uses the identity, valid for a == u^2 mod p,
+//
+//	   (a^((p+1)/4))^2  mod p
+//	== u^(p+1)          mod p
+//	== u^2              mod p
+//
+// to calculate the square root of any quadratic residue mod p quickly for 3
+// mod 4 primes.
+func (z *Int) modSqrt3Mod4Prime(x, p *Int) *Int {
+	// For p == 3 mod 4, p+1 is divisible by 4, so the shift below is an exact division.
+	e := new(Int).Add(p, intOne)
+	z.Exp(x, e.Rsh(e, 2), p) // z = x^((p+1)/4) mod p
+	return z
+}
+
+// modSqrt5Mod8Prime uses Atkin's observation that 2 is not a square mod p
+//
+//	alpha ==  (2*a)^((p-5)/8)    mod p
+//	beta  ==  2*a*alpha^2        mod p  is a square root of -1
+//	b     ==  a*alpha*(beta-1)   mod p  is a square root of a
+//
+// to calculate the square root of any quadratic residue mod p quickly for 5
+// mod 8 primes.
+func (z *Int) modSqrt5Mod8Prime(x, p *Int) *Int {
+	// p == 5 mod 8 implies p = e*8 + 5
+	// e is the quotient and 5 the remainder on division by 8
+	e := new(Int).Rsh(p, 3)    // e = (p - 5) / 8
+	twoX := new(Int).Lsh(x, 1) // twoX = 2*x
+	alpha := new(Int).Exp(twoX, e, p)
+
+	// beta = 2*x*alpha^2 mod p
+	beta := new(Int).Mul(alpha, alpha)
+	beta.Mod(beta, p).Mul(beta, twoX).Mod(beta, p)
+
+	// z = x*alpha*(beta-1) mod p, reducing after each product;
+	// beta is reused as the accumulator
+	beta.Sub(beta, intOne).Mul(beta, x).Mod(beta, p).Mul(beta, alpha)
+	z.Mod(beta, p)
+	return z
+}
+
+// modSqrtTonelliShanks uses the Tonelli-Shanks algorithm to find the square
+// root of a quadratic residue modulo any prime.
+func (z *Int) modSqrtTonelliShanks(x, p *Int) *Int {
+	// Break p-1 into s*2^e such that s is odd.
+	var s Int
+	s.Sub(p, intOne)
+	e := s.abs.trailingZeroBits()
+	s.Rsh(&s, e)
+
+	// find some non-square n
+	var n Int
+	n.SetInt64(2)
+	for Jacobi(&n, p) != -1 { // try n = 2, 3, 4, ... until the Jacobi symbol is -1
+		n.Add(&n, intOne)
+	}
+
+	// Core of the Tonelli-Shanks algorithm. Follows the description in
+	// section 6 of "Square roots from 1; 24, 51, 10 to Dan Shanks" by Ezra
+	// Brown:
+	// https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf
+	var y, b, g, t Int
+	y.Add(&s, intOne)
+	y.Rsh(&y, 1)
+	y.Exp(x, &y, p)  // y = x^((s+1)/2)
+	b.Exp(x, &s, p)  // b = x^s
+	g.Exp(&n, &s, p) // g = n^s
+	r := e
+	for {
+		// find the least m such that ord_p(b) = 2^m
+		var m uint
+		t.Set(&b)
+		for t.Cmp(intOne) != 0 {
+			t.Mul(&t, &t).Mod(&t, p)
+			m++
+		}
+
+		if m == 0 { // b == 1: y is the result
+			return z.Set(&y)
+		}
+
+		t.SetInt64(0).SetBit(&t, int(r-m-1), 1).Exp(&g, &t, p)
+		// t = g^(2^(r-m-1)) mod p
+		g.Mul(&t, &t).Mod(&g, p) // g = g^(2^(r-m)) mod p
+		y.Mul(&y, &t).Mod(&y, p)
+		b.Mul(&b, &g).Mod(&b, p)
+		r = m
+	}
+}
+
+// ModSqrt sets z to a square root of x mod p if such a square root exists, and
+// returns z. The modulus p must be an odd prime. If x is not a square mod p,
+// ModSqrt leaves z unchanged and returns nil. This function panics if p is
+// not an odd integer, its behavior is undefined if p is odd but not prime.
+func (z *Int) ModSqrt(x, p *Int) *Int {
+	switch Jacobi(x, p) {
+	case -1:
+		return nil // x is not a square mod p
+	case 0:
+		return z.SetInt64(0) // sqrt(0) mod p = 0
+	}
+	// The Jacobi symbol is +1 from here on.
+	if x.neg || x.Cmp(p) >= 0 { // ensure 0 <= x < p
+		x = new(Int).Mod(x, p)
+	}
+
+	// Pick the algorithm from the low bits of p.
+	low := p.abs[0]
+	if low%4 == 3 {
+		// p is 3 mod 4: use the faster algorithm.
+		return z.modSqrt3Mod4Prime(x, p)
+	}
+	if low%8 == 5 {
+		// p is 5 mod 8: use Atkin's algorithm.
+		return z.modSqrt5Mod8Prime(x, p)
+	}
+	// Otherwise, use Tonelli-Shanks.
+	return z.modSqrtTonelliShanks(x, p)
+}
+
+// Lsh sets z = x << n and returns z.
+func (z *Int) Lsh(x *Int, n uint) *Int {
+	// Only the magnitude is shifted; the sign carries over from x.
+	z.abs, z.neg = z.abs.shl(x.abs, n), x.neg
+	return z
+}
+
+// Rsh sets z = x >> n and returns z.
+func (z *Int) Rsh(x *Int, n uint) *Int {
+	if !x.neg {
+		z.abs, z.neg = z.abs.shr(x.abs, n), false
+		return z
+	}
+
+	// Negative x is shifted as if it were stored in two's complement:
+	// (-x) >> s == ^(x-1) >> s == ^((x-1) >> s) == -(((x-1) >> s) + 1)
+	mag := z.abs.sub(x.abs, natOne) // no underflow because |x| > 0
+	mag = mag.shr(mag, n)
+	z.abs = mag.add(mag, natOne)
+	z.neg = true // z cannot be zero if x is negative
+	return z
+}
+
+// Bit returns the value of the i'th bit of x. That is, it
+// returns (x>>i)&1. The bit index i must be >= 0.
+func (x *Int) Bit(i int) uint {
+	switch {
+	case i == 0:
+		// optimization for common case: odd/even test of x
+		if len(x.abs) == 0 {
+			return 0
+		}
+		return uint(x.abs[0] & 1) // bit 0 is same for -x
+	case i < 0:
+		panic("negative bit index")
+	case x.neg:
+		// -|x| == ^(|x|-1): read the bit of |x|-1 and invert it
+		xm1 := nat(nil).sub(x.abs, natOne)
+		return xm1.bit(uint(i)) ^ 1
+	}
+	return x.abs.bit(uint(i))
+}
+
+// SetBit sets z to x, with x's i'th bit set to b (0 or 1).
+// That is, if b is 1 SetBit sets z = x | (1 << i);
+// if b is 0 SetBit sets z = x &^ (1 << i). If b is not 0 or 1,
+// SetBit will panic.
+func (z *Int) SetBit(x *Int, i int, b uint) *Int {
+	if i < 0 {
+		panic("negative bit index")
+	}
+	if !x.neg {
+		z.abs, z.neg = z.abs.setBit(x.abs, uint(i), b), false
+		return z
+	}
+	// x < 0: work on |x|-1, whose bits are the complement of x's, so store b^1 there
+	mag := z.abs.sub(x.abs, natOne)
+	mag = mag.setBit(mag, uint(i), b^1)
+	z.abs = mag.add(mag, natOne)
+	z.neg = len(z.abs) > 0
+	return z
+}
+
+// And sets z = x & y and returns z.
+func (z *Int) And(x, y *Int) *Int {
+	switch {
+	case !x.neg && !y.neg:
+		// both non-negative: x & y == x & y
+		z.abs = z.abs.and(x.abs, y.abs)
+		z.neg = false
+		return z
+	case x.neg && y.neg:
+		// (-x) & (-y) == ^(x-1) & ^(y-1) == ^((x-1) | (y-1)) == -(((x-1) | (y-1)) + 1)
+		xm1 := nat(nil).sub(x.abs, natOne)
+		ym1 := nat(nil).sub(y.abs, natOne)
+		z.abs = z.abs.add(z.abs.or(xm1, ym1), natOne)
+		z.neg = true // z cannot be zero if x and y are negative
+		return z
+	}
+
+	// Exactly one operand is negative; & is symmetric, so name them by sign.
+	pos, neg := x, y
+	if x.neg {
+		pos, neg = y, x
+	}
+
+	// pos & (-n) == pos & ^(n-1) == pos &^ (n-1), where n is the magnitude of neg
+	nm1 := nat(nil).sub(neg.abs, natOne)
+	z.abs = z.abs.andNot(pos.abs, nm1)
+	z.neg = false
+	return z
+}
+
+// AndNot sets z = x &^ y and returns z.
+func (z *Int) AndNot(x, y *Int) *Int {
+	// One case per sign combination. A negative operand -v is handled
+	// through the identity -v == ^(v-1), which turns every case into an
+	// operation on magnitudes.
+	switch {
+	case !x.neg && !y.neg:
+		// x &^ y == x &^ y
+		z.abs = z.abs.andNot(x.abs, y.abs)
+		z.neg = false
+
+	case x.neg && y.neg:
+		// (-x) &^ (-y) == ^(x-1) &^ ^(y-1) == ^(x-1) & (y-1) == (y-1) &^ (x-1)
+		xm1 := nat(nil).sub(x.abs, natOne)
+		ym1 := nat(nil).sub(y.abs, natOne)
+		z.abs = z.abs.andNot(ym1, xm1)
+		z.neg = false
+
+	case x.neg:
+		// (-x) &^ y == ^(x-1) &^ y == ^(x-1) & ^y == ^((x-1) | y) == -(((x-1) | y) + 1)
+		xm1 := nat(nil).sub(x.abs, natOne)
+		z.abs = z.abs.add(z.abs.or(xm1, y.abs), natOne)
+		z.neg = true // z cannot be zero if x is negative and y is positive
+
+	default:
+		// x &^ (-y) == x &^ ^(y-1) == x & (y-1)
+		ym1 := nat(nil).sub(y.abs, natOne)
+		z.abs = z.abs.and(x.abs, ym1)
+		z.neg = false
+	}
+	return z
+}
+
+// Or sets z = x | y and returns z.
+func (z *Int) Or(x, y *Int) *Int {
+	switch {
+	case !x.neg && !y.neg:
+		// both non-negative: x | y == x | y
+		z.abs = z.abs.or(x.abs, y.abs)
+		z.neg = false
+		return z
+	case x.neg && y.neg:
+		// (-x) | (-y) == ^(x-1) | ^(y-1) == ^((x-1) & (y-1)) == -(((x-1) & (y-1)) + 1)
+		xm1 := nat(nil).sub(x.abs, natOne)
+		ym1 := nat(nil).sub(y.abs, natOne)
+		z.abs = z.abs.add(z.abs.and(xm1, ym1), natOne)
+		z.neg = true // z cannot be zero if x and y are negative
+		return z
+	}
+
+	// Exactly one operand is negative; | is symmetric, so name them by sign.
+	pos, neg := x, y
+	if x.neg {
+		pos, neg = y, x
+	}
+
+	// pos | (-n) == pos | ^(n-1) == ^((n-1) &^ pos) == -(((n-1) &^ pos) + 1)
+	nm1 := nat(nil).sub(neg.abs, natOne)
+	z.abs = z.abs.add(z.abs.andNot(nm1, pos.abs), natOne)
+	z.neg = true // z cannot be zero if one of x or y is negative
+	return z
+}
+
+// Xor sets z = x ^ y and returns z.
+func (z *Int) Xor(x, y *Int) *Int {
+	switch {
+	case !x.neg && !y.neg:
+		// both non-negative: x ^ y == x ^ y
+		z.abs = z.abs.xor(x.abs, y.abs)
+		z.neg = false
+		return z
+	case x.neg && y.neg:
+		// (-x) ^ (-y) == ^(x-1) ^ ^(y-1) == (x-1) ^ (y-1)
+		xm1 := nat(nil).sub(x.abs, natOne)
+		ym1 := nat(nil).sub(y.abs, natOne)
+		z.abs = z.abs.xor(xm1, ym1)
+		z.neg = false
+		return z
+	}
+
+	// Exactly one operand is negative; ^ is symmetric, so name them by sign.
+	pos, neg := x, y
+	if x.neg {
+		pos, neg = y, x
+	}
+
+	// pos ^ (-n) == pos ^ ^(n-1) == ^(pos ^ (n-1)) == -((pos ^ (n-1)) + 1)
+	nm1 := nat(nil).sub(neg.abs, natOne)
+	z.abs = z.abs.add(z.abs.xor(pos.abs, nm1), natOne)
+	z.neg = true // z cannot be zero if only one of x or y is negative
+	return z
+}
+
+// Not sets z = ^x and returns z.
+func (z *Int) Not(x *Int) *Int {
+	if !x.neg {
+		// ^x == -x-1 == -(x+1)
+		z.abs = z.abs.add(x.abs, natOne)
+		z.neg = true // z cannot be zero if x is non-negative
+		return z
+	}
+
+	// ^(-x) == ^(^(x-1)) == x-1
+	z.abs = z.abs.sub(x.abs, natOne)
+	z.neg = false
+	return z
+}
+
+// Sqrt sets z to ⌊√x⌋, the largest integer such that z² ≤ x, and returns z.
+// It panics if x is negative.
+func (z *Int) Sqrt(x *Int) *Int {
+	if x.neg {
+		panic("square root of negative number")
+	}
+	// x is non-negative here, and so is its integer square root.
+	z.abs, z.neg = z.abs.sqrt(x.abs), false
+	return z
+}
diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go
new file mode 100644
index 0000000..cb964a4
--- /dev/null
+++ b/src/math/big/int_test.go
@@ -0,0 +1,2002 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"encoding/hex"
+	"fmt"
+	"internal/testenv"
+	"math"
+	"math/rand"
+	"strconv"
+	"strings"
+	"testing"
+	"testing/quick"
+)
+
+// isNormalized reports whether x is in normal form: a non-empty x.abs must
+// end in a non-zero word, and an empty x.abs must not be marked negative.
+func isNormalized(x *Int) bool {
+	if n := len(x.abs); n > 0 {
+		return x.abs[n-1] != 0
+	}
+	// len(x.abs) == 0
+	return !x.neg
+}
+
+// funZZ is a binary Int operation that stores its result in z and returns it.
+type funZZ func(z, x, y *Int) *Int
+
+// argZZ is one test case for a funZZ: z is the expected result of
+// applying the operation to x and y.
+type argZZ struct {
+	z, x, y *Int
+}
+
+// sumZZ lists {z, x, y} triples with z == x + y. TestSumZZ also derives
+// the corresponding subtraction cases from them.
+var sumZZ = []argZZ{
+	{NewInt(0), NewInt(0), NewInt(0)},
+	{NewInt(1), NewInt(1), NewInt(0)},
+	{NewInt(1111111110), NewInt(123456789), NewInt(987654321)},
+	{NewInt(-1), NewInt(-1), NewInt(0)},
+	{NewInt(864197532), NewInt(-123456789), NewInt(987654321)},
+	{NewInt(-1111111110), NewInt(-123456789), NewInt(-987654321)},
+}
+
+// prodZZ lists {z, x, y} triples with z == x * y, used by TestProdZZ.
+var prodZZ = []argZZ{
+	{NewInt(0), NewInt(0), NewInt(0)},
+	{NewInt(0), NewInt(1), NewInt(0)},
+	{NewInt(1), NewInt(1), NewInt(1)},
+	{NewInt(-991 * 991), NewInt(991), NewInt(-991)},
+	// TODO(gri) add larger products
+}
+
+func TestSignZ(t *testing.T) {
+	var zero Int
+	for _, a := range sumZZ {
+		s := a.z.Sign()
+		e := a.z.Cmp(&zero)
+		if s != e {
+			t.Errorf("got %d; want %d for z = %v", s, e, a.z)
+		}
+	}
+}
+
+func TestSetZ(t *testing.T) {
+	for _, a := range sumZZ {
+		var z Int
+		z.Set(a.z)
+		if !isNormalized(&z) {
+			t.Errorf("%v is not normalized", z)
+		}
+		if (&z).Cmp(a.z) != 0 {
+			t.Errorf("got z = %v; want %v", z, a.z)
+		}
+	}
+}
+
+func TestAbsZ(t *testing.T) {
+	var zero Int
+	for _, a := range sumZZ {
+		var z Int
+		z.Abs(a.z)
+		var e Int
+		e.Set(a.z)
+		if e.Cmp(&zero) < 0 {
+			e.Sub(&zero, &e)
+		}
+		if z.Cmp(&e) != 0 {
+			t.Errorf("got z = %v; want %v", z, e)
+		}
+	}
+}
+
+func testFunZZ(t *testing.T, msg string, f funZZ, a argZZ) {
+	var z Int
+	f(&z, a.x, a.y)
+	if !isNormalized(&z) {
+		t.Errorf("%s%v is not normalized", msg, z)
+	}
+	if (&z).Cmp(a.z) != 0 {
+		t.Errorf("%v %s %v\n\tgot z = %v; want %v", a.x, msg, a.y, &z, a.z)
+	}
+}
+
+// TestSumZZ checks Add and Sub on every sumZZ entry. For z == x + y it
+// verifies x + y, y + x, z - y == x and z - x == y.
+func TestSumZZ(t *testing.T) {
+	add := func(z, x, y *Int) *Int { return z.Add(x, y) }
+	sub := func(z, x, y *Int) *Int { return z.Sub(x, y) }
+
+	for _, a := range sumZZ {
+		testFunZZ(t, "AddZZ", add, a)
+		testFunZZ(t, "AddZZ symmetric", add, argZZ{a.z, a.y, a.x})
+		testFunZZ(t, "SubZZ", sub, argZZ{a.x, a.z, a.y})
+		testFunZZ(t, "SubZZ symmetric", sub, argZZ{a.y, a.z, a.x})
+	}
+}
+
+// TestProdZZ checks Mul on every prodZZ entry, in both operand orders.
+func TestProdZZ(t *testing.T) {
+	mul := func(z, x, y *Int) *Int { return z.Mul(x, y) }
+
+	for _, a := range prodZZ {
+		testFunZZ(t, "MulZZ", mul, a)
+		testFunZZ(t, "MulZZ symmetric", mul, argZZ{a.z, a.y, a.x})
+	}
+}
+
+// mulBytes computes x*y by schoolbook multiplication, one byte of y at a
+// time. The inputs and the result are big-endian byte strings (matching
+// Int.Bytes and Int.SetBytes); leading zero bytes are stripped from the
+// result.
+func mulBytes(x, y []byte) []byte {
+	prod := make([]byte, len(x)+len(y))
+
+	// Accumulate the partial product x*y[j] for each byte of y,
+	// starting with the least significant one.
+	for j := len(y) - 1; j >= 0; j-- {
+		digit := int(y[j])
+		if digit == 0 {
+			continue
+		}
+		// The lowest byte of this partial product lands at index len(x)+j.
+		pos := len(x) + j
+		carry := 0
+		for i := len(x) - 1; i >= 0; i-- {
+			sum := int(prod[pos]) + int(x[i])*digit + carry
+			prod[pos] = byte(sum)
+			carry = sum >> 8
+			pos--
+		}
+		prod[pos] = byte(carry)
+	}
+
+	// Drop leading zero bytes.
+	start := 0
+	for start < len(prod) && prod[start] == 0 {
+		start++
+	}
+	return prod[start:]
+}
+
+func checkMul(a, b []byte) bool {
+	var x, y, z1 Int
+	x.SetBytes(a)
+	y.SetBytes(b)
+	z1.Mul(&x, &y)
+
+	var z2 Int
+	z2.SetBytes(mulBytes(a, b))
+
+	return z1.Cmp(&z2) == 0
+}
+
+// TestMul runs checkMul on inputs generated by testing/quick.
+func TestMul(t *testing.T) {
+	err := quick.Check(checkMul, nil)
+	if err != nil {
+		t.Error(err)
+	}
+}
+
+// mulRangesZ lists MulRange(a, b) inputs together with the expected
+// product in decimal. It covers ranges that are empty, contain zero,
+// or are negative; see TestMulRangeZ.
+var mulRangesZ = []struct {
+	a, b int64
+	prod string
+}{
+	// entirely positive ranges are covered by mulRangesN
+	{-1, 1, "0"},
+	{-2, -1, "2"},
+	{-3, -2, "6"},
+	{-3, -1, "-6"},
+	{1, 3, "6"},
+	{-10, -10, "-10"},
+	{0, -1, "1"},                      // empty range
+	{-1, -100, "1"},                   // empty range
+	{-1, 1, "0"},                      // range includes 0
+	{-1e9, 0, "0"},                    // range includes 0
+	{-1e9, 1e9, "0"},                  // range includes 0
+	{-10, -1, "3628800"},              // 10!
+	{-20, -2, "-2432902008176640000"}, // -20!
+	{-99, -1,
+		"-933262154439441526816992388562667004907159682643816214685929" +
+			"638952175999932299156089414639761565182862536979208272237582" +
+			"511852109168640000000000000000000000", // -99!
+	},
+}
+
+// TestMulRangeZ checks Int.MulRange against the mulRangesN table
+// (entirely positive ranges) and the mulRangesZ table (all other ranges).
+// A single receiver is reused for every call.
+func TestMulRangeZ(t *testing.T) {
+	var z Int
+
+	// entirely positive ranges
+	for i, tc := range mulRangesN {
+		if got := z.MulRange(int64(tc.a), int64(tc.b)).String(); got != tc.prod {
+			t.Errorf("#%da: got %s; want %s", i, got, tc.prod)
+		}
+	}
+
+	// all other ranges
+	for i, tc := range mulRangesZ {
+		if got := z.MulRange(tc.a, tc.b).String(); got != tc.prod {
+			t.Errorf("#%db: got %s; want %s", i, got, tc.prod)
+		}
+	}
+}
+
+func TestBinomial(t *testing.T) {
+	var z Int
+	for _, test := range []struct {
+		n, k int64
+		want string
+	}{
+		{0, 0, "1"},
+		{0, 1, "0"},
+		{1, 0, "1"},
+		{1, 1, "1"},
+		{1, 10, "0"},
+		{4, 0, "1"},
+		{4, 1, "4"},
+		{4, 2, "6"},
+		{4, 3, "4"},
+		{4, 4, "1"},
+		{10, 1, "10"},
+		{10, 9, "10"},
+		{10, 5, "252"},
+		{11, 5, "462"},
+		{11, 6, "462"},
+		{100, 10, "17310309456440"},
+		{100, 90, "17310309456440"},
+		{1000, 10, "263409560461970212832400"},
+		{1000, 990, "263409560461970212832400"},
+	} {
+		if got := z.Binomial(test.n, test.k).String(); got != test.want {
+			t.Errorf("Binomial(%d, %d) = %s; want %s", test.n, test.k, got, test.want)
+		}
+	}
+}
+
+func BenchmarkBinomial(b *testing.B) {
+	var z Int
+	for i := 0; i < b.N; i++ {
+		z.Binomial(1000, 990)
+	}
+}
+
+// Examples from the Go Language Spec, section "Arithmetic operators"
+//
+// Each row gives the operands x and y, the expected results q and r of
+// Quo/Rem (T-division), and the expected results d and m of Div/Mod
+// (Euclidean division); see TestDivisionSigns.
+var divisionSignsTests = []struct {
+	x, y int64
+	q, r int64 // T-division
+	d, m int64 // Euclidean division
+}{
+	{5, 3, 1, 2, 1, 2},
+	{-5, 3, -1, -2, -2, 1},
+	{5, -3, -1, 2, -1, 2},
+	{-5, -3, 1, -2, 2, 1},
+	{1, 2, 0, 1, 0, 1},
+	{8, 4, 2, 0, 2, 0},
+}
+
+// TestDivisionSigns checks the sign conventions of the four division
+// entry points against divisionSignsTests: Quo and Rem called separately,
+// the combined QuoRem, Div and Mod called separately, and the combined
+// DivMod. Each result must be normalized and equal to the expected value.
+func TestDivisionSigns(t *testing.T) {
+	for i, test := range divisionSignsTests {
+		x := NewInt(test.x)
+		y := NewInt(test.y)
+		q := NewInt(test.q)
+		r := NewInt(test.r)
+		d := NewInt(test.d)
+		m := NewInt(test.m)
+
+		// T-division via separate Quo and Rem calls.
+		q1 := new(Int).Quo(x, y)
+		r1 := new(Int).Rem(x, y)
+		if !isNormalized(q1) {
+			t.Errorf("#%d Quo: %v is not normalized", i, *q1)
+		}
+		if !isNormalized(r1) {
+			t.Errorf("#%d Rem: %v is not normalized", i, *r1)
+		}
+		if q1.Cmp(q) != 0 || r1.Cmp(r) != 0 {
+			t.Errorf("#%d QuoRem: got (%s, %s), want (%s, %s)", i, q1, r1, q, r)
+		}
+
+		// T-division via the combined QuoRem call.
+		q2, r2 := new(Int).QuoRem(x, y, new(Int))
+		if !isNormalized(q2) {
+			t.Errorf("#%d Quo: %v is not normalized", i, *q2)
+		}
+		if !isNormalized(r2) {
+			t.Errorf("#%d Rem: %v is not normalized", i, *r2)
+		}
+		if q2.Cmp(q) != 0 || r2.Cmp(r) != 0 {
+			t.Errorf("#%d QuoRem: got (%s, %s), want (%s, %s)", i, q2, r2, q, r)
+		}
+
+		// Euclidean division via separate Div and Mod calls.
+		d1 := new(Int).Div(x, y)
+		m1 := new(Int).Mod(x, y)
+		if !isNormalized(d1) {
+			t.Errorf("#%d Div: %v is not normalized", i, *d1)
+		}
+		if !isNormalized(m1) {
+			t.Errorf("#%d Mod: %v is not normalized", i, *m1)
+		}
+		if d1.Cmp(d) != 0 || m1.Cmp(m) != 0 {
+			t.Errorf("#%d DivMod: got (%s, %s), want (%s, %s)", i, d1, m1, d, m)
+		}
+
+		// Euclidean division via the combined DivMod call.
+		d2, m2 := new(Int).DivMod(x, y, new(Int))
+		if !isNormalized(d2) {
+			t.Errorf("#%d Div: %v is not normalized", i, *d2)
+		}
+		if !isNormalized(m2) {
+			t.Errorf("#%d Mod: %v is not normalized", i, *m2)
+		}
+		if d2.Cmp(d) != 0 || m2.Cmp(m) != 0 {
+			t.Errorf("#%d DivMod: got (%s, %s), want (%s, %s)", i, d2, m2, d, m)
+		}
+	}
+}
+
+// norm returns x with all trailing zero words sliced off.
+// The result shares x's backing array.
+func norm(x nat) nat {
+	n := len(x)
+	for ; n > 0; n-- {
+		if x[n-1] != 0 {
+			break
+		}
+	}
+	return x[:n]
+}
+
+func TestBits(t *testing.T) {
+	for _, test := range []nat{
+		nil,
+		{0},
+		{1},
+		{0, 1, 2, 3, 4},
+		{4, 3, 2, 1, 0},
+		{4, 3, 2, 1, 0, 0, 0, 0},
+	} {
+		var z Int
+		z.neg = true
+		got := z.SetBits(test)
+		want := norm(test)
+		if got.abs.cmp(want) != 0 {
+			t.Errorf("SetBits(%v) = %v; want %v", test, got.abs, want)
+		}
+
+		if got.neg {
+			t.Errorf("SetBits(%v): got negative result", test)
+		}
+
+		bits := nat(z.Bits())
+		if bits.cmp(want) != 0 {
+			t.Errorf("%v.Bits() = %v; want %v", z.abs, bits, want)
+		}
+	}
+}
+
+// checkSetBytes reports whether b survives a SetBytes/Bytes round trip.
+// Both sides are compared as hex strings, with the shorter one
+// left-padded with '0' digits to the length of the longer one.
+func checkSetBytes(b []byte) bool {
+	got := hex.EncodeToString(new(Int).SetBytes(b).Bytes())
+	want := hex.EncodeToString(b)
+
+	switch d := len(want) - len(got); {
+	case d > 0:
+		got = strings.Repeat("0", d) + got
+	case d < 0:
+		want = strings.Repeat("0", -d) + want
+	}
+
+	return got == want
+}
+
+// TestSetBytes runs checkSetBytes on inputs generated by testing/quick.
+func TestSetBytes(t *testing.T) {
+	err := quick.Check(checkSetBytes, nil)
+	if err != nil {
+		t.Error(err)
+	}
+}
+
+// checkBytes reports whether Bytes returns exactly the bytes given to
+// SetBytes, once leading zero bytes have been removed from the input.
+func checkBytes(b []byte) bool {
+	// Bytes() never returns leading zero bytes, so skip them here
+	// (was issue 12231).
+	start := 0
+	for start < len(b) && b[start] == 0 {
+		start++
+	}
+	trimmed := b[start:]
+
+	roundTrip := new(Int).SetBytes(trimmed).Bytes()
+	return bytes.Equal(trimmed, roundTrip)
+}
+
+// TestBytes runs checkBytes on inputs generated by testing/quick.
+func TestBytes(t *testing.T) {
+	err := quick.Check(checkBytes, nil)
+	if err != nil {
+		t.Error(err)
+	}
+}
+
+// checkQuo reports whether QuoRem yields a quotient q and remainder r
+// for the values encoded by x and y such that r < y and q*y + r equals x.
+// A zero divisor is treated as trivially passing.
+func checkQuo(x, y []byte) bool {
+	u := new(Int).SetBytes(x)
+	v := new(Int).SetBytes(y)
+
+	if len(v.abs) == 0 {
+		return true
+	}
+
+	q, r := new(Int).QuoRem(u, v, new(Int))
+
+	// The remainder must be smaller than the divisor.
+	if r.Cmp(v) >= 0 {
+		return false
+	}
+
+	// Rebuild the dividend as q*v + r.
+	rebuilt := new(Int).Set(q)
+	rebuilt.Mul(rebuilt, v)
+	rebuilt.Add(rebuilt, r)
+
+	return rebuilt.Cmp(u) == 0
+}
+
+// quoTests lists decimal operands x and y together with the expected
+// quotient q and remainder r of QuoRem(x, y); see TestQuo.
+var quoTests = []struct {
+	x, y string
+	q, r string
+}{
+	{
+		"476217953993950760840509444250624797097991362735329973741718102894495832294430498335824897858659711275234906400899559094370964723884706254265559534144986498357",
+		"9353930466774385905609975137998169297361893554149986716853295022578535724979483772383667534691121982974895531435241089241440253066816724367338287092081996",
+		"50911",
+		"1",
+	},
+	{
+		"11510768301994997771168",
+		"1328165573307167369775",
+		"8",
+		"885443715537658812968",
+	},
+}
+
+// TestQuo checks QuoRem with the randomized checkQuo property and
+// against the fixed cases in quoTests.
+func TestQuo(t *testing.T) {
+	if err := quick.Check(checkQuo, nil); err != nil {
+		t.Error(err)
+	}
+
+	for i, tc := range quoTests {
+		x, _ := new(Int).SetString(tc.x, 10)
+		y, _ := new(Int).SetString(tc.y, 10)
+		wantQ, _ := new(Int).SetString(tc.q, 10)
+		wantR, _ := new(Int).SetString(tc.r, 10)
+
+		q, r := new(Int).QuoRem(x, y, new(Int))
+		if q.Cmp(wantQ) == 0 && r.Cmp(wantR) == 0 {
+			continue
+		}
+		t.Errorf("#%d got (%s, %s) want (%s, %s)", i, q, r, wantQ, wantR)
+	}
+}
+
+// TestQuoStepD6 divides operands built from _W and _M and accepts the
+// expected quotient and remainder for either a 64-bit or a 32-bit Word.
+func TestQuoStepD6(t *testing.T) {
+	// See Knuth, Volume 2, section 4.3.1, exercise 21. This code exercises
+	// a code path which only triggers in about 1 in 10^19 cases.
+
+	const (
+		expectedQ64 = "18446744073709551613"
+		expectedR64 = "3138550867693340382088035895064302439801311770021610913807"
+		expectedQ32 = "4294967293"
+		expectedR32 = "39614081266355540837921718287"
+	)
+
+	u := &Int{false, nat{0, 0, 1 + 1<<(_W-1), _M ^ (1 << (_W - 1))}}
+	v := &Int{false, nat{5, 2 + 1<<(_W-1), 1 << (_W - 1)}}
+
+	q, r := new(Int).QuoRem(u, v, new(Int))
+
+	qs, rs := q.String(), r.String()
+	qOK := qs == expectedQ64 || qs == expectedQ32
+	rOK := rs == expectedR64 || rs == expectedR32
+	if !qOK || !rOK {
+		t.Errorf("got (%s, %s) want (%s, %s) or (%s, %s)", q, r, expectedQ64, expectedR64, expectedQ32, expectedR32)
+	}
+}
+
+// BenchmarkQuoRem measures QuoRem of the larger operand y by the smaller
+// operand x, reusing the quotient and remainder receivers.
+func BenchmarkQuoRem(b *testing.B) {
+	x, _ := new(Int).SetString("153980389784927331788354528594524332344709972855165340650588877572729725338415474372475094155672066328274535240275856844648695200875763869073572078279316458648124537905600131008790701752441155668003033945258023841165089852359980273279085783159654751552359397986180318708491098942831252291841441726305535546071", 0)
+	y, _ := new(Int).SetString("7746362281539803897849273317883545285945243323447099728551653406505888775727297253384154743724750941556720663282745352402758568446486952008757638690735720782793164586481245379056001310087907017524411556680030339452580238411650898523599802732790857831596547515523593979861803187084910989428312522918414417263055355460715745539358014631136245887418412633787074173796862711588221766398229333338511838891484974940633857861775630560092874987828057333663969469797013996401149696897591265769095952887917296740109742927689053276850469671231961384715398038978492733178835452859452433234470997285516534065058887757272972533841547437247509415567206632827453524027585684464869520087576386907357207827931645864812453790560013100879070175244115566800303394525802384116508985235998027327908578315965475155235939798618031870849109894283125229184144172630553554607112725169432413343763989564437170644270643461665184965150423819594083121075825", 0)
+	q := new(Int)
+	r := new(Int)
+
+	// Exclude operand parsing from the measurement.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		q.QuoRem(y, x, r)
+	}
+}
+
+// bitLenTests maps an input string to its expected BitLen.
+// TestBitLen parses in with SetString(in, 0).
+var bitLenTests = []struct {
+	in  string
+	out int
+}{
+	{"-1", 1},
+	{"0", 0},
+	{"1", 1},
+	{"2", 2},
+	{"4", 3},
+	{"0xabc", 12},
+	{"0x8000", 16},
+	{"0x80000000", 32},
+	{"0x800000000000", 48},
+	{"0x8000000000000000", 64},
+	{"0x80000000000000000000", 80},
+	{"-0x4000000000000000000000", 87},
+}
+
+// TestBitLen checks BitLen against every entry of bitLenTests.
+func TestBitLen(t *testing.T) {
+	for i, tc := range bitLenTests {
+		x, ok := new(Int).SetString(tc.in, 0)
+		switch {
+		case !ok:
+			t.Errorf("#%d test input invalid: %s", i, tc.in)
+		case x.BitLen() != tc.out:
+			t.Errorf("#%d got %d want %d", i, x.BitLen(), tc.out)
+		}
+	}
+}
+
+// expTests lists inputs for Exp(x, y, m) and the expected result out.
+// TestExp parses every field with SetString(s, 0). An empty m stands
+// for a nil modulus and an empty out means Exp is expected to return nil.
+var expTests = []struct {
+	x, y, m string
+	out     string
+}{
+	// y <= 0
+	{"0", "0", "", "1"},
+	{"1", "0", "", "1"},
+	{"-10", "0", "", "1"},
+	{"1234", "-1", "", "1"},
+	{"1234", "-1", "0", "1"},
+	{"17", "-100", "1234", "865"},
+	{"2", "-100", "1234", ""},
+
+	// m == 1
+	{"0", "0", "1", "0"},
+	{"1", "0", "1", "0"},
+	{"-10", "0", "1", "0"},
+	{"1234", "-1", "1", "0"},
+
+	// misc
+	{"5", "1", "3", "2"},
+	{"5", "-7", "", "1"},
+	{"-5", "-7", "", "1"},
+	{"5", "0", "", "1"},
+	{"-5", "0", "", "1"},
+	{"5", "1", "", "5"},
+	{"-5", "1", "", "-5"},
+	{"-5", "1", "7", "2"},
+	{"-2", "3", "2", "0"},
+	{"5", "2", "", "25"},
+	{"1", "65537", "2", "1"},
+	{"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"},
+	{"0x8000000000000000", "2", "6719", "4944"},
+	{"0x8000000000000000", "3", "6719", "5447"},
+	{"0x8000000000000000", "1000", "6719", "1603"},
+	{"0x8000000000000000", "1000000", "6719", "3199"},
+	{"0x8000000000000000", "-1000000", "6719", "3663"}, // 3663 = ModInverse(3199, 6719) Issue #25865
+
+	{"0xffffffffffffffffffffffffffffffff", "0x12345678123456781234567812345678123456789", "0x01112222333344445555666677778889", "0x36168FA1DB3AAE6C8CE647E137F97A"},
+
+	{
+		"2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347",
+		"298472983472983471903246121093472394872319615612417471234712061",
+		"29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464",
+		"23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291",
+	},
+	// test case for issue 8822
+	{
+		"11001289118363089646017359372117963499250546375269047542777928006103246876688756735760905680604646624353196869572752623285140408755420374049317646428185270079555372763503115646054602867593662923894140940837479507194934267532831694565516466765025434902348314525627418515646588160955862839022051353653052947073136084780742729727874803457643848197499548297570026926927502505634297079527299004267769780768565695459945235586892627059178884998772989397505061206395455591503771677500931269477503508150175717121828518985901959919560700853226255420793148986854391552859459511723547532575574664944815966793196961286234040892865",
+		"0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD",
+		"0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73",
+		"21484252197776302499639938883777710321993113097987201050501182909581359357618579566746556372589385361683610524730509041328855066514963385522570894839035884713051640171474186548713546686476761306436434146475140156284389181808675016576845833340494848283681088886584219750554408060556769486628029028720727393293111678826356480455433909233520504112074401376133077150471237549474149190242010469539006449596611576612573955754349042329130631128234637924786466585703488460540228477440853493392086251021228087076124706778899179648655221663765993962724699135217212118535057766739392069738618682722216712319320435674779146070442",
+	},
+	{
+		"-0x1BCE04427D8032319A89E5C4136456671AC620883F2C4139E57F91307C485AD2D6204F4F87A58262652DB5DBBAC72B0613E51B835E7153BEC6068F5C8D696B74DBD18FEC316AEF73985CF0475663208EB46B4F17DD9DA55367B03323E5491A70997B90C059FB34809E6EE55BCFBD5F2F52233BFE62E6AA9E4E26A1D4C2439883D14F2633D55D8AA66A1ACD5595E778AC3A280517F1157989E70C1A437B849F1877B779CC3CDDEDE2DAA6594A6C66D181A00A5F777EE60596D8773998F6E988DEAE4CCA60E4DDCF9590543C89F74F603259FCAD71660D30294FBBE6490300F78A9D63FA660DC9417B8B9DDA28BEB3977B621B988E23D4D954F322C3540541BC649ABD504C50FADFD9F0987D58A2BF689313A285E773FF02899A6EF887D1D4A0D2",
+		"0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD",
+		"0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73",
+		"21484252197776302499639938883777710321993113097987201050501182909581359357618579566746556372589385361683610524730509041328855066514963385522570894839035884713051640171474186548713546686476761306436434146475140156284389181808675016576845833340494848283681088886584219750554408060556769486628029028720727393293111678826356480455433909233520504112074401376133077150471237549474149190242010469539006449596611576612573955754349042329130631128234637924786466585703488460540228477440853493392086251021228087076124706778899179648655221663765993962724699135217212118535057766739392069738618682722216712319320435674779146070442",
+	},
+
+	// test cases for issue 13907
+	{"0xffffffff00000001", "0xffffffff00000001", "0xffffffff00000001", "0"},
+	{"0xffffffffffffffff00000001", "0xffffffffffffffff00000001", "0xffffffffffffffff00000001", "0"},
+	{"0xffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffff00000001", "0"},
+	{"0xffffffffffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffffffffffff00000001", "0xffffffffffffffffffffffffffffffff00000001", "0"},
+
+	{
+		"2",
+		"0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD",
+		"0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", // odd
+		"0x6AADD3E3E424D5B713FCAA8D8945B1E055166132038C57BBD2D51C833F0C5EA2007A2324CE514F8E8C2F008A2F36F44005A4039CB55830986F734C93DAF0EB4BAB54A6A8C7081864F44346E9BC6F0A3EB9F2C0146A00C6A05187D0C101E1F2D038CDB70CB5E9E05A2D188AB6CBB46286624D4415E7D4DBFAD3BCC6009D915C406EED38F468B940F41E6BEDC0430DD78E6F19A7DA3A27498A4181E24D738B0072D8F6ADB8C9809A5B033A09785814FD9919F6EF9F83EEA519BEC593855C4C10CBEEC582D4AE0792158823B0275E6AEC35242740468FAF3D5C60FD1E376362B6322F78B7ED0CA1C5BBCD2B49734A56C0967A1D01A100932C837B91D592CE08ABFF",
+	},
+	{
+		"2",
+		"0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD",
+		"0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", // even
+		"0x7858794B5897C29F4ED0B40913416AB6C48588484E6A45F2ED3E26C941D878E923575AAC434EE2750E6439A6976F9BB4D64CEDB2A53CE8D04DD48CADCDF8E46F22747C6B81C6CEA86C0D873FBF7CEF262BAAC43A522BD7F32F3CDAC52B9337C77B3DCFB3DB3EDD80476331E82F4B1DF8EFDC1220C92656DFC9197BDC1877804E28D928A2A284B8DED506CBA304435C9D0133C246C98A7D890D1DE60CBC53A024361DA83A9B8775019083D22AC6820ED7C3C68F8E801DD4EC779EE0A05C6EB682EF9840D285B838369BA7E148FA27691D524FAEAF7C6ECE2A4B99A294B9F2C241857B5B90CC8BFFCFCF18DFA7D676131D5CD3855A5A3E8EBFA0CDFADB4D198B4A",
+	},
+}
+
+// TestExp checks Exp against every entry of expTests.
+//
+// An empty test.m is passed to Exp as a nil modulus and an empty test.out
+// means Exp is expected to return nil. For nil-modulus cases the test also
+// verifies that a non-nil modulus with an empty abs gives the same result.
+func TestExp(t *testing.T) {
+	for i, test := range expTests {
+		x, ok1 := new(Int).SetString(test.x, 0)
+		y, ok2 := new(Int).SetString(test.y, 0)
+
+		var ok3, ok4 bool
+		var out, m *Int
+
+		if len(test.out) == 0 {
+			out, ok3 = nil, true
+		} else {
+			out, ok3 = new(Int).SetString(test.out, 0)
+		}
+
+		if len(test.m) == 0 {
+			m, ok4 = nil, true
+		} else {
+			m, ok4 = new(Int).SetString(test.m, 0)
+		}
+
+		if !ok1 || !ok2 || !ok3 || !ok4 {
+			t.Errorf("#%d: error in input", i)
+			continue
+		}
+
+		z1 := new(Int).Exp(x, y, m)
+		if z1 != nil && !isNormalized(z1) {
+			t.Errorf("#%d: %v is not normalized", i, *z1)
+		}
+		// Compare nil-ness before calling Cmp: if exactly one of z1 and out
+		// is nil, the mismatch must be reported as a test failure rather
+		// than crash the test with a nil dereference inside Cmp.
+		if (z1 == nil) != (out == nil) || z1 != nil && z1.Cmp(out) != 0 {
+			t.Errorf("#%d: got %x want %x", i, z1, out)
+		}
+
+		if m == nil {
+			// The result should be the same as for m == 0;
+			// specifically, there should be no div-zero panic.
+			m = &Int{abs: nat{}} // m != nil && len(m.abs) == 0
+			z2 := new(Int).Exp(x, y, m)
+			if (z2 == nil) != (z1 == nil) || z2 != nil && z2.Cmp(z1) != 0 {
+				t.Errorf("#%d: got %x want %x", i, z2, z1)
+			}
+		}
+	}
+}
+
+// BenchmarkExp measures modular exponentiation out = x**y mod n with large
+// operands and a reused receiver. The modulus n is odd (it ends in ...73).
+func BenchmarkExp(b *testing.B) {
+	x, _ := new(Int).SetString("11001289118363089646017359372117963499250546375269047542777928006103246876688756735760905680604646624353196869572752623285140408755420374049317646428185270079555372763503115646054602867593662923894140940837479507194934267532831694565516466765025434902348314525627418515646588160955862839022051353653052947073136084780742729727874803457643848197499548297570026926927502505634297079527299004267769780768565695459945235586892627059178884998772989397505061206395455591503771677500931269477503508150175717121828518985901959919560700853226255420793148986854391552859459511723547532575574664944815966793196961286234040892865", 0)
+	y, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", 0)
+	n, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", 0)
+	out := new(Int)
+	for i := 0; i < b.N; i++ {
+		out.Exp(x, y, n)
+	}
+}
+
+// BenchmarkExpMont measures Exp(x, y, n) for a fixed base and exponent
+// over a series of moduli, one sub-benchmark per modulus. The "Odd"
+// modulus ends in an odd digit; the "Even..." and "SmallEven..." moduli
+// end in progressively more zero bits.
+func BenchmarkExpMont(b *testing.B) {
+	x, _ := new(Int).SetString("297778224889315382157302278696111964193", 0)
+	y, _ := new(Int).SetString("2548977943381019743024248146923164919440527843026415174732254534318292492375775985739511369575861449426580651447974311336267954477239437734832604782764979371984246675241012538135715981292390886872929238062252506842498360562303324154310849745753254532852868768268023732398278338025070694508489163836616810661033068070127919590264734220833816416141878688318329193389865030063416339367925710474801991305827284114894677717927892032165200876093838921477120036402410731159852999623461591709308405270748511350289172153076023215", 0)
+	var mods = []struct {
+		name string
+		val  string
+	}{
+		{"Odd", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF"},
+		{"Even1", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FE"},
+		{"Even2", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FC"},
+		{"Even3", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281F8"},
+		{"Even4", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281F0"},
+		{"Even8", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B21828100"},
+		{"Even32", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B00000000"},
+		{"Even64", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828282828200FF0000000000000000"},
+		{"Even96", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF82828283000000000000000000000000"},
+		{"Even128", "0x82828282828200FFFF28FF2B218281FF82828282828200FFFF28FF2B218281FF00000000000000000000000000000000"},
+		{"Even255", "0x82828282828200FFFF28FF2B218281FF8000000000000000000000000000000000000000000000000000000000000000"},
+		{"SmallEven1", "0x7E"},
+		{"SmallEven2", "0x7C"},
+		{"SmallEven3", "0x78"},
+		{"SmallEven4", "0x70"},
+	}
+	for _, mod := range mods {
+		// Parse the modulus and allocate the receiver outside b.Run so
+		// that neither is part of the measured sub-benchmark body.
+		n, _ := new(Int).SetString(mod.val, 0)
+		out := new(Int)
+		b.Run(mod.name, func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				out.Exp(x, y, n)
+			}
+		})
+	}
+}
+
+// BenchmarkExp2 measures modular exponentiation with base 2, using the
+// same exponent and modulus literals as BenchmarkExp.
+func BenchmarkExp2(b *testing.B) {
+	x, _ := new(Int).SetString("2", 0)
+	y, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF72", 0)
+	n, _ := new(Int).SetString("0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", 0)
+	out := new(Int)
+	for i := 0; i < b.N; i++ {
+		out.Exp(x, y, n)
+	}
+}
+
+// checkGcd reports whether the cofactors x and y returned by GCD for the
+// values encoded by aBytes and bBytes satisfy a*x + b*y == d, where d is
+// the value GCD returns.
+func checkGcd(aBytes, bBytes []byte) bool {
+	a := new(Int).SetBytes(aBytes)
+	b := new(Int).SetBytes(bBytes)
+	x, y := new(Int), new(Int)
+
+	d := new(Int).GCD(x, y, a, b)
+
+	// Fold a*x + b*y into x, reusing x and y as scratch space.
+	x.Mul(x, a)
+	y.Mul(y, b)
+	x.Add(x, y)
+
+	return x.Cmp(d) == 0
+}
+
+// euclidExtGCD is a reference implementation of Euclid's
+// extended GCD algorithm for testing against optimized algorithms.
+// Requirements: a, b > 0
+//
+// It returns g together with cofactors x and y such that g = x*a + y*b,
+// following the invariants maintained in the loop below.
+// The arguments a and b are copied and not modified.
+func euclidExtGCD(a, b *Int) (g, x, y *Int) {
+	A := new(Int).Set(a)
+	B := new(Int).Set(b)
+
+	// A = Ua*a + Va*b
+	// B = Ub*a + Vb*b
+	Ua := new(Int).SetInt64(1)
+	Va := new(Int)
+
+	Ub := new(Int)
+	Vb := new(Int).SetInt64(1)
+
+	q := new(Int)
+	temp := new(Int)
+
+	r := new(Int)
+	for len(B.abs) > 0 {
+		q, r = q.QuoRem(A, B, r)
+
+		// Rotate: (A, B) becomes (B, A mod B); the old A is
+		// recycled as the remainder receiver for the next step.
+		A, B, r = B, r, A
+
+		// Ua, Ub = Ub, Ua-q*Ub
+		temp.Set(Ub)
+		Ub.Mul(Ub, q)
+		Ub.Sub(Ua, Ub)
+		Ua.Set(temp)
+
+		// Va, Vb = Vb, Va-q*Vb
+		temp.Set(Vb)
+		Vb.Mul(Vb, q)
+		Vb.Sub(Va, Vb)
+		Va.Set(temp)
+	}
+	return A, Ua, Va
+}
+
+// checkLehmerGcd reports whether lehmerGCD, called without cofactor
+// outputs, returns the same GCD as euclidExtGCD. Inputs that are not
+// both positive are treated as trivially passing.
+func checkLehmerGcd(aBytes, bBytes []byte) bool {
+	a := new(Int).SetBytes(aBytes)
+	b := new(Int).SetBytes(bBytes)
+
+	// can only test positive arguments
+	if a.Sign() <= 0 || b.Sign() <= 0 {
+		return true
+	}
+
+	got := new(Int).lehmerGCD(nil, nil, a, b)
+	want, _, _ := euclidExtGCD(a, b)
+
+	return got.Cmp(want) == 0
+}
+
+// checkLehmerExtGcd reports whether lehmerGCD returns the same GCD and
+// the same cofactors x and y as euclidExtGCD. Inputs that are not both
+// positive are treated as trivially passing.
+func checkLehmerExtGcd(aBytes, bBytes []byte) bool {
+	a := new(Int).SetBytes(aBytes)
+	b := new(Int).SetBytes(bBytes)
+
+	// can only test positive arguments
+	if a.Sign() <= 0 || b.Sign() <= 0 {
+		return true
+	}
+
+	x, y := new(Int), new(Int)
+	d := new(Int).lehmerGCD(x, y, a, b)
+	wantD, wantX, wantY := euclidExtGCD(a, b)
+
+	if d.Cmp(wantD) != 0 {
+		return false
+	}
+	return x.Cmp(wantX) == 0 && y.Cmp(wantY) == 0
+}
+
+// gcdTests lists inputs a and b for GCD together with the expected
+// result d and the expected cofactors x and y; see testGcd.
+// All fields are parsed with SetString(s, 0).
+var gcdTests = []struct {
+	d, x, y, a, b string
+}{
+	// a <= 0 || b <= 0
+	// (the 64515/24310 row with both operands positive sits next to its signed variants)
+	{"0", "0", "0", "0", "0"},
+	{"7", "0", "1", "0", "7"},
+	{"7", "0", "-1", "0", "-7"},
+	{"11", "1", "0", "11", "0"},
+	{"7", "-1", "-2", "-77", "35"},
+	{"935", "-3", "8", "64515", "24310"},
+	{"935", "-3", "-8", "64515", "-24310"},
+	{"935", "3", "-8", "-64515", "-24310"},
+
+	{"1", "-9", "47", "120", "23"},
+	{"7", "1", "-2", "77", "35"},
+	{"935", "-3", "8", "64515", "24310"},
+	{"935000000000000000", "-3", "8", "64515000000000000000", "24310000000000000000"},
+	{"1", "-221", "22059940471369027483332068679400581064239780177629666810348940098015901108344", "98920366548084643601728869055592650835572950932266967461790948584315647051443", "991"},
+}
+
+// testGcd checks GCD(a, b) against the expected result d and, where x or
+// y is non-nil, against the expected cofactor. A nil x or y means the
+// corresponding cofactor is not requested from GCD at all.
+//
+// After the plain call it repeats the check with the receiver aliased to
+// each argument, and with the cofactor outputs aliased to the arguments
+// in both orders.
+func testGcd(t *testing.T, d, x, y, a, b *Int) {
+	// Request a cofactor from GCD only if an expected value was supplied.
+	var X *Int
+	if x != nil {
+		X = new(Int)
+	}
+	var Y *Int
+	if y != nil {
+		Y = new(Int)
+	}
+
+	D := new(Int).GCD(X, Y, a, b)
+	if D.Cmp(d) != 0 {
+		t.Errorf("GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, D, d)
+	}
+	if x != nil && X.Cmp(x) != 0 {
+		t.Errorf("GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, X, x)
+	}
+	if y != nil && Y.Cmp(y) != 0 {
+		t.Errorf("GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, Y, y)
+	}
+
+	// check results in presence of aliasing (issue #11284)
+	a2 := new(Int).Set(a)
+	b2 := new(Int).Set(b)
+	a2.GCD(X, Y, a2, b2) // result is same as 1st argument
+	if a2.Cmp(d) != 0 {
+		t.Errorf("aliased z = a GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, a2, d)
+	}
+	if x != nil && X.Cmp(x) != 0 {
+		t.Errorf("aliased z = a GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, X, x)
+	}
+	if y != nil && Y.Cmp(y) != 0 {
+		t.Errorf("aliased z = a GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, Y, y)
+	}
+
+	// Fresh copies: the previous call overwrote a2 with the result.
+	a2 = new(Int).Set(a)
+	b2 = new(Int).Set(b)
+	b2.GCD(X, Y, a2, b2) // result is same as 2nd argument
+	if b2.Cmp(d) != 0 {
+		t.Errorf("aliased z = b GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, b2, d)
+	}
+	if x != nil && X.Cmp(x) != 0 {
+		t.Errorf("aliased z = b GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, X, x)
+	}
+	if y != nil && Y.Cmp(y) != 0 {
+		t.Errorf("aliased z = b GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, Y, y)
+	}
+
+	// Cofactor outputs alias the inputs, so a2 and b2 hold x and y afterwards.
+	a2 = new(Int).Set(a)
+	b2 = new(Int).Set(b)
+	D = new(Int).GCD(a2, b2, a2, b2) // x = a, y = b
+	if D.Cmp(d) != 0 {
+		t.Errorf("aliased x = a, y = b GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, D, d)
+	}
+	if x != nil && a2.Cmp(x) != 0 {
+		t.Errorf("aliased x = a, y = b GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, a2, x)
+	}
+	if y != nil && b2.Cmp(y) != 0 {
+		t.Errorf("aliased x = a, y = b GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, b2, y)
+	}
+
+	// Same, with the outputs crossed: b2 receives x and a2 receives y.
+	a2 = new(Int).Set(a)
+	b2 = new(Int).Set(b)
+	D = new(Int).GCD(b2, a2, a2, b2) // x = b, y = a
+	if D.Cmp(d) != 0 {
+		t.Errorf("aliased x = b, y = a GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, D, d)
+	}
+	if x != nil && b2.Cmp(x) != 0 {
+		t.Errorf("aliased x = b, y = a GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, b2, x)
+	}
+	if y != nil && a2.Cmp(y) != 0 {
+		t.Errorf("aliased x = b, y = a GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, a2, y)
+	}
+}
+
+// TestGcd runs testGcd over every gcdTests entry with each combination of
+// requested cofactors, then runs the randomized GCD property checks.
+func TestGcd(t *testing.T) {
+	// parse converts a table string into an *Int; the ok result is ignored,
+	// so a malformed entry yields a nil *Int.
+	parse := func(s string) *Int {
+		v, _ := new(Int).SetString(s, 0)
+		return v
+	}
+
+	for _, tc := range gcdTests {
+		d, x, y := parse(tc.d), parse(tc.x), parse(tc.y)
+		a, b := parse(tc.a), parse(tc.b)
+
+		// Neither cofactor, x only, y only, both.
+		for _, xy := range [][2]*Int{{nil, nil}, {x, nil}, {nil, y}, {x, y}} {
+			testGcd(t, d, xy[0], xy[1], a, b)
+		}
+	}
+
+	for _, property := range []interface{}{checkGcd, checkLehmerGcd, checkLehmerExtGcd} {
+		if err := quick.Check(property, nil); err != nil {
+			t.Error(err)
+		}
+	}
+}
+
+// intShiftTest describes one shift test case: the input value in, the shift
+// count, and the expected result out. in and out are decimal strings.
+type intShiftTest struct {
+	in    string
+	shift uint
+	out   string
+}
+
+// rshTests lists right-shift cases (in >> shift == out) used by TestRsh and
+// TestRshSelf; TestLshRsh reuses the inputs and shift counts. Negative
+// inputs round toward negative infinity, e.g. -100 >> 3 is -13.
+var rshTests = []intShiftTest{
+	{"0", 0, "0"},
+	{"-0", 0, "0"},
+	{"0", 1, "0"},
+	{"0", 2, "0"},
+	{"1", 0, "1"},
+	{"1", 1, "0"},
+	{"1", 2, "0"},
+	{"2", 0, "2"},
+	{"2", 1, "1"},
+	{"-1", 0, "-1"},
+	{"-1", 1, "-1"},
+	{"-1", 10, "-1"},
+	{"-100", 2, "-25"},
+	{"-100", 3, "-13"},
+	{"-100", 100, "-1"},
+	{"4294967296", 0, "4294967296"},
+	{"4294967296", 1, "2147483648"},
+	{"4294967296", 2, "1073741824"},
+	{"18446744073709551616", 0, "18446744073709551616"},
+	{"18446744073709551616", 1, "9223372036854775808"},
+	{"18446744073709551616", 2, "4611686018427387904"},
+	{"18446744073709551616", 64, "1"},
+	{"340282366920938463463374607431768211456", 64, "18446744073709551616"},
+	{"340282366920938463463374607431768211456", 128, "1"},
+}
+
+func TestRsh(t *testing.T) {
+	for i, test := range rshTests {
+		in, _ := new(Int).SetString(test.in, 10)
+		expected, _ := new(Int).SetString(test.out, 10)
+		out := new(Int).Rsh(in, test.shift)
+
+		if !isNormalized(out) {
+			t.Errorf("#%d: %v is not normalized", i, *out)
+		}
+		if out.Cmp(expected) != 0 {
+			t.Errorf("#%d: got %s want %s", i, out, expected)
+		}
+	}
+}
+
+// TestRshSelf checks Rsh when the receiver is also the operand, using the
+// rshTests table.
+func TestRshSelf(t *testing.T) {
+	for i := range rshTests {
+		tc := rshTests[i]
+		want, _ := new(Int).SetString(tc.out, 10)
+		v, _ := new(Int).SetString(tc.in, 10)
+
+		// Shift in place: destination and source are the same Int.
+		v.Rsh(v, tc.shift)
+
+		if !isNormalized(v) {
+			t.Errorf("#%d: %v is not normalized", i, *v)
+		}
+		if v.Cmp(want) != 0 {
+			t.Errorf("#%d: got %s want %s", i, v, want)
+		}
+	}
+}
+
+// lshTests lists left-shift cases (in << shift == out) used by TestLsh and
+// TestLshSelf; TestLshRsh reuses the inputs and shift counts.
+var lshTests = []intShiftTest{
+	{"0", 0, "0"},
+	{"0", 1, "0"},
+	{"0", 2, "0"},
+	{"1", 0, "1"},
+	{"1", 1, "2"},
+	{"1", 2, "4"},
+	{"2", 0, "2"},
+	{"2", 1, "4"},
+	{"2", 2, "8"},
+	{"-87", 1, "-174"},
+	{"4294967296", 0, "4294967296"},
+	{"4294967296", 1, "8589934592"},
+	{"4294967296", 2, "17179869184"},
+	{"18446744073709551616", 0, "18446744073709551616"},
+	{"9223372036854775808", 1, "18446744073709551616"},
+	{"4611686018427387904", 2, "18446744073709551616"},
+	{"1", 64, "18446744073709551616"},
+	{"18446744073709551616", 64, "340282366920938463463374607431768211456"},
+	{"1", 128, "340282366920938463463374607431768211456"},
+}
+
+func TestLsh(t *testing.T) {
+	for i, test := range lshTests {
+		in, _ := new(Int).SetString(test.in, 10)
+		expected, _ := new(Int).SetString(test.out, 10)
+		out := new(Int).Lsh(in, test.shift)
+
+		if !isNormalized(out) {
+			t.Errorf("#%d: %v is not normalized", i, *out)
+		}
+		if out.Cmp(expected) != 0 {
+			t.Errorf("#%d: got %s want %s", i, out, expected)
+		}
+	}
+}
+
+// TestLshSelf checks Lsh when the receiver is also the operand, using the
+// lshTests table.
+func TestLshSelf(t *testing.T) {
+	for i := range lshTests {
+		tc := lshTests[i]
+		want, _ := new(Int).SetString(tc.out, 10)
+		v, _ := new(Int).SetString(tc.in, 10)
+
+		// Shift in place: destination and source are the same Int.
+		v.Lsh(v, tc.shift)
+
+		if !isNormalized(v) {
+			t.Errorf("#%d: %v is not normalized", i, *v)
+		}
+		if v.Cmp(want) != 0 {
+			t.Errorf("#%d: got %s want %s", i, v, want)
+		}
+	}
+}
+
+// TestLshRsh checks that shifting left and then right by the same count
+// returns the original value, for the inputs of both shift tables.
+func TestLshRsh(t *testing.T) {
+	for _, table := range [][]intShiftTest{rshTests, lshTests} {
+		// The case index restarts for each table.
+		for i, tc := range table {
+			orig, _ := new(Int).SetString(tc.in, 10)
+			round := new(Int).Lsh(orig, tc.shift)
+			round.Rsh(round, tc.shift)
+
+			if !isNormalized(round) {
+				t.Errorf("#%d: %v is not normalized", i, *round)
+			}
+			if orig.Cmp(round) != 0 {
+				t.Errorf("#%d: got %s want %s", i, round, orig)
+			}
+		}
+	}
+}
+
+// cmpAbsTests holds the non-negative values used by TestCmpAbs and
+// TestIntCmpSelf.
+// Entries must be sorted by value in ascending order; TestCmpAbs verifies
+// this and derives the expected comparison result from the entry indices.
+var cmpAbsTests = []string{
+	"0",
+	"1",
+	"2",
+	"10",
+	"10000000",
+	"2783678367462374683678456387645876387564783686583485",
+	"2783678367462374683678456387645876387564783686583486",
+	"32957394867987420967976567076075976570670947609750670956097509670576075067076027578341538",
+}
+
+func TestCmpAbs(t *testing.T) {
+	values := make([]*Int, len(cmpAbsTests))
+	var prev *Int
+	for i, s := range cmpAbsTests {
+		x, ok := new(Int).SetString(s, 0)
+		if !ok {
+			t.Fatalf("SetString(%s, 0) failed", s)
+		}
+		if prev != nil && prev.Cmp(x) >= 0 {
+			t.Fatal("cmpAbsTests entries not sorted in ascending order")
+		}
+		values[i] = x
+		prev = x
+	}
+
+	for i, x := range values {
+		for j, y := range values {
+			// try all combinations of signs for x, y
+			for k := 0; k < 4; k++ {
+				var a, b Int
+				a.Set(x)
+				b.Set(y)
+				if k&1 != 0 {
+					a.Neg(&a)
+				}
+				if k&2 != 0 {
+					b.Neg(&b)
+				}
+
+				got := a.CmpAbs(&b)
+				want := 0
+				switch {
+				case i > j:
+					want = 1
+				case i < j:
+					want = -1
+				}
+				if got != want {
+					t.Errorf("absCmp |%s|, |%s|: got %d; want %d", &a, &b, got, want)
+				}
+			}
+		}
+	}
+}
+
+func TestIntCmpSelf(t *testing.T) {
+	for _, s := range cmpAbsTests {
+		x, ok := new(Int).SetString(s, 0)
+		if !ok {
+			t.Fatalf("SetString(%s, 0) failed", s)
+		}
+		got := x.Cmp(x)
+		want := 0
+		if got != want {
+			t.Errorf("x = %s: x.Cmp(x): got %d; want %d", x, got, want)
+		}
+	}
+}
+
+// int64Tests lists inputs for TestInt64, parsed with base 0 (so "0x"
+// prefixes are accepted). The first group fits in an int64; the second
+// group does not.
+var int64Tests = []string{
+	// int64
+	"0",
+	"1",
+	"-1",
+	"4294967295",
+	"-4294967295",
+	"4294967296",
+	"-4294967296",
+	"9223372036854775807",
+	"-9223372036854775807",
+	"-9223372036854775808",
+
+	// not int64
+	"0x8000000000000000",
+	"-0x8000000000000001",
+	"38579843757496759476987459679745",
+	"-38579843757496759476987459679745",
+}
+
+func TestInt64(t *testing.T) {
+	for _, s := range int64Tests {
+		var x Int
+		_, ok := x.SetString(s, 0)
+		if !ok {
+			t.Errorf("SetString(%s, 0) failed", s)
+			continue
+		}
+
+		want, err := strconv.ParseInt(s, 0, 64)
+		if err != nil {
+			if err.(*strconv.NumError).Err == strconv.ErrRange {
+				if x.IsInt64() {
+					t.Errorf("IsInt64(%s) succeeded unexpectedly", s)
+				}
+			} else {
+				t.Errorf("ParseInt(%s) failed", s)
+			}
+			continue
+		}
+
+		if !x.IsInt64() {
+			t.Errorf("IsInt64(%s) failed unexpectedly", s)
+		}
+
+		got := x.Int64()
+		if got != want {
+			t.Errorf("Int64(%s) = %d; want %d", s, got, want)
+		}
+	}
+}
+
+// uint64Tests lists inputs for TestUint64, parsed with base 0 (so "0x"
+// prefixes are accepted). The first group fits in a uint64; the second
+// group does not.
+var uint64Tests = []string{
+	// uint64
+	"0",
+	"1",
+	"4294967295",
+	"4294967296",
+	"8589934591",
+	"8589934592",
+	"9223372036854775807",
+	"9223372036854775808",
+	"0x08000000000000000",
+
+	// not uint64
+	"0x10000000000000000",
+	"-0x08000000000000000",
+	"-1",
+}
+
+func TestUint64(t *testing.T) {
+	for _, s := range uint64Tests {
+		var x Int
+		_, ok := x.SetString(s, 0)
+		if !ok {
+			t.Errorf("SetString(%s, 0) failed", s)
+			continue
+		}
+
+		want, err := strconv.ParseUint(s, 0, 64)
+		if err != nil {
+			// check for sign explicitly (ErrRange doesn't cover signed input)
+			if s[0] == '-' || err.(*strconv.NumError).Err == strconv.ErrRange {
+				if x.IsUint64() {
+					t.Errorf("IsUint64(%s) succeeded unexpectedly", s)
+				}
+			} else {
+				t.Errorf("ParseUint(%s) failed", s)
+			}
+			continue
+		}
+
+		if !x.IsUint64() {
+			t.Errorf("IsUint64(%s) failed unexpectedly", s)
+		}
+
+		got := x.Uint64()
+		if got != want {
+			t.Errorf("Uint64(%s) = %d; want %d", s, got, want)
+		}
+	}
+}
+
+// bitwiseTests lists operand pairs x, y together with the expected results
+// of And, Or, Xor and AndNot on them. All values are strings parsed with
+// base 0. TestBitwise checks the four operations; TestBitSet reuses the
+// operands as inputs for testBitset.
+var bitwiseTests = []struct {
+	x, y                 string
+	and, or, xor, andNot string
+}{
+	{"0x00", "0x00", "0x00", "0x00", "0x00", "0x00"},
+	{"0x00", "0x01", "0x00", "0x01", "0x01", "0x00"},
+	{"0x01", "0x00", "0x00", "0x01", "0x01", "0x01"},
+	{"-0x01", "0x00", "0x00", "-0x01", "-0x01", "-0x01"},
+	{"-0xaf", "-0x50", "-0xf0", "-0x0f", "0xe1", "0x41"},
+	{"0x00", "-0x01", "0x00", "-0x01", "-0x01", "0x00"},
+	{"0x01", "0x01", "0x01", "0x01", "0x00", "0x00"},
+	{"-0x01", "-0x01", "-0x01", "-0x01", "0x00", "0x00"},
+	{"0x07", "0x08", "0x00", "0x0f", "0x0f", "0x07"},
+	{"0x05", "0x0f", "0x05", "0x0f", "0x0a", "0x00"},
+	{"0xff", "-0x0a", "0xf6", "-0x01", "-0xf7", "0x09"},
+	{"0x013ff6", "0x9a4e", "0x1a46", "0x01bffe", "0x01a5b8", "0x0125b0"},
+	{"-0x013ff6", "0x9a4e", "0x800a", "-0x0125b2", "-0x01a5bc", "-0x01c000"},
+	{"-0x013ff6", "-0x9a4e", "-0x01bffe", "-0x1a46", "0x01a5b8", "0x8008"},
+	{
+		"0x1000009dc6e3d9822cba04129bcbe3401",
+		"0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd",
+		"0x1000001186210100001000009048c2001",
+		"0xb9bd7d543685789d57cb918e8bfeff7fddb2ebe87dfbbdfe35fd",
+		"0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fc",
+		"0x8c40c2d8822caa04120b8321400",
+	},
+	{
+		"0x1000009dc6e3d9822cba04129bcbe3401",
+		"-0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd",
+		"0x8c40c2d8822caa04120b8321401",
+		"-0xb9bd7d543685789d57ca918e82229142459020483cd2014001fd",
+		"-0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fe",
+		"0x1000001186210100001000009048c2000",
+	},
+	{
+		"-0x1000009dc6e3d9822cba04129bcbe3401",
+		"-0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd",
+		"-0xb9bd7d543685789d57cb918e8bfeff7fddb2ebe87dfbbdfe35fd",
+		"-0x1000001186210100001000009048c2001",
+		"0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fc",
+		"0xb9bd7d543685789d57ca918e82229142459020483cd2014001fc",
+	},
+}
+
+// bitFun is the signature shared by the binary bitwise methods under test
+// (And, Or, Xor, AndNot) when used as method expressions: z is the
+// destination, x and y are the operands.
+type bitFun func(z, x, y *Int) *Int
+
+func testBitFun(t *testing.T, msg string, f bitFun, x, y *Int, exp string) {
+	expected := new(Int)
+	expected.SetString(exp, 0)
+
+	out := f(new(Int), x, y)
+	if out.Cmp(expected) != 0 {
+		t.Errorf("%s: got %s want %s", msg, out, expected)
+	}
+}
+
+func testBitFunSelf(t *testing.T, msg string, f bitFun, x, y *Int, exp string) {
+	self := new(Int)
+	self.Set(x)
+	expected := new(Int)
+	expected.SetString(exp, 0)
+
+	self = f(self, self, y)
+	if self.Cmp(expected) != 0 {
+		t.Errorf("%s: got %s want %s", msg, self, expected)
+	}
+}
+
+// altBit returns bit i of x, computed as (x >> i) & 1 with Rsh and And.
+// testBitset uses it as an independent cross-check for Bit.
+func altBit(x *Int, i int) uint {
+	low := new(Int).Rsh(x, uint(i))
+	low.And(low, NewInt(1))
+	if low.Sign() == 0 {
+		return 0
+	}
+	return 1
+}
+
+// altSetBit sets z to x with bit i set to b and returns z, using Or (b == 1)
+// or AndNot (b == 0) with the mask 1<<i. It is used as an independent
+// cross-check and benchmark baseline for SetBit. It panics if b is neither
+// 0 nor 1.
+func altSetBit(z *Int, x *Int, i int, b uint) *Int {
+	mask := NewInt(1)
+	mask.Lsh(mask, uint(i))
+
+	if b == 1 {
+		return z.Or(x, mask)
+	}
+	if b == 0 {
+		return z.AndNot(x, mask)
+	}
+	panic("set bit is not 0 or 1")
+}
+
+// testBitset cross-checks Bit and SetBit against altBit and altSetBit for
+// every bit position of x up to 10 bits past its bit length. For each
+// position it sets the bit, clears it, and restores the original bit,
+// comparing both implementations after every step. Since each iteration
+// restores the bit it touched, z must equal x again once the loop is done.
+func testBitset(t *testing.T, x *Int) {
+	n := x.BitLen()
+	z := new(Int).Set(x)
+	z1 := new(Int).Set(x)
+	for i := 0; i < n+10; i++ {
+		old := z.Bit(i)
+		old1 := altBit(z1, i)
+		if old != old1 {
+			t.Errorf("bitset: inconsistent value for Bit(%s, %d), got %v want %v", z1, i, old, old1)
+		}
+		// Assign rather than redeclare: with ":=" the loop would work on
+		// shadowed copies and the final comparison of z against x below
+		// would check a value that was never modified.
+		z = new(Int).SetBit(z, i, 1)
+		z1 = altSetBit(new(Int), z1, i, 1)
+		if z.Bit(i) == 0 {
+			t.Errorf("bitset: bit %d of %s got 0 want 1", i, x)
+		}
+		if z.Cmp(z1) != 0 {
+			t.Errorf("bitset: inconsistent value after SetBit 1, got %s want %s", z, z1)
+		}
+		z.SetBit(z, i, 0)
+		altSetBit(z1, z1, i, 0)
+		if z.Bit(i) != 0 {
+			t.Errorf("bitset: bit %d of %s got 1 want 0", i, x)
+		}
+		if z.Cmp(z1) != 0 {
+			t.Errorf("bitset: inconsistent value after SetBit 0, got %s want %s", z, z1)
+		}
+		// Restore the original bit so the next iteration starts from x.
+		altSetBit(z1, z1, i, old)
+		z.SetBit(z, i, old)
+		if z.Cmp(z1) != 0 {
+			t.Errorf("bitset: inconsistent value after SetBit old, got %s want %s", z, z1)
+		}
+	}
+	if z.Cmp(x) != 0 {
+		t.Errorf("bitset: got %s want %s", z, x)
+	}
+}
+
+// bitsetTests lists cases for Bit: x is the value (parsed with base 0),
+// i the bit index, and b the expected bit. The negative entries expect the
+// bits of the two's complement form, e.g. every bit of -1 is 1.
+var bitsetTests = []struct {
+	x string
+	i int
+	b uint
+}{
+	{"0", 0, 0},
+	{"0", 200, 0},
+	{"1", 0, 1},
+	{"1", 1, 0},
+	{"-1", 0, 1},
+	{"-1", 200, 1},
+	{"0x2000000000000000000000000000", 108, 0},
+	{"0x2000000000000000000000000000", 109, 1},
+	{"0x2000000000000000000000000000", 110, 0},
+	{"-0x2000000000000000000000000001", 108, 1},
+	{"-0x2000000000000000000000000001", 109, 0},
+	{"-0x2000000000000000000000000001", 110, 1},
+}
+
+func TestBitSet(t *testing.T) {
+	for _, test := range bitwiseTests {
+		x := new(Int)
+		x.SetString(test.x, 0)
+		testBitset(t, x)
+		x = new(Int)
+		x.SetString(test.y, 0)
+		testBitset(t, x)
+	}
+	for i, test := range bitsetTests {
+		x := new(Int)
+		x.SetString(test.x, 0)
+		b := x.Bit(test.i)
+		if b != test.b {
+			t.Errorf("#%d got %v want %v", i, b, test.b)
+		}
+	}
+	z := NewInt(1)
+	z.SetBit(NewInt(0), 2, 1)
+	if z.Cmp(NewInt(4)) != 0 {
+		t.Errorf("destination leaked into result; got %s want 4", z)
+	}
+}
+
+// tzbTests lists cases for TrailingZeroBits: in is the value (parsed with
+// base 0) and out the expected number of trailing zero bits. The sign does
+// not affect the expected count, and zero is expected to yield 0.
+var tzbTests = []struct {
+	in  string
+	out uint
+}{
+	{"0", 0},
+	{"1", 0},
+	{"-1", 0},
+	{"4", 2},
+	{"-8", 3},
+	{"0x4000000000000000000", 74},
+	{"-0x8000000000000000000", 75},
+}
+
+// TestTrailingZeroBits checks TrailingZeroBits against the tzbTests table.
+func TestTrailingZeroBits(t *testing.T) {
+	for i, tc := range tzbTests {
+		x, _ := new(Int).SetString(tc.in, 0)
+		if got := x.TrailingZeroBits(); got != tc.out {
+			t.Errorf("#%d: got %v want %v", i, got, tc.out)
+		}
+	}
+}
+
+// BenchmarkBitset measures in-place SetBit(…, 1) on a non-negative value.
+// Bit 512 is set before the timer is reset; the timed loop then sets bit 0
+// or bit 512 (i&512 is either 0 or 512).
+func BenchmarkBitset(b *testing.B) {
+	z := new(Int)
+	z.SetBit(z, 512, 1)
+	b.ResetTimer()
+	for i := b.N - 1; i >= 0; i-- {
+		z.SetBit(z, i&512, 1)
+	}
+}
+
+// BenchmarkBitsetNeg measures in-place SetBit(…, 0) on a negative value.
+// Bit 512 of -1 is cleared before the timer is reset; the timed loop then
+// clears bit 0 or bit 512 (i&512 is either 0 or 512).
+func BenchmarkBitsetNeg(b *testing.B) {
+	z := NewInt(-1)
+	z.SetBit(z, 512, 0)
+	b.ResetTimer()
+	for i := b.N - 1; i >= 0; i-- {
+		z.SetBit(z, i&512, 0)
+	}
+}
+
+// BenchmarkBitsetOrig is the counterpart of BenchmarkBitset that sets the
+// bits with altSetBit (the Or-with-mask implementation) instead of SetBit.
+func BenchmarkBitsetOrig(b *testing.B) {
+	z := new(Int)
+	altSetBit(z, z, 512, 1)
+	b.ResetTimer()
+	for i := b.N - 1; i >= 0; i-- {
+		altSetBit(z, z, i&512, 1)
+	}
+}
+
+// BenchmarkBitsetNegOrig is the counterpart of BenchmarkBitsetNeg that
+// clears the bits with altSetBit (the AndNot-with-mask implementation)
+// instead of SetBit.
+func BenchmarkBitsetNegOrig(b *testing.B) {
+	z := NewInt(-1)
+	altSetBit(z, z, 512, 0)
+	b.ResetTimer()
+	for i := b.N - 1; i >= 0; i-- {
+		altSetBit(z, z, i&512, 0)
+	}
+}
+
+// tri returns the trinomial 2**(n*2) - 2**n - 1. That value is always
+// 3 mod 4 and 7 mod 8, so that 2 is always a quadratic residue.
+func tri(n uint) *Int {
+	pow := new(Int).Lsh(NewInt(1), n) // 2**n
+	res := new(Int).Lsh(pow, n)       // 2**(n*2)
+	res.Sub(res, pow)
+	return res.Sub(res, intOne)
+}
+
+// BenchmarkModSqrt225_Tonelli measures modSqrtTonelliShanks taking the
+// square root of 2 modulo tri(225).
+func BenchmarkModSqrt225_Tonelli(b *testing.B) {
+	p := tri(225)
+	x := NewInt(2)
+	for i := 0; i < b.N; i++ {
+		// x is both input and destination, so reset it every iteration.
+		x.SetUint64(2)
+		x.modSqrtTonelliShanks(x, p)
+	}
+}
+
+// BenchmarkModSqrt225_3Mod4 measures modSqrt3Mod4Prime taking the square
+// root of 2 modulo tri(225), the same input as BenchmarkModSqrt225_Tonelli.
+func BenchmarkModSqrt225_3Mod4(b *testing.B) {
+	p := tri(225)
+	x := new(Int).SetUint64(2)
+	for i := 0; i < b.N; i++ {
+		// x is both input and destination, so reset it every iteration.
+		x.SetUint64(2)
+		x.modSqrt3Mod4Prime(x, p)
+	}
+}
+
+// BenchmarkModSqrt231_Tonelli measures modSqrtTonelliShanks taking the
+// square root of 7 modulo tri(231) - 2.
+func BenchmarkModSqrt231_Tonelli(b *testing.B) {
+	p := tri(231)
+	p.Sub(p, intOne)
+	p.Sub(p, intOne) // tri(231) - 2 is a prime == 5 mod 8
+	x := new(Int).SetUint64(7)
+	for i := 0; i < b.N; i++ {
+		// x is both input and destination, so reset it every iteration.
+		x.SetUint64(7)
+		x.modSqrtTonelliShanks(x, p)
+	}
+}
+
+// BenchmarkModSqrt231_5Mod8 measures modSqrt5Mod8Prime taking the square
+// root of 7 modulo tri(231) - 2, the same input as
+// BenchmarkModSqrt231_Tonelli.
+func BenchmarkModSqrt231_5Mod8(b *testing.B) {
+	p := tri(231)
+	p.Sub(p, intOne)
+	p.Sub(p, intOne) // tri(231) - 2 is a prime == 5 mod 8
+	x := new(Int).SetUint64(7)
+	for i := 0; i < b.N; i++ {
+		// x is both input and destination, so reset it every iteration.
+		x.SetUint64(7)
+		x.modSqrt5Mod8Prime(x, p)
+	}
+}
+
+// TestBitwise checks And, AndNot, Or and Xor against the bitwiseTests
+// table, each with a fresh destination and with the destination aliasing
+// the first operand.
+func TestBitwise(t *testing.T) {
+	x := new(Int)
+	y := new(Int)
+	for _, tc := range bitwiseTests {
+		x.SetString(tc.x, 0)
+		y.SetString(tc.y, 0)
+
+		ops := []struct {
+			name string
+			f    bitFun
+			want string
+		}{
+			{"and", (*Int).And, tc.and},
+			{"andNot", (*Int).AndNot, tc.andNot},
+			{"or", (*Int).Or, tc.or},
+			{"xor", (*Int).Xor, tc.xor},
+		}
+		for _, op := range ops {
+			testBitFun(t, op.name, op.f, x, y, op.want)
+			testBitFunSelf(t, op.name, op.f, x, y, op.want)
+		}
+	}
+}
+
+// notTests lists cases for Not as decimal strings: out is the expected
+// value of Not(in). TestNot also checks the reverse direction, i.e. that
+// Not(out) gives back in.
+var notTests = []struct {
+	in  string
+	out string
+}{
+	{"0", "-1"},
+	{"1", "-2"},
+	{"7", "-8"},
+	{"0", "-1"},
+	{"-81910", "81909"},
+	{
+		"298472983472983471903246121093472394872319615612417471234712061",
+		"-298472983472983471903246121093472394872319615612417471234712062",
+	},
+}
+
+func TestNot(t *testing.T) {
+	in := new(Int)
+	out := new(Int)
+	expected := new(Int)
+	for i, test := range notTests {
+		in.SetString(test.in, 10)
+		expected.SetString(test.out, 10)
+		out = out.Not(in)
+		if out.Cmp(expected) != 0 {
+			t.Errorf("#%d: got %s want %s", i, out, expected)
+		}
+		out = out.Not(out)
+		if out.Cmp(in) != 0 {
+			t.Errorf("#%d: got %s want %s", i, out, in)
+		}
+	}
+}
+
+// modInverseTests lists element/modulus pairs (decimal strings) for
+// TestModInverse, which checks that the computed inverse times the element
+// is 1 modulo the modulus. The table includes negative elements and
+// negative moduli.
+var modInverseTests = []struct {
+	element string
+	modulus string
+}{
+	{"1234567", "458948883992"},
+	{"239487239847", "2410312426921032588552076022197566074856950548502459942654116941958108831682612228890093858261341614673227141477904012196503648957050582631942730706805009223062734745341073406696246014589361659774041027169249453200378729434170325843778659198143763193776859869524088940195577346119843545301547043747207749969763750084308926339295559968882457872412993810129130294592999947926365264059284647209730384947211681434464714438488520940127459844288859336526896320919633919"},
+	{"-10", "13"}, // issue #16984
+	{"10", "-13"},
+	{"-17", "-13"},
+}
+
+// TestModInverse checks that ModInverse(element, modulus) * element is
+// 1 mod modulus, first for the modInverseTests table and then exhaustively
+// for all moduli below 100 and all elements coprime to them.
+func TestModInverse(t *testing.T) {
+	var element, modulus, gcd, inverse Int
+	one := NewInt(1)
+
+	// verify checks the inverse of the current element and modulus.
+	verify := func() {
+		inverse.ModInverse(&element, &modulus)
+		inverse.Mul(&inverse, &element)
+		inverse.Mod(&inverse, &modulus)
+		if inverse.Cmp(one) != 0 {
+			t.Errorf("ModInverse(%d,%d)*%d%%%d=%d, not 1", &element, &modulus, &element, &modulus, &inverse)
+		}
+	}
+
+	for _, tc := range modInverseTests {
+		element.SetString(tc.element, 10)
+		modulus.SetString(tc.modulus, 10)
+		verify()
+	}
+
+	// exhaustive test for small values
+	for n := int64(2); n < 100; n++ {
+		modulus.SetInt64(n)
+		for x := int64(1); x < n; x++ {
+			element.SetInt64(x)
+			// Only elements coprime to the modulus are checked.
+			gcd.GCD(nil, nil, &element, &modulus)
+			if gcd.Cmp(one) == 0 {
+				verify()
+			}
+		}
+	}
+}
+
+// BenchmarkModInverse measures ModInverse of p-1 modulo p, where p is
+// 2**1279 - 1.
+func BenchmarkModInverse(b *testing.B) {
+	p := new(Int).SetInt64(1) // Mersenne prime 2**1279 -1
+	p.abs = p.abs.shl(p.abs, 1279) // shift the magnitude directly: p = 2**1279
+	p.Sub(p, intOne)
+	x := new(Int).Sub(p, intOne)
+	z := new(Int)
+	for i := 0; i < b.N; i++ {
+		z.ModInverse(x, p)
+	}
+}
+
+// testModSqrt is a helper for TestModSqrt,
+// which checks that ModSqrt can compute a square-root of elt^2.
+//
+// sq and sqrt are caller-provided results: on return sq holds elt^2 mod mod
+// and sqrt holds the root ModSqrt computed for it. The function reports
+// inconsistencies between equivalent ModSqrt calls via t.Errorf, and
+// returns whether sqrt is elt itself or squares to sq modulo mod.
+func testModSqrt(t *testing.T, elt, mod, sq, sqrt *Int) bool {
+	var sqChk, sqrtChk, sqrtsq Int
+	sq.Mul(elt, elt)
+	sq.Mod(sq, mod)
+	z := sqrt.ModSqrt(sq, mod)
+	if z != sqrt {
+		// ModSqrt is expected to return its receiver here.
+		t.Errorf("ModSqrt returned wrong value %s", z)
+	}
+
+	// test ModSqrt arguments outside the range [0,mod)
+	sqChk.Add(sq, mod)
+	z = sqrtChk.ModSqrt(&sqChk, mod)
+	if z != &sqrtChk || z.Cmp(sqrt) != 0 {
+		t.Errorf("ModSqrt returned inconsistent value %s", z)
+	}
+	sqChk.Sub(sq, mod)
+	z = sqrtChk.ModSqrt(&sqChk, mod)
+	if z != &sqrtChk || z.Cmp(sqrt) != 0 {
+		t.Errorf("ModSqrt returned inconsistent value %s", z)
+	}
+
+	// test x aliasing z
+	z = sqrtChk.ModSqrt(sqrtChk.Set(sq), mod)
+	if z != &sqrtChk || z.Cmp(sqrt) != 0 {
+		t.Errorf("ModSqrt returned inconsistent value %s", z)
+	}
+
+	// make sure we actually got a square root
+	if sqrt.Cmp(elt) == 0 {
+		return true // we found the "desired" square root
+	}
+	sqrtsq.Mul(sqrt, sqrt) // make sure we found the "other" one
+	sqrtsq.Mod(&sqrtsq, mod)
+	return sq.Cmp(&sqrtsq) == 0
+}
+
+// TestModSqrt checks ModSqrt in two ways. For each entry of primes after
+// the first it tests a few random elements drawn from [-mod, 3*mod). Then,
+// unless running in short mode, it exhaustively tests every prime modulus
+// below 100: every square must have a root, and every value that is not a
+// square must make ModSqrt return nil.
+func TestModSqrt(t *testing.T) {
+	var elt, mod, modx4, sq, sqrt Int
+	r := rand.New(rand.NewSource(9))
+	for i, s := range primes[1:] { // skip 2, use only odd primes
+		mod.SetString(s, 10)
+		modx4.Lsh(&mod, 2)
+
+		// test a few random elements per prime
+		for x := 1; x < 5; x++ {
+			elt.Rand(r, &modx4)
+			elt.Sub(&elt, &mod) // test range [-mod, 3*mod)
+			if !testModSqrt(t, &elt, &mod, &sq, &sqrt) {
+				t.Errorf("#%d: failed (sqrt(e) = %s)", i, &sqrt)
+			}
+		}
+
+		if testing.Short() && i > 2 {
+			break
+		}
+	}
+
+	if testing.Short() {
+		return
+	}
+
+	// exhaustive test for small values
+	for n := 3; n < 100; n++ {
+		mod.SetInt64(int64(n))
+		if !mod.ProbablyPrime(10) {
+			continue
+		}
+		// isSquare[v] records whether v is a square modulo n.
+		isSquare := make([]bool, n)
+
+		// test all the squares
+		for x := 1; x < n; x++ {
+			elt.SetInt64(int64(x))
+			if !testModSqrt(t, &elt, &mod, &sq, &sqrt) {
+				t.Errorf("#%d: failed (sqrt(%d,%d) = %s)", x, &elt, &mod, &sqrt)
+			}
+			isSquare[sq.Uint64()] = true
+		}
+
+		// test all non-squares
+		for x := 1; x < n; x++ {
+			sq.SetInt64(int64(x))
+			z := sqrt.ModSqrt(&sq, &mod)
+			if !isSquare[x] && z != nil {
+				// This fires when a root was returned for a non-square, so
+				// report the input sq and the unexpected root.
+				t.Errorf("#%d: failed (sqrt(%d,%d) = %s, want nil)", x, &sq, &mod, z)
+			}
+		}
+	}
+}
+
+// TestJacobi checks Jacobi(x, y) against a table of small inputs, covering
+// zero, negative x, and negative y.
+func TestJacobi(t *testing.T) {
+	type jacobiCase struct {
+		x, y   int64
+		result int
+	}
+	testCases := []jacobiCase{
+		{0, 1, 1},
+		{0, -1, 1},
+		{1, 1, 1},
+		{1, -1, 1},
+		{0, 5, 0},
+		{1, 5, 1},
+		{2, 5, -1},
+		{-2, 5, -1},
+		{2, -5, -1},
+		{-2, -5, 1},
+		{3, 5, -1},
+		{5, 5, 0},
+		{-5, 5, 0},
+		{6, 5, 1},
+		{6, -5, 1},
+		{-6, 5, 1},
+		{-6, -5, -1},
+	}
+
+	var x, y Int
+	for i, tc := range testCases {
+		x.SetInt64(tc.x)
+		y.SetInt64(tc.y)
+		if got := Jacobi(&x, &y); got != tc.result {
+			t.Errorf("#%d: Jacobi(%d, %d) = %d, but expected %d", i, tc.x, tc.y, got, tc.result)
+		}
+	}
+}
+
+// TestJacobiPanic checks that Jacobi panics when its second argument is
+// even. If Jacobi returns normally, the test panics with failureMsg; the
+// deferred function tells the two panics apart.
+func TestJacobiPanic(t *testing.T) {
+	const failureMsg = "test failure"
+	defer func() {
+		msg := recover()
+		// A nil msg or the sentinel means Jacobi did not panic: re-panic so
+		// the test fails. Any other value is Jacobi's own panic, which is the
+		// expected outcome and is only logged.
+		if msg == nil || msg == failureMsg {
+			panic(msg)
+		}
+		t.Log(msg)
+	}()
+	x := NewInt(1)
+	y := NewInt(2)
+	// Jacobi should panic when the second argument is even.
+	Jacobi(x, y)
+	panic(failureMsg)
+}
+
+// TestIssue2607 is a regression test: calling Rand with the receiver also
+// passed as the limit used to hang.
+func TestIssue2607(t *testing.T) {
+	// This code sequence used to hang.
+	limit := NewInt(10)
+	rng := rand.New(rand.NewSource(9))
+	limit.Rand(rng, limit)
+}
+
+// TestSqrt checks Sqrt for all integers below 10000, for even and odd
+// powers of ten up to 1e990, and with the receiver aliasing the operand.
+func TestSqrt(t *testing.T) {
+	r := new(Int)
+
+	// root tracks floor(sqrt(i)) incrementally as i grows.
+	for i, root := 0, 0; i < 10000; i++ {
+		if next := root + 1; next*next <= i {
+			root = next
+		}
+		n := NewInt(int64(i))
+		r.SetInt64(-2) // start from a wrong value that Sqrt has to replace
+		r.Sqrt(n)
+		if want := NewInt(int64(root)); r.Cmp(want) != 0 {
+			t.Errorf("Sqrt(%v) = %v, want %v", n, r, root)
+		}
+	}
+
+	// The expected root of 10**exp is 10**(exp/2), using integer division.
+	for exp := 0; exp < 1000; exp += 10 {
+		n, _ := new(Int).SetString("1"+strings.Repeat("0", exp), 10)
+		want, _ := new(Int).SetString("1"+strings.Repeat("0", exp/2), 10)
+		if got := new(Int).Sqrt(n); got.Cmp(want) != 0 {
+			t.Errorf("Sqrt(1e%d) = %v, want 1e%d", exp, got, exp/2)
+		}
+	}
+
+	// Test aliasing.
+	r.SetInt64(100)
+	r.Sqrt(r)
+	if got := r.Int64(); got != 10 {
+		t.Errorf("Sqrt(100) = %v, want 10 (aliased output)", got)
+	}
+}
+
+// We can't test this together with the other Exp tests above because
+// it requires a different receiver setup.
+func TestIssue22830(t *testing.T) {
+	one := new(Int).SetInt64(1)
+	base, _ := new(Int).SetString("84555555300000000000", 10)
+	mod, _ := new(Int).SetString("66666670001111111111", 10)
+	want, _ := new(Int).SetString("17888885298888888889", 10)
+
+	var tests = []int64{
+		0, 1, -1,
+	}
+
+	for _, n := range tests {
+		m := NewInt(n)
+		if got := m.Exp(base, one, mod); got.Cmp(want) != 0 {
+			t.Errorf("(%v).Exp(%s, 1, %s) = %s, want %s", n, base, mod, got, want)
+		}
+	}
+}
+
+// BenchmarkSqrt measures Sqrt of 10**1001 (a "1" followed by 1001 zeros),
+// reusing one destination across iterations. Parsing the input is excluded
+// from the timing by ResetTimer.
+func BenchmarkSqrt(b *testing.B) {
+	n, _ := new(Int).SetString("1"+strings.Repeat("0", 1001), 10)
+	b.ResetTimer()
+	t := new(Int)
+	for i := 0; i < b.N; i++ {
+		t.Sqrt(n)
+	}
+}
+
+// benchmarkIntSqr measures squaring via Mul(x, x), reusing one destination
+// across iterations. The magnitude of x comes from rndNat(nwords) —
+// presumably a random value nwords words long; confirm against rndNat.
+func benchmarkIntSqr(b *testing.B, nwords int) {
+	x := new(Int)
+	x.abs = rndNat(nwords)
+	t := new(Int)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		t.Mul(x, x)
+	}
+}
+
+// BenchmarkIntSqr runs benchmarkIntSqr as a sub-benchmark for each size in
+// sqrBenchSizes, skipping sizes above 1e3 when isRaceBuilder is set.
+func BenchmarkIntSqr(b *testing.B) {
+	for _, words := range sqrBenchSizes {
+		if !isRaceBuilder || words <= 1e3 {
+			name := fmt.Sprintf("%d", words)
+			b.Run(name, func(b *testing.B) {
+				benchmarkIntSqr(b, words)
+			})
+		}
+	}
+}
+
+// benchmarkDiv measures DivMod on two values from randInt, generated with
+// a fixed seed so runs are repeatable. aSize and bSize are passed to
+// randInt as sizes — presumably bit lengths; confirm against randInt.
+func benchmarkDiv(b *testing.B, aSize, bSize int) {
+	var r = rand.New(rand.NewSource(1234))
+	aa := randInt(r, uint(aSize))
+	bb := randInt(r, uint(bSize))
+	// Make sure the dividend is not smaller than the divisor.
+	if aa.Cmp(bb) < 0 {
+		aa, bb = bb, aa
+	}
+	x := new(Int)
+	y := new(Int)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		x.DivMod(aa, bb, y)
+	}
+}
+
+// BenchmarkDiv runs benchmarkDiv as a sub-benchmark for a range of operand
+// sizes, with the first operand always twice the size of the second. Each
+// sub-benchmark is named "<first size>/<second size>".
+func BenchmarkDiv(b *testing.B) {
+	for _, bSize := range []int{
+		10, 20, 50, 100, 200, 500, 1000,
+		1e4, 1e5, 1e6, 1e7,
+	} {
+		aSize := 2 * bSize
+		name := fmt.Sprintf("%d/%d", aSize, bSize)
+		b.Run(name, func(b *testing.B) {
+			benchmarkDiv(b, aSize, bSize)
+		})
+	}
+}
+
+// TestFillBytes checks FillBytes for a range of values with three buffer
+// sizes: exactly large enough, much larger (pre-filled with 0xff so stale
+// bytes would show up), and one byte too small, which must panic.
+func TestFillBytes(t *testing.T) {
+	// checkResult reads buf back with SetBytes and compares magnitudes; the
+	// table contains a negative value, hence CmpAbs rather than Cmp.
+	checkResult := func(t *testing.T, buf []byte, want *Int) {
+		t.Helper()
+		got := new(Int).SetBytes(buf)
+		if got.CmpAbs(want) != 0 {
+			t.Errorf("got 0x%x, want 0x%x: %x", got, want, buf)
+		}
+	}
+	// panics reports whether calling f panics. The result is named so the
+	// deferred function can set it; the name avoids shadowing builtin panic.
+	panics := func(f func()) (panicked bool) {
+		defer func() { panicked = recover() != nil }()
+		f()
+		return
+	}
+
+	for _, n := range []string{
+		"0",
+		"1000",
+		"0xffffffff",
+		"-0xffffffff",
+		"0xffffffffffffffff",
+		"0x10000000000000000",
+		"0xabababababababababababababababababababababababababa",
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+	} {
+		t.Run(n, func(t *testing.T) {
+			// n is data, not a format string, so use Log rather than Logf.
+			t.Log(n)
+			x, ok := new(Int).SetString(n, 0)
+			if !ok {
+				panic("invalid test entry")
+			}
+
+			// Perfectly sized buffer.
+			byteLen := (x.BitLen() + 7) / 8
+			buf := make([]byte, byteLen)
+			checkResult(t, x.FillBytes(buf), x)
+
+			// Way larger, checking all bytes get zeroed.
+			buf = make([]byte, 100)
+			for i := range buf {
+				buf[i] = 0xff
+			}
+			checkResult(t, x.FillBytes(buf), x)
+
+			// Too small.
+			if byteLen > 0 {
+				buf = make([]byte, byteLen-1)
+				if !panics(func() { x.FillBytes(buf) }) {
+					t.Errorf("expected panic for small buffer and value %x", x)
+				}
+			}
+		})
+	}
+}
+
+func TestNewIntMinInt64(t *testing.T) {
+	// Test for uint64 cast in NewInt.
+	want := int64(math.MinInt64)
+	if got := NewInt(want).Int64(); got != want {
+		t.Fatalf("wanted %d, got %d", want, got)
+	}
+}
+
+// TestNewIntAllocs checks, using testing.AllocsPerRun, that
+// x.Add(x, NewInt(n)) performs no heap allocations for a range of n.
+// The test depends on compiler optimizations and calls
+// testenv.SkipIfOptimizationOff first.
+func TestNewIntAllocs(t *testing.T) {
+	testenv.SkipIfOptimizationOff(t)
+	for _, n := range []int64{0, 7, -7, 1 << 30, -1 << 30, 1 << 50, -1 << 50} {
+		x := NewInt(3)
+		got := testing.AllocsPerRun(100, func() {
+			// NewInt should inline, and all its allocations
+			// can happen on the stack. Passing the result of NewInt
+			// to Add should not cause any of those allocations to escape.
+			x.Add(x, NewInt(n))
+		})
+		if got != 0 {
+			t.Errorf("x.Add(x, NewInt(%d)), wanted 0 allocations, got %f", n, got)
+		}
+	}
+}
+
+// TestFloat64 checks Int.Float64 against a table of expected values and
+// accuracies, and cross-checks every result against converting the same
+// integer through Float.
+func TestFloat64(t *testing.T) {
+	cases := []struct {
+		istr string
+		f    float64
+		acc  Accuracy
+	}{
+		{"-1000000000000000000000000000000000000000000000000000000", -1000000000000000078291540404596243842305360299886116864.000000, Below},
+		{"-9223372036854775809", math.MinInt64, Above},
+		{"-9223372036854775808", -9223372036854775808, Exact}, // -2^63
+		{"-9223372036854775807", -9223372036854775807, Below},
+		{"-18014398509481985", -18014398509481984.000000, Above},
+		{"-18014398509481984", -18014398509481984.000000, Exact}, // -2^54
+		{"-18014398509481983", -18014398509481984.000000, Below},
+		{"-9007199254740993", -9007199254740992.000000, Above},
+		{"-9007199254740992", -9007199254740992.000000, Exact}, // -2^53
+		{"-9007199254740991", -9007199254740991.000000, Exact},
+		{"-4503599627370497", -4503599627370497.000000, Exact},
+		{"-4503599627370496", -4503599627370496.000000, Exact}, // -2^52
+		{"-4503599627370495", -4503599627370495.000000, Exact},
+		{"-12345", -12345, Exact},
+		{"-1", -1, Exact},
+		{"0", 0, Exact},
+		{"1", 1, Exact},
+		{"12345", 12345, Exact},
+		{"0x1010000000000000", 0x1010000000000000, Exact}, // >2^53 but exact nonetheless
+		{"9223372036854775807", 9223372036854775808, Above},
+		{"9223372036854775808", 9223372036854775808, Exact}, // +2^63
+		{"1000000000000000000000000000000000000000000000000000000", 1000000000000000078291540404596243842305360299886116864.000000, Above},
+	}
+
+	for _, tc := range cases {
+		i, ok := new(Int).SetString(tc.istr, 0)
+		if !ok {
+			t.Errorf("SetString(%s) failed", tc.istr)
+			continue
+		}
+
+		// Test against expectation.
+		f, acc := i.Float64()
+		if !(f == tc.f && acc == tc.acc) {
+			t.Errorf("%s: got %f (%s); want %f (%s)", tc.istr, f, acc, tc.f, tc.acc)
+		}
+
+		// Cross-check the fast path against the big.Float implementation.
+		f2, acc2 := new(Float).SetInt(i).Float64()
+		if !(f == f2 && acc == acc2) {
+			t.Errorf("%s: got %f (%s); Float.Float64 gives %f (%s)", tc.istr, f, acc, f2, acc2)
+		}
+	}
+}
diff --git a/src/math/big/intconv.go b/src/math/big/intconv.go
new file mode 100644
index 0000000..04e8c24
--- /dev/null
+++ b/src/math/big/intconv.go
@@ -0,0 +1,255 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements int-to-string conversion functions.
+
+package big
+
+import (
+	"errors"
+	"fmt"
+	"io"
+)
+
+// Text formats x in the given base and returns the result as a string.
+// The base must lie in the range 2 through 62. Digit values 10 to 35
+// are written as 'a' to 'z' and digit values 36 to 61 as 'A' to 'Z'.
+// The result never carries a base prefix such as "0x". A nil receiver
+// yields the string "<nil>".
+func (x *Int) Text(base int) string {
+	if x != nil {
+		digits := x.abs.itoa(x.neg, base)
+		return string(digits)
+	}
+	return "<nil>"
+}
+
+// Append formats x the same way x.Text(base) does and appends the
+// resulting bytes to buf. It returns the extended slice. A nil
+// receiver appends "<nil>".
+func (x *Int) Append(buf []byte, base int) []byte {
+	if x != nil {
+		return append(buf, x.abs.itoa(x.neg, base)...)
+	}
+	return append(buf, "<nil>"...)
+}
+
+// String returns the decimal representation of x as generated by
+// x.Text(10).
+func (x *Int) String() string {
+	return x.Text(10)
+}
+
+// writeMultiple writes text to s count times. Nothing is written when
+// text is empty or count is not positive. Results of s.Write are
+// ignored.
+func writeMultiple(s fmt.State, text string, count int) {
+	if text == "" {
+		return
+	}
+	data := []byte(text)
+	for i := 0; i < count; i++ {
+		s.Write(data)
+	}
+}
+
+var _ fmt.Formatter = intOne // compile-time check that *Int implements fmt.Formatter
+
+// Format implements fmt.Formatter. The supported verbs are 'b' (binary),
+// 'o' (octal), 'O' (octal with a "0o" prefix), 'd', 's', and 'v'
+// (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase
+// hexadecimal). Any other verb is reported in the output in the form
+// "%!verb(big.Int=value)".
+//
+// The usual package fmt flags for integers are honored: '+' and ' ' for
+// sign control; '#' for a leading "0b" with %b, "0" with %o, "0x" with
+// %x, or "0X" with %X; a precision giving the minimum number of digits;
+// a field width; '0' for zero padding; and '-' for left justification.
+// A nil receiver is formatted as "<nil>".
+func (x *Int) Format(s fmt.State, ch rune) {
+	// Map the verb to a conversion base.
+	var base int
+	switch ch {
+	case 'd', 's', 'v':
+		base = 10
+	case 'x', 'X':
+		base = 16
+	case 'o', 'O':
+		base = 8
+	case 'b':
+		base = 2
+	default:
+		// Unknown verb. String copes with a nil receiver, so this
+		// is safe to do before the nil check below.
+		fmt.Fprintf(s, "%%!%c(big.Int=%s)", ch, x.String())
+		return
+	}
+
+	if x == nil {
+		fmt.Fprint(s, "<nil>")
+		return
+	}
+
+	// Sign character: a negative value always shows '-'; otherwise
+	// '+' takes precedence over ' ' when both flags are present.
+	var sign string
+	if x.neg {
+		sign = "-"
+	} else if s.Flag('+') {
+		sign = "+"
+	} else if s.Flag(' ') {
+		sign = " "
+	}
+
+	// Base prefix: 'O' always carries "0o"; the remaining prefixes
+	// are only emitted when the '#' flag is set.
+	alternate := s.Flag('#')
+	var prefix string
+	switch {
+	case ch == 'O':
+		prefix = "0o"
+	case !alternate:
+		// no prefix requested
+	case ch == 'b':
+		prefix = "0b"
+	case ch == 'o':
+		prefix = "0"
+	case ch == 'x':
+		prefix = "0x"
+	case ch == 'X':
+		prefix = "0X"
+	}
+
+	digits := x.abs.utoa(base)
+	if ch == 'X' {
+		// Upper-case the digits in place; faster than bytes.ToUpper.
+		for i := range digits {
+			if c := digits[i]; 'a' <= c && c <= 'z' {
+				digits[i] = c - 'a' + 'A'
+			}
+		}
+	}
+
+	// Padding counts:
+	//   left  - spaces before the number (right justification, "%8d")
+	//   zeros - '0' characters directly before the digits ("%.8d", "%08d")
+	//   right - spaces after the number (left justification, "%-8d")
+	var left, zeros, right int
+
+	// The precision is the minimum number of digits to print.
+	precision, precisionSet := s.Precision()
+	if precisionSet {
+		if len(digits) < precision {
+			zeros = precision - len(digits)
+		} else if precision == 0 && len(digits) == 1 && digits[0] == '0' {
+			// A zero value with zero precision ("." or ".0") prints nothing.
+			return
+		}
+	}
+
+	// The width is the minimum number of characters to print.
+	length := len(sign) + len(prefix) + zeros + len(digits)
+	if width, widthSet := s.Width(); widthSet && width > length {
+		pad := width - length
+		if s.Flag('-') {
+			// Left justification wins over '0' when both are given.
+			right = pad
+		} else if s.Flag('0') && !precisionSet {
+			// Zero padding applies only when no precision was given.
+			zeros = pad
+		} else {
+			left = pad
+		}
+	}
+
+	// Emit [left pad][sign][prefix][zero pad][digits][right pad].
+	writeMultiple(s, " ", left)
+	writeMultiple(s, sign, 1)
+	writeMultiple(s, prefix, 1)
+	writeMultiple(s, "0", zeros)
+	s.Write(digits)
+	writeMultiple(s, " ", right)
+}
+
+// scan reads the longest possible prefix of r that forms a signed
+// integer in the given conversion base and stores its value in z. The
+// accepted syntax is that of Go integer literals. It returns z, the
+// conversion base actually used, and an error, if any. On error the
+// returned *Int is nil and the value of z is undefined.
+//
+// The base argument must be 0 or a value from 2 through MaxBase. With
+// base 0 the prefix of the input selects the base: “0b” or “0B” selects
+// base 2; “0”, “0o”, or “0O” selects base 8; “0x” or “0X” selects
+// base 16; anything else selects base 10.
+func (z *Int) scan(r io.ByteScanner, base int) (*Int, int, error) {
+	// optional sign
+	negative, err := scanSign(r)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// magnitude
+	mant, usedBase, _, err := z.abs.scan(r, base, false)
+	z.abs = mant
+	if err != nil {
+		return nil, usedBase, err
+	}
+
+	// zero is never negative
+	z.neg = negative && len(mant) > 0
+	return z, usedBase, nil
+}
+
+func scanSign(r io.ByteScanner) (neg bool, err error) {
+	var ch byte
+	if ch, err = r.ReadByte(); err != nil {
+		return false, err
+	}
+	switch ch {
+	case '-':
+		neg = true
+	case '+':
+		// nothing to do
+	default:
+		r.UnreadByte()
+	}
+	return
+}
+
+// byteReader wraps a fmt.ScanState so that it can be consumed one
+// byte at a time. Its ReadByte and UnreadByte methods provide the
+// method set of io.ByteScanner.
+type byteReader struct {
+	fmt.ScanState
+}
+
+// ReadByte reads the next rune from the underlying ScanState and
+// returns it as a byte. A rune whose encoding is not exactly one byte
+// long is reported as an error unless the read itself already failed.
+func (r byteReader) ReadByte() (byte, error) {
+	ch, size, err := r.ScanState.ReadRune()
+	if err == nil && size != 1 {
+		err = fmt.Errorf("invalid rune %#U", ch)
+	}
+	return byte(ch), err
+}
+
+// UnreadByte pushes back the most recently read rune.
+func (r byteReader) UnreadByte() error {
+	return r.ScanState.UnreadRune()
+}
+
+var _ fmt.Scanner = intOne // compile-time check that *Int implements fmt.Scanner
+
+// Scan is a support routine for fmt.Scanner; it sets z to the value of
+// the scanned number. Leading space is skipped first. The verbs 'b'
+// (binary), 'o' (octal), 'd' (decimal), 'x' and 'X' (hexadecimal) fix
+// the base; with 's' and 'v' the base is taken from the prefix of the
+// input. Any other verb yields an error.
+func (z *Int) Scan(s fmt.ScanState, ch rune) error {
+	s.SkipSpace() // skip leading space characters
+
+	var base int
+	switch ch {
+	case 's', 'v':
+		// base stays 0: scan derives it from the input
+	case 'd':
+		base = 10
+	case 'x', 'X':
+		base = 16
+	case 'o':
+		base = 8
+	case 'b':
+		base = 2
+	default:
+		return errors.New("Int.Scan: invalid verb")
+	}
+
+	_, _, err := z.scan(byteReader{s}, base)
+	return err
+}
diff --git a/src/math/big/intconv_test.go b/src/math/big/intconv_test.go
new file mode 100644
index 0000000..5ba2926
--- /dev/null
+++ b/src/math/big/intconv_test.go
@@ -0,0 +1,431 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+)
+
+// stringTests lists string conversion cases shared by the tests in this
+// file. For each case, in is the text to parse in the given base (base 0
+// lets the parser pick the base from the prefix), ok reports whether
+// parsing is expected to succeed, val is the expected value, and out is
+// the expected rendering of the parsed value in that base (in base 10
+// when base is 0).
+var stringTests = []struct {
+	in   string
+	out  string
+	base int
+	val  int64
+	ok   bool
+}{
+	// invalid inputs
+	{in: ""},
+	{in: "a"},
+	{in: "z"},
+	{in: "+"},
+	{in: "-"},
+	{in: "0b"},
+	{in: "0o"},
+	{in: "0x"},
+	{in: "0y"},
+	{in: "2", base: 2},
+	{in: "0b2", base: 0},
+	{in: "08"},
+	{in: "8", base: 8},
+	{in: "0xg", base: 0},
+	{in: "g", base: 16},
+
+	// invalid inputs with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{in: "_"},
+	{in: "0_"},
+	{in: "_0"},
+	{in: "-1__0"},
+	{in: "0x10_"},
+	{in: "1_000", base: 10}, // separators are not permitted for bases != 0
+	{in: "d_e_a_d", base: 16},
+
+	// valid inputs
+	{"0", "0", 0, 0, true},
+	{"0", "0", 10, 0, true},
+	{"0", "0", 16, 0, true},
+	{"+0", "0", 0, 0, true},
+	{"-0", "0", 0, 0, true},
+	{"10", "10", 0, 10, true},
+	{"10", "10", 10, 10, true},
+	{"10", "10", 16, 16, true},
+	{"-10", "-10", 16, -16, true},
+	{"+10", "10", 16, 16, true},
+	{"0b10", "2", 0, 2, true},
+	{"0o10", "8", 0, 8, true},
+	{"0x10", "16", 0, 16, true},
+	{in: "0x10", base: 16},
+	{"-0x10", "-16", 0, -16, true},
+	{"+0x10", "16", 0, 16, true},
+	{"00", "0", 0, 0, true},
+	{"0", "0", 8, 0, true},
+	{"07", "7", 0, 7, true},
+	{"7", "7", 8, 7, true},
+	{"023", "19", 0, 19, true},
+	{"23", "23", 8, 19, true},
+	{"cafebabe", "cafebabe", 16, 0xcafebabe, true},
+	{"0b0", "0", 0, 0, true},
+	{"-111", "-111", 2, -7, true},
+	{"-0b111", "-7", 0, -7, true},
+	{"0b1001010111", "599", 0, 0x257, true},
+	{"1001010111", "1001010111", 2, 0x257, true},
+	{"A", "a", 36, 10, true},
+	{"A", "A", 37, 36, true},
+	{"ABCXYZ", "abcxyz", 36, 623741435, true},
+	{"ABCXYZ", "ABCXYZ", 62, 33536793425, true},
+
+	// valid input with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{"1_000", "1000", 0, 1000, true},
+	{"0b_1010", "10", 0, 10, true},
+	{"+0o_660", "432", 0, 0660, true},
+	{"-0xF00D_1E", "-15731998", 0, -0xf00d1e, true},
+}
+
+// TestIntText parses every valid stringTests entry with SetString and
+// checks that Text reproduces the expected string. Entries with base 0
+// are rendered in base 10.
+func TestIntText(t *testing.T) {
+	z := new(Int)
+	for _, test := range stringTests {
+		if !test.ok {
+			continue
+		}
+
+		if _, parsed := z.SetString(test.in, test.base); !parsed {
+			t.Errorf("%v: failed to parse", test)
+			continue
+		}
+
+		outBase := test.base
+		if outBase == 0 {
+			outBase = 10
+		}
+
+		got := z.Text(outBase)
+		if got != test.out {
+			t.Errorf("%v: got %s; want %s", test, got, test.out)
+		}
+	}
+}
+
+// TestAppendText parses every valid stringTests entry and checks that
+// Append adds exactly the expected text to the end of a buffer that
+// keeps growing across test cases. Entries with base 0 are rendered in
+// base 10.
+func TestAppendText(t *testing.T) {
+	var buf []byte
+	z := new(Int)
+	for _, test := range stringTests {
+		if !test.ok {
+			continue
+		}
+
+		if _, parsed := z.SetString(test.in, test.base); !parsed {
+			t.Errorf("%v: failed to parse", test)
+			continue
+		}
+
+		outBase := test.base
+		if outBase == 0 {
+			outBase = 10
+		}
+
+		start := len(buf) // only the bytes added by this case are compared
+		buf = z.Append(buf, outBase)
+		got := string(buf[start:])
+		if got != test.out {
+			t.Errorf("%v: got %s; want %s", test, got, test.out)
+		}
+	}
+}
+
+// format returns the fmt verb string for the given base: "%b" for 2,
+// "%o" for 8, "%x" for 16, and "%d" for every other base.
+func format(base int) string {
+	verb := "%d"
+	switch base {
+	case 16:
+		verb = "%x"
+	case 8:
+		verb = "%o"
+	case 2:
+		verb = "%b"
+	}
+	return verb
+}
+
+// TestGetString sets an Int from the val field of each valid
+// stringTests entry and checks its String output (base-10 entries only)
+// as well as its fmt rendering with the verb matching the entry's base.
+func TestGetString(t *testing.T) {
+	z := new(Int)
+	for i, test := range stringTests {
+		if !test.ok {
+			continue
+		}
+		z.SetInt64(test.val)
+
+		if test.base == 10 {
+			got := z.String()
+			if got != test.out {
+				t.Errorf("#%da got %s; want %s", i, got, test.out)
+			}
+		}
+
+		verb := format(test.base)
+		got := fmt.Sprintf(verb, z)
+		switch verb {
+		case "%d":
+			// Decimal output is compared against fmt's rendering of the int64.
+			if want := fmt.Sprintf("%d", test.val); got != want {
+				t.Errorf("#%db got %s; want %d", i, got, test.val)
+			}
+		default:
+			if got != test.out {
+				t.Errorf("#%dc got %s; want %s", i, got, test.out)
+			}
+		}
+	}
+}
+
+// TestSetString runs every stringTests entry through SetString twice:
+// once on a fresh Int and once on an Int that already holds a non-zero
+// value. Both calls must agree with the entry's ok field. On failure
+// both must return nil; on success both results must be normalized and
+// equal to the entry's val.
+func TestSetString(t *testing.T) {
+	tmp := new(Int)
+	for i, test := range stringTests {
+		// initialize to a non-zero value so that issues with parsing
+		// 0 are detected
+		tmp.SetInt64(1234567890)
+		n1, ok1 := new(Int).SetString(test.in, test.base)
+		n2, ok2 := tmp.SetString(test.in, test.base)
+		if ok1 != test.ok || ok2 != test.ok {
+			t.Errorf("#%d (input '%s') ok incorrect (should be %t)", i, test.in, test.ok)
+			continue
+		}
+
+		// From here on ok1 == ok2 == test.ok.
+		if !test.ok {
+			// Both calls failed as expected; each must have returned nil.
+			// Both results are checked here so that the n2 check is
+			// actually reachable.
+			if n1 != nil {
+				t.Errorf("#%d (input '%s') n1 != nil", i, test.in)
+			}
+			if n2 != nil {
+				t.Errorf("#%d (input '%s') n2 != nil", i, test.in)
+			}
+			continue
+		}
+
+		if !isNormalized(n1) {
+			t.Errorf("#%d (input '%s'): %v is not normalized", i, test.in, *n1)
+		}
+		if !isNormalized(n2) {
+			t.Errorf("#%d (input '%s'): %v is not normalized", i, test.in, *n2)
+		}
+
+		expected := NewInt(test.val)
+		if n1.Cmp(expected) != 0 {
+			t.Errorf("#%d (input '%s') got: %s want: %d", i, test.in, n1, test.val)
+		}
+		if n2.Cmp(expected) != 0 {
+			t.Errorf("#%d (input '%s') got: %s want: %d", i, test.in, n2, test.val)
+		}
+	}
+}
+
+// formatTests lists the cases for TestFormat: input is parsed with
+// SetString in base 0 (the special input "<nil>" stands for a nil *Int),
+// the result is formatted with fmt.Sprintf using format, and the text
+// produced is compared against output.
+var formatTests = []struct {
+	input  string
+	format string
+	output string
+}{
+	{"<nil>", "%x", "<nil>"},
+	{"<nil>", "%#x", "<nil>"},
+	{"<nil>", "%#y", "%!y(big.Int=<nil>)"},
+
+	{"10", "%b", "1010"},
+	{"10", "%o", "12"},
+	{"10", "%d", "10"},
+	{"10", "%v", "10"},
+	{"10", "%x", "a"},
+	{"10", "%X", "A"},
+	{"-10", "%X", "-A"},
+	{"10", "%y", "%!y(big.Int=10)"},
+	{"-10", "%y", "%!y(big.Int=-10)"},
+
+	{"10", "%#b", "0b1010"},
+	{"10", "%#o", "012"},
+	{"10", "%O", "0o12"},
+	{"-10", "%#b", "-0b1010"},
+	{"-10", "%#o", "-012"},
+	{"-10", "%O", "-0o12"},
+	{"10", "%#d", "10"},
+	{"10", "%#v", "10"},
+	{"10", "%#x", "0xa"},
+	{"10", "%#X", "0XA"},
+	{"-10", "%#X", "-0XA"},
+	{"10", "%#y", "%!y(big.Int=10)"},
+	{"-10", "%#y", "%!y(big.Int=-10)"},
+
+	{"1234", "%d", "1234"},
+	{"1234", "%3d", "1234"},
+	{"1234", "%4d", "1234"},
+	{"-1234", "%d", "-1234"},
+	{"1234", "% 5d", " 1234"},
+	{"1234", "%+5d", "+1234"},
+	{"1234", "%-5d", "1234 "},
+	{"1234", "%x", "4d2"},
+	{"1234", "%X", "4D2"},
+	{"-1234", "%3x", "-4d2"},
+	{"-1234", "%4x", "-4d2"},
+	{"-1234", "%5x", " -4d2"},
+	{"-1234", "%-5x", "-4d2 "},
+	{"1234", "%03d", "1234"},
+	{"1234", "%04d", "1234"},
+	{"1234", "%05d", "01234"},
+	{"1234", "%06d", "001234"},
+	{"-1234", "%06d", "-01234"},
+	{"1234", "%+06d", "+01234"},
+	{"1234", "% 06d", " 01234"},
+	{"1234", "%-6d", "1234  "},
+	{"1234", "%-06d", "1234  "},
+	{"-1234", "%-06d", "-1234 "},
+
+	{"1234", "%.3d", "1234"},
+	{"1234", "%.4d", "1234"},
+	{"1234", "%.5d", "01234"},
+	{"1234", "%.6d", "001234"},
+	{"-1234", "%.3d", "-1234"},
+	{"-1234", "%.4d", "-1234"},
+	{"-1234", "%.5d", "-01234"},
+	{"-1234", "%.6d", "-001234"},
+
+	{"1234", "%8.3d", "    1234"},
+	{"1234", "%8.4d", "    1234"},
+	{"1234", "%8.5d", "   01234"},
+	{"1234", "%8.6d", "  001234"},
+	{"-1234", "%8.3d", "   -1234"},
+	{"-1234", "%8.4d", "   -1234"},
+	{"-1234", "%8.5d", "  -01234"},
+	{"-1234", "%8.6d", " -001234"},
+
+	{"1234", "%+8.3d", "   +1234"},
+	{"1234", "%+8.4d", "   +1234"},
+	{"1234", "%+8.5d", "  +01234"},
+	{"1234", "%+8.6d", " +001234"},
+	{"-1234", "%+8.3d", "   -1234"},
+	{"-1234", "%+8.4d", "   -1234"},
+	{"-1234", "%+8.5d", "  -01234"},
+	{"-1234", "%+8.6d", " -001234"},
+
+	{"1234", "% 8.3d", "    1234"},
+	{"1234", "% 8.4d", "    1234"},
+	{"1234", "% 8.5d", "   01234"},
+	{"1234", "% 8.6d", "  001234"},
+	{"-1234", "% 8.3d", "   -1234"},
+	{"-1234", "% 8.4d", "   -1234"},
+	{"-1234", "% 8.5d", "  -01234"},
+	{"-1234", "% 8.6d", " -001234"},
+
+	{"1234", "%.3x", "4d2"},
+	{"1234", "%.4x", "04d2"},
+	{"1234", "%.5x", "004d2"},
+	{"1234", "%.6x", "0004d2"},
+	{"-1234", "%.3x", "-4d2"},
+	{"-1234", "%.4x", "-04d2"},
+	{"-1234", "%.5x", "-004d2"},
+	{"-1234", "%.6x", "-0004d2"},
+
+	{"1234", "%8.3x", "     4d2"},
+	{"1234", "%8.4x", "    04d2"},
+	{"1234", "%8.5x", "   004d2"},
+	{"1234", "%8.6x", "  0004d2"},
+	{"-1234", "%8.3x", "    -4d2"},
+	{"-1234", "%8.4x", "   -04d2"},
+	{"-1234", "%8.5x", "  -004d2"},
+	{"-1234", "%8.6x", " -0004d2"},
+
+	{"1234", "%+8.3x", "    +4d2"},
+	{"1234", "%+8.4x", "   +04d2"},
+	{"1234", "%+8.5x", "  +004d2"},
+	{"1234", "%+8.6x", " +0004d2"},
+	{"-1234", "%+8.3x", "    -4d2"},
+	{"-1234", "%+8.4x", "   -04d2"},
+	{"-1234", "%+8.5x", "  -004d2"},
+	{"-1234", "%+8.6x", " -0004d2"},
+
+	{"1234", "% 8.3x", "     4d2"},
+	{"1234", "% 8.4x", "    04d2"},
+	{"1234", "% 8.5x", "   004d2"},
+	{"1234", "% 8.6x", "  0004d2"},
+	{"1234", "% 8.7x", " 00004d2"},
+	{"1234", "% 8.8x", " 000004d2"},
+	{"-1234", "% 8.3x", "    -4d2"},
+	{"-1234", "% 8.4x", "   -04d2"},
+	{"-1234", "% 8.5x", "  -004d2"},
+	{"-1234", "% 8.6x", " -0004d2"},
+	{"-1234", "% 8.7x", "-00004d2"},
+	{"-1234", "% 8.8x", "-000004d2"},
+
+	{"1234", "%-8.3d", "1234    "},
+	{"1234", "%-8.4d", "1234    "},
+	{"1234", "%-8.5d", "01234   "},
+	{"1234", "%-8.6d", "001234  "},
+	{"1234", "%-8.7d", "0001234 "},
+	{"1234", "%-8.8d", "00001234"},
+	{"-1234", "%-8.3d", "-1234   "},
+	{"-1234", "%-8.4d", "-1234   "},
+	{"-1234", "%-8.5d", "-01234  "},
+	{"-1234", "%-8.6d", "-001234 "},
+	{"-1234", "%-8.7d", "-0001234"},
+	{"-1234", "%-8.8d", "-00001234"},
+
+	{"16777215", "%b", "111111111111111111111111"}, // 2**24 - 1
+
+	{"0", "%.d", ""},
+	{"0", "%.0d", ""},
+	{"0", "%3.d", ""},
+}
+
+// TestFormat formats each formatTests input with fmt.Sprintf and the
+// entry's format string and compares the result with the expected
+// output. The input "<nil>" is formatted as a nil *Int; every other
+// input is parsed with SetString in base 0.
+func TestFormat(t *testing.T) {
+	for i, test := range formatTests {
+		var x *Int
+		if test.input != "<nil>" {
+			var ok bool
+			x, ok = new(Int).SetString(test.input, 0)
+			if !ok {
+				t.Errorf("#%d failed reading input %s", i, test.input)
+				// Without a parsed value there is nothing meaningful to
+				// format; skip the case rather than formatting a nil *Int
+				// and reporting a second, misleading failure.
+				continue
+			}
+		}
+		output := fmt.Sprintf(test.format, x)
+		if output != test.output {
+			t.Errorf("#%d got %q; want %q, {%q, %q, %q}", i, output, test.output, test.input, test.format, test.output)
+		}
+	}
+}
+
+// scanTests lists the cases for TestScan: input is scanned with
+// fmt.Fscanf using format, output is the expected decimal string of the
+// scanned value, and remaining is the number of input bytes expected to
+// be left unread afterwards.
+var scanTests = []struct {
+	input     string
+	format    string
+	output    string
+	remaining int
+}{
+	{"1010", "%b", "10", 0},
+	{"0b1010", "%v", "10", 0},
+	{"12", "%o", "10", 0},
+	{"012", "%v", "10", 0},
+	{"10", "%d", "10", 0},
+	{"10", "%v", "10", 0},
+	{"a", "%x", "10", 0},
+	{"0xa", "%v", "10", 0},
+	{"A", "%X", "10", 0},
+	{"-A", "%X", "-10", 0},
+	{"+0b1011001", "%v", "89", 0},
+	{"0xA", "%v", "10", 0},
+	{"0 ", "%v", "0", 1},
+	{"2+3", "%v", "2", 2},
+	{"0XABC 12", "%v", "2748", 3},
+}
+
+// TestScan scans each scanTests input into a fresh Int with fmt.Fscanf
+// and checks the scanned value as well as the number of bytes left
+// unread in the input buffer.
+func TestScan(t *testing.T) {
+	var src bytes.Buffer
+	for i, test := range scanTests {
+		src.Reset()
+		src.WriteString(test.input)
+
+		x := new(Int)
+		_, err := fmt.Fscanf(&src, test.format, x)
+		if err != nil {
+			t.Errorf("#%d error: %s", i, err)
+		}
+		if got := x.String(); got != test.output {
+			t.Errorf("#%d got %s; want %s", i, got, test.output)
+		}
+		if left := src.Len(); left != test.remaining {
+			t.Errorf("#%d got %d bytes remaining; want %d", i, left, test.remaining)
+		}
+	}
+}
diff --git a/src/math/big/intmarsh.go b/src/math/big/intmarsh.go
new file mode 100644
index 0000000..ce429ff
--- /dev/null
+++ b/src/math/big/intmarsh.go
@@ -0,0 +1,83 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements encoding/decoding of Ints.
+
+package big
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// Gob codec version. Permits backward-compatible changes to the encoding.
+// GobEncode stores it, shifted left by one bit, in the first byte of the
+// encoding (the lowest bit holds the sign); GobDecode rejects input whose
+// first byte carries a different version.
+const intGobVersion byte = 1
+
+// GobEncode implements the gob.GobEncoder interface.
+func (x *Int) GobEncode() ([]byte, error) {
+	if x == nil {
+		return nil, nil
+	}
+	buf := make([]byte, 1+len(x.abs)*_S) // extra byte for version and sign bit
+	i := x.abs.bytes(buf) - 1            // i >= 0
+	b := intGobVersion << 1              // make space for sign bit
+	if x.neg {
+		b |= 1
+	}
+	buf[i] = b
+	return buf[i:], nil
+}
+
+// GobDecode implements the gob.GobDecoder interface. An empty buffer
+// resets z to the zero Int. Otherwise the first byte must carry the
+// supported codec version in its upper bits; its lowest bit gives the
+// sign and the remaining bytes give the absolute value.
+func (z *Int) GobDecode(buf []byte) error {
+	if len(buf) == 0 {
+		// Other side sent a nil or default value.
+		*z = Int{}
+		return nil
+	}
+
+	header, payload := buf[0], buf[1:]
+	if version := header >> 1; version != intGobVersion {
+		return fmt.Errorf("Int.GobDecode: encoding version %d not supported", version)
+	}
+
+	z.neg = header&1 == 1
+	z.abs = z.abs.setBytes(payload)
+	return nil
+}
+
+// MarshalText implements the encoding.TextMarshaler interface. The
+// text is the base-10 representation of x; a nil receiver is marshaled
+// as "<nil>".
+func (x *Int) MarshalText() (text []byte, err error) {
+	if x != nil {
+		return x.abs.itoa(x.neg, 10), nil
+	}
+	return []byte("<nil>"), nil
+}
+
+// UnmarshalText implements the encoding.TextUnmarshaler interface. The
+// text is parsed with base 0, so the base is derived from the text
+// itself. An error is returned if the text cannot be parsed.
+func (z *Int) UnmarshalText(text []byte) error {
+	_, ok := z.setFromScanner(bytes.NewReader(text), 0)
+	if ok {
+		return nil
+	}
+	return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text)
+}
+
+// The JSON marshalers exist only for API backward compatibility
+// (programs that explicitly look for these two methods). JSON works
+// fine with the TextMarshaler alone.
+
+// MarshalJSON implements the json.Marshaler interface. The value is
+// written as a base-10 number; a nil receiver is written as null.
+func (x *Int) MarshalJSON() ([]byte, error) {
+	if x != nil {
+		return x.abs.itoa(x.neg, 10), nil
+	}
+	return []byte("null"), nil
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface. A JSON null
+// leaves z unchanged, like in the main JSON package; any other input
+// is handed to UnmarshalText.
+func (z *Int) UnmarshalJSON(text []byte) error {
+	if string(text) != "null" {
+		return z.UnmarshalText(text)
+	}
+	return nil
+}
diff --git a/src/math/big/intmarsh_test.go b/src/math/big/intmarsh_test.go
new file mode 100644
index 0000000..8e7d29f
--- /dev/null
+++ b/src/math/big/intmarsh_test.go
@@ -0,0 +1,134 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"encoding/gob"
+	"encoding/json"
+	"encoding/xml"
+	"testing"
+)
+
+// encodingTests lists unsigned decimal literals used by the encoding
+// round-trip tests in this file; each test additionally tries every
+// value with the prefixes "", "+", and "-".
+var encodingTests = []string{
+	"0",
+	"1",
+	"2",
+	"10",
+	"1000",
+	"1234567890",
+	"298472983472983471903246121093472394872319615612417471234712061",
+}
+
+// TestIntGobEncoding sends every encodingTests value, with each sign
+// prefix, through a gob encoder/decoder pair sharing one buffer and
+// checks that the received Int equals the one sent.
+func TestIntGobEncoding(t *testing.T) {
+	var medium bytes.Buffer
+	enc := gob.NewEncoder(&medium)
+	dec := gob.NewDecoder(&medium)
+	signs := []string{"", "+", "-"}
+	for _, test := range encodingTests {
+		for _, sign := range signs {
+			medium.Reset() // empty buffer for each test case (in case of failures)
+
+			var sent Int
+			sent.SetString(sign+test, 10)
+			if err := enc.Encode(&sent); err != nil {
+				t.Errorf("encoding of %s failed: %s", &sent, err)
+				continue
+			}
+
+			var received Int
+			if err := dec.Decode(&received); err != nil {
+				t.Errorf("decoding of %s failed: %s", &sent, err)
+				continue
+			}
+
+			if received.Cmp(&sent) != 0 {
+				t.Errorf("transmission of %s failed: got %s want %s", &sent, &received, &sent)
+			}
+		}
+	}
+}
+
+// Sending a nil Int pointer (inside a slice) on a round trip through gob should yield a zero.
+// TODO: top-level nils.
+func TestGobEncodingNilIntInSlice(t *testing.T) {
+	buf := new(bytes.Buffer)
+	enc := gob.NewEncoder(buf)
+	dec := gob.NewDecoder(buf)
+
+	in := make([]*Int, 1) // a single nil *Int
+	if err := enc.Encode(&in); err != nil {
+		// Nothing usable was written, so decoding could only fail too;
+		// stop here instead of reporting a follow-on error.
+		t.Fatalf("gob encode failed: %q", err)
+	}
+
+	var out []*Int
+	if err := dec.Decode(&out); err != nil {
+		t.Fatalf("gob decode failed: %q", err)
+	}
+	if len(out) != 1 {
+		t.Fatalf("wrong len; want 1 got %d", len(out))
+	}
+
+	var zero Int
+	if out[0].Cmp(&zero) != 0 {
+		// Report the decoded element itself rather than the whole slice.
+		t.Fatalf("transmission of (*Int)(nil) failed: got %s want 0", out[0])
+	}
+}
+
+// TestIntJSONEncoding marshals every encodingTests value, with each
+// sign prefix, to JSON and checks that unmarshaling yields an equal Int.
+func TestIntJSONEncoding(t *testing.T) {
+	signs := []string{"", "+", "-"}
+	for _, test := range encodingTests {
+		for _, sign := range signs {
+			var sent Int
+			sent.SetString(sign+test, 10)
+
+			data, err := json.Marshal(&sent)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &sent, err)
+				continue
+			}
+
+			var received Int
+			if err := json.Unmarshal(data, &received); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &sent, err)
+				continue
+			}
+
+			if received.Cmp(&sent) != 0 {
+				t.Errorf("JSON encoding of %s failed: got %s want %s", &sent, &received, &sent)
+			}
+		}
+	}
+}
+
+// TestIntJSONEncodingNil checks that calling MarshalJSON on a nil *Int
+// succeeds and produces the JSON literal null.
+func TestIntJSONEncodingNil(t *testing.T) {
+	const want = "null"
+
+	var x *Int
+	data, err := x.MarshalJSON()
+	if err != nil {
+		t.Fatalf("marshaling of nil failed: %s", err)
+	}
+	if got := string(data); got != want {
+		t.Fatalf("marshaling of nil failed: got %s want %s", got, want)
+	}
+}
+
+// TestIntXMLEncoding marshals every encodingTests value, with each
+// sign prefix, to XML and checks that unmarshaling yields an equal Int.
+// The inputs are parsed with base 0.
+func TestIntXMLEncoding(t *testing.T) {
+	signs := []string{"", "+", "-"}
+	for _, test := range encodingTests {
+		for _, sign := range signs {
+			var sent Int
+			sent.SetString(sign+test, 0)
+
+			data, err := xml.Marshal(&sent)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &sent, err)
+				continue
+			}
+
+			var received Int
+			if err := xml.Unmarshal(data, &received); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &sent, err)
+				continue
+			}
+
+			if received.Cmp(&sent) != 0 {
+				t.Errorf("XML encoding of %s failed: got %s want %s", &sent, &received, &sent)
+			}
+		}
+	}
+}
diff --git a/src/math/big/link_test.go b/src/math/big/link_test.go
new file mode 100644
index 0000000..6e33aa5
--- /dev/null
+++ b/src/math/big/link_test.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"internal/testenv"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"testing"
+)
+
+// TestLinkerGC builds a tiny program that imports math/big only for
+// its side effects and inspects the symbol table of the result. The
+// linker is expected to drop all methods of Float, Rat, and Int, since
+// nothing (notably not package initialization) refers to them.
+func TestLinkerGC(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping in short mode")
+	}
+	t.Parallel()
+
+	dir := t.TempDir()
+	goTool := testenv.GoToolPath(t)
+
+	const program = `package main
+import _ "math/big"
+func main() {}
+`
+	src := filepath.Join(dir, "x.go")
+	if err := os.WriteFile(src, []byte(program), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// run invokes the go tool with the given arguments inside dir.
+	run := func(args ...string) ([]byte, error) {
+		cmd := exec.Command(goTool, args...)
+		cmd.Dir = dir
+		return cmd.CombinedOutput()
+	}
+
+	if out, err := run("build", "-o", "x.exe", "x.go"); err != nil {
+		t.Fatalf("compile: %v, %s", err, out)
+	}
+
+	nm, err := run("tool", "nm", "x.exe")
+	if err != nil {
+		t.Fatalf("nm: %v, %s", err, nm)
+	}
+
+	// Test the test: a symbol that must be present.
+	if want := "runtime.main"; !bytes.Contains(nm, []byte(want)) {
+		t.Errorf("expected symbol %q not found", want)
+	}
+
+	for _, sym := range []string{
+		"math/big.(*Float)",
+		"math/big.(*Rat)",
+		"math/big.(*Int)",
+	} {
+		if bytes.Contains(nm, []byte(sym)) {
+			t.Errorf("unexpected symbol %q found", sym)
+		}
+	}
+
+	if t.Failed() {
+		t.Logf("Got: %s", nm)
+	}
+}
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
new file mode 100644
index 0000000..90ce6d1
--- /dev/null
+++ b/src/math/big/nat.go
@@ -0,0 +1,1429 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements unsigned multi-precision integers (natural
+// numbers). They are the building blocks for the implementation
+// of signed integers, rationals, and floating-point numbers.
+//
+// Caution: This implementation relies on the function "alias"
+//          which assumes that (nat) slice capacities are never
+//          changed (no 3-operand slice expressions). If that
+//          changes, alias needs to be updated for correctness.
+
+package big
+
+import (
+	"encoding/binary"
+	"math/bits"
+	"math/rand"
+	"sync"
+)
+
+// An unsigned integer x of the form
+//
+//	x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
+//
+// with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
+// with the digits x[i] as the slice elements. Element 0 therefore holds
+// the least significant digit.
+//
+// A number is normalized if the slice contains no leading 0 digits.
+// During arithmetic operations, denormalized values may occur but are
+// always normalized before returning the final result. The normalized
+// representation of 0 is the empty or nil slice (length = 0).
+type nat []Word
+
+// Small values in nat form.
+// NOTE(review): these look like shared package-level values, so code
+// using them presumably must not modify them in place - confirm at the
+// use sites.
+var (
+	natOne  = nat{1}
+	natTwo  = nat{2}
+	natFive = nat{5}
+	natTen  = nat{10}
+)
+
+// String returns z in hexadecimal with a leading "0x".
+func (z nat) String() string {
+	hex := z.itoa(false, 16)
+	return "0x" + string(hex)
+}
+
+// clear sets every element of z to 0. The length of z is unchanged.
+func (z nat) clear() {
+	for i := range z {
+		z[i] = 0
+	}
+}
+
+// norm returns z with all leading (most significant) zero words
+// sliced off. The result shares z's underlying array.
+func (z nat) norm() nat {
+	n := len(z)
+	for n > 0 {
+		if z[n-1] != 0 {
+			break
+		}
+		n--
+	}
+	return z[:n]
+}
+
+// make returns a nat of length n. It reslices z when z has sufficient
+// capacity, in which case the contents are whatever z held before.
+// Otherwise a new slice is allocated, with some spare capacity unless
+// n is 1.
+func (z nat) make(n int) nat {
+	switch {
+	case n <= cap(z):
+		return z[:n] // reuse z
+	case n == 1:
+		// Most nats start small and stay that way; don't over-allocate.
+		return make(nat, 1)
+	}
+	// The amount of extra capacity has a significant performance
+	// impact: more headroom raises the chance that the slice can be
+	// reused by a later operation.
+	const e = 4 // extra capacity
+	return make(nat, n, n+e)
+}
+
+// setWord returns a nat with the value x, reusing z's storage where
+// possible. The value 0 is represented by the empty slice.
+func (z nat) setWord(x Word) nat {
+	if x != 0 {
+		z = z.make(1)
+		z[0] = x
+		return z
+	}
+	return z[:0]
+}
+
+// setUint64 returns a nat with the value x, reusing z's storage where
+// possible. A value that fits into a single Word is stored as one
+// word; otherwise it is split into a low word and a high word holding
+// x >> 32.
+func (z nat) setUint64(x uint64) nat {
+	// single-word value
+	if lo := Word(x); uint64(lo) == x {
+		return z.setWord(lo)
+	}
+	// 2-word value
+	z = z.make(2)
+	z[0], z[1] = Word(x), Word(x>>32)
+	return z
+}
+
+// set copies x into z (reallocating if z is too small) and returns
+// the resulting nat, which has the same length as x.
+func (z nat) set(x nat) nat {
+	dst := z.make(len(x))
+	copy(dst, x)
+	return dst
+}
+
+// add returns x + y, using z for the result where possible. The
+// result is normalized.
+func (z nat) add(x, y nat) nat {
+	m, n := len(x), len(y)
+
+	if m < n {
+		// make x the longer operand
+		return z.add(y, x)
+	}
+	if m == 0 {
+		// n == 0 because m >= n; result is 0
+		return z[:0]
+	}
+	if n == 0 {
+		// result is x
+		return z.set(x)
+	}
+	// m >= n > 0
+
+	z = z.make(m + 1)
+	carry := addVV(z[:n], x, y)
+	if m > n {
+		// propagate the carry through the remaining high words of x
+		carry = addVW(z[n:m], x[n:], carry)
+	}
+	z[m] = carry
+
+	return z.norm()
+}
+
+// sub returns x - y, using z for the result where possible. The
+// result is normalized. It panics with "underflow" if x < y.
+func (z nat) sub(x, y nat) nat {
+	m, n := len(x), len(y)
+
+	if m < n {
+		panic("underflow")
+	}
+	if m == 0 {
+		// n == 0 because m >= n; result is 0
+		return z[:0]
+	}
+	if n == 0 {
+		// result is x
+		return z.set(x)
+	}
+	// m >= n > 0
+
+	z = z.make(m)
+	borrow := subVV(z[:n], x, y)
+	if m > n {
+		// propagate the borrow through the remaining high words of x
+		borrow = subVW(z[n:], x[n:], borrow)
+	}
+	if borrow != 0 {
+		panic("underflow")
+	}
+
+	return z.norm()
+}
+
+// cmp compares x and y and returns -1 if x < y, 0 if x == y, and +1
+// if x > y. Operands of different length are ordered by length alone,
+// so both are expected to be normalized.
+func (x nat) cmp(y nat) (r int) {
+	m, n := len(x), len(y)
+	switch {
+	case m < n:
+		return -1
+	case m > n:
+		return 1
+	case m == 0:
+		// both operands are 0
+		return 0
+	}
+
+	// Same non-zero length: the most significant differing word decides.
+	for i := m - 1; i >= 0; i-- {
+		switch {
+		case x[i] < y[i]:
+			return -1
+		case x[i] > y[i]:
+			return 1
+		}
+	}
+	return 0
+}
+
+// mulAddWW returns x*y + r, using z for the result where possible.
+// The result is normalized.
+func (z nat) mulAddWW(x nat, y, r Word) nat {
+	m := len(x)
+	if y == 0 || m == 0 {
+		return z.setWord(r) // the product is 0, so the result is r
+	}
+	// m > 0
+
+	z = z.make(m + 1)
+	carry := mulAddVWW(z[:m], x, y, r)
+	z[m] = carry
+
+	return z.norm()
+}
+
+// basicMul computes x*y one word of y at a time. The product is left,
+// not normalized, in z[0 : len(x)+len(y)]; that part of z is zeroed
+// first, so z must be at least that long.
+func basicMul(z, x, y nat) {
+	m := len(x)
+	z[:m+len(y)].clear() // initialize z
+	for i, d := range y {
+		if d == 0 {
+			continue // this word of y contributes nothing
+		}
+		z[m+i] = addMulVVW(z[i:i+m], x, d)
+	}
+}
+
+// montgomery computes z mod m = x*y*2**(-n*_W) mod m,
+// assuming k = -1/m mod 2**_W.
+// z is used for storing the result which is returned;
+// z must not alias x, y or m.
+// See Gueron, "Efficient Software Implementations of Modular Exponentiation".
+// https://eprint.iacr.org/2011/239.pdf
+// In the terminology of that paper, this is an "Almost Montgomery Multiplication":
+// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result
+// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
+//
+// It panics if x, y, and m do not all have length n.
+func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
+	// This code assumes x, y, m are all the same length, n.
+	// (required by addMulVVW and the for loop).
+	// It also assumes that x, y are already reduced mod m,
+	// or else the result will not be properly reduced.
+	if len(x) != n || len(y) != n || len(m) != n {
+		panic("math/big: mismatched montgomery number lengths")
+	}
+	// Work in a 2n-word buffer; it starts out as all zeros.
+	z = z.make(n * 2)
+	z.clear()
+	var c Word // carry out of the previous iteration (0 or 1)
+	for i := 0; i < n; i++ {
+		d := y[i]
+		// Add x*y[i] into the window z[i:n+i]; c2 is the carry out.
+		c2 := addMulVVW(z[i:n+i], x, d)
+		// Add the multiple t of m into the same window; c3 is the carry out.
+		t := z[i] * k
+		c3 := addMulVVW(z[i:n+i], m, t)
+		// Sum the three carries into word n+i. A sum smaller than one
+		// of its addends means the addition wrapped around (assuming
+		// Word is an unsigned type), which becomes the next carry.
+		cx := c + c2
+		cy := cx + c3
+		z[n+i] = cy
+		if cx < c2 || cy < c3 {
+			c = 1
+		} else {
+			c = 0
+		}
+	}
+	// The result is in the upper half z[n:]; move it to the lower half,
+	// subtracting m once if the final carry is set.
+	if c != 0 {
+		subVV(z[:n], z[n:], m)
+	} else {
+		copy(z[:n], z[n:])
+	}
+	return z[:n]
+}
+
+// karatsubaAdd is a fast version of
+// z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks: it adds x
+// to the low n words of z and, if that produces a carry, propagates
+// the carry into the next n>>1 words of z.
+// Factored out for readability - do not use outside karatsuba.
+func karatsubaAdd(z, x nat, n int) {
+	carry := addVV(z[:n], z, x)
+	if carry == 0 {
+		return
+	}
+	addVW(z[n:n+(n>>1)], z[n:], carry)
+}
+
+// karatsubaSub is the subtracting counterpart of karatsubaAdd: it
+// subtracts x from the low n words of z and, if that produces a
+// borrow, propagates the borrow into the next n>>1 words of z.
+func karatsubaSub(z, x nat, n int) {
+	borrow := subVV(z[:n], z, x)
+	if borrow == 0 {
+		return
+	}
+	subVW(z[n:n+(n>>1)], z[n:], borrow)
+}
+
+// Operands that are shorter than karatsubaThreshold are multiplied using
+// "grade school" multiplication; for longer operands the Karatsuba algorithm
+// is used.
+// NOTE(review): this is a variable rather than a constant, presumably so
+// that the calibration test can vary it - confirm in calibrate_test.go.
+var karatsubaThreshold = 40 // computed by calibrate_test.go
+
+// karatsuba multiplies x and y and leaves the result in z.
+// Both x and y must have the same length n and n must be a
+// power of 2. The result vector z must have len(z) >= 6*n.
+// The (non-normalized) result is placed in z[0 : 2*n].
+// The remainder of z is used as scratch space and is overwritten.
+func karatsuba(z, x, y nat) {
+	n := len(y)
+
+	// Switch to basic multiplication if numbers are odd or small.
+	// (n is always even if karatsubaThreshold is even, but be
+	// conservative)
+	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
+		basicMul(z, x, y)
+		return
+	}
+	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
+
+	// Karatsuba multiplication is based on the observation that
+	// for two numbers x and y with:
+	//
+	//   x = x1*b + x0
+	//   y = y1*b + y0
+	//
+	// the product x*y can be obtained with 3 products z2, z1, z0
+	// instead of 4:
+	//
+	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
+	//       =    z2*b*b +              z1*b +    z0
+	//
+	// with:
+	//
+	//   xd = x1 - x0
+	//   yd = y0 - y1
+	//
+	//   z1 =      xd*yd                    + z2 + z0
+	//      = (x1-x0)*(y0 - y1)             + z2 + z0
+	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
+	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
+	//      = x1*y0                 + x0*y1
+
+	// split x, y into "digits"
+	n2 := n >> 1              // n2 >= 1
+	x1, x0 := x[n2:], x[0:n2] // x = x1*b + x0
+	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
+
+	// z is used for the result and temporary storage:
+	//
+	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
+	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
+	//
+	// For each recursive call of karatsuba, an unused slice of
+	// z is passed in that has (at least) half the length of the
+	// caller's z.
+
+	// compute z0 and z2 with the result "in place" in z
+	karatsuba(z, x0, y0)     // z0 = x0*y0
+	karatsuba(z[n:], x1, y1) // z2 = x1*y1
+
+	// compute xd (or the negative value if underflow occurs)
+	s := 1 // sign of product xd*yd
+	xd := z[2*n : 2*n+n2]
+	if subVV(xd, x1, x0) != 0 { // x1-x0
+		s = -s
+		subVV(xd, x0, x1) // x0-x1
+	}
+
+	// compute yd (or the negative value if underflow occurs)
+	yd := z[2*n+n2 : 3*n]
+	if subVV(yd, y0, y1) != 0 { // y0-y1
+		s = -s
+		subVV(yd, y1, y0) // y1-y0
+	}
+
+	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
+	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
+	p := z[n*3:]
+	karatsuba(p, xd, yd)
+
+	// save original z2:z0
+	// (ok to use upper half of z since we're done recurring)
+	r := z[n*4:]
+	copy(r, z[:n*2])
+
+	// add up all partial products
+	//
+	//   2*n     n     0
+	// z = [ z2  | z0  ]
+	//   +    [ z0  ]
+	//   +    [ z2  ]
+	//   +    [  p  ]
+	//
+	// All three additions are applied at word offset n2, i.e. to the
+	// middle of the 2*n-word result; p is subtracted instead of added
+	// when the sign s of xd*yd is negative.
+	karatsubaAdd(z[n2:], r, n)
+	karatsubaAdd(z[n2:], r[n:], n)
+	if s > 0 {
+		karatsubaAdd(z[n2:], p, n)
+	} else {
+		karatsubaSub(z[n2:], p, n)
+	}
+}
+
+// alias reports whether x and y share the same base array.
+// Values with zero capacity never alias anything.
+//
+// Note: alias assumes that the capacity of underlying arrays
+// is never changed for nat values; i.e. that there are
+// no 3-operand slice expressions in this code (or worse,
+// reflect-based operations to the same effect).
+func alias(x, y nat) bool {
+	cx, cy := cap(x), cap(y)
+	if cx == 0 || cy == 0 {
+		return false
+	}
+	// Two slices share a base array exactly when the last elements of
+	// their full-capacity extents have the same address.
+	lastX := &x[:cx][cx-1]
+	lastY := &y[:cy][cy-1]
+	return lastX == lastY
+}
+
+// addAt implements z += x<<(_W*i); z must be long enough.
+// It operates on z in place rather than through nat.add, because z has
+// to remain the same slice and need not be normalized after each addition.
+// A carry out of the last word of z is dropped.
+func addAt(z, x nat, i int) {
+	n := len(x)
+	if n == 0 {
+		return
+	}
+	end := i + n
+	carry := addVV(z[i:end], z[i:], x)
+	if carry != 0 && end < len(z) {
+		// Propagate the carry through the words above the added range.
+		addVW(z[end:], z[end:], carry)
+	}
+}
+
+// max returns the larger of x and y.
+func max(x, y int) int {
+	if y >= x {
+		return y
+	}
+	return x
+}
+
+// karatsubaLen computes an approximation to the maximum k <= n such that
+// k = p<<i for a number p <= threshold and an i >= 0. Thus, the
+// result is the largest number that can be divided repeatedly by 2 before
+// becoming about the value of threshold.
+func karatsubaLen(n, threshold int) int {
+	i := uint(0)
+	for n > threshold {
+		n >>= 1
+		i++
+	}
+	return n << i
+}
+
+// mul sets z to the product x*y and returns the result of z.norm().
+// The storage of z is reused unless z aliases x or y. Depending on the
+// operand lengths, it uses single-word multiplication, basicMul, or
+// Karatsuba multiplication of the low k words followed by correction terms.
+func (z nat) mul(x, y nat) nat {
+	m := len(x)
+	n := len(y)
+
+	switch {
+	case m < n:
+		// ensure the longer operand comes first
+		return z.mul(y, x)
+	case m == 0 || n == 0:
+		return z[:0]
+	case n == 1:
+		return z.mulAddWW(x, y[0], 0)
+	}
+	// m >= n > 1
+
+	// determine if z can be reused
+	if alias(z, x) || alias(z, y) {
+		z = nil // z is an alias for x or y - cannot reuse
+	}
+
+	// use basic multiplication if the numbers are small
+	if n < karatsubaThreshold {
+		z = z.make(m + n)
+		basicMul(z, x, y)
+		return z.norm()
+	}
+	// m >= n && n >= karatsubaThreshold && n >= 2
+
+	// determine Karatsuba length k such that
+	//
+	//   x = xh*b + x0  (0 <= x0 < b)
+	//   y = yh*b + y0  (0 <= y0 < b)
+	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
+	//
+	k := karatsubaLen(n, karatsubaThreshold)
+	// k <= n
+
+	// multiply x0 and y0 via Karatsuba
+	x0 := x[0:k]              // x0 is not normalized
+	y0 := y[0:k]              // y0 is not normalized
+	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
+	karatsuba(z, x0, y0)
+	z = z[0 : m+n]  // z has final length but may be incomplete
+	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
+
+	// If xh != 0 or yh != 0, add the missing terms to z. For
+	//
+	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
+	//   yh =                         y1*b (0 <= y1 < b)
+	//
+	// the missing terms are
+	//
+	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
+	//
+	// since all the yi for i > 1 are 0 by choice of k: If any of them
+	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
+	// be a larger valid threshold contradicting the assumption about k.
+	//
+	if k < n || m != n {
+		// t is pooled scratch space for the partial products.
+		tp := getNat(3 * k)
+		t := *tp
+
+		// add x0*y1*b
+		x0 := x0.norm()
+		y1 := y[k:]       // y1 is normalized because y is
+		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
+		addAt(z, t, k)
+
+		// add xi*y0<<i, xi*y1*b<<(i+k)
+		y0 := y0.norm()
+		for i := k; i < len(x); i += k {
+			// xi is the next chunk of at most k words of x, starting at word i
+			xi := x[i:]
+			if len(xi) > k {
+				xi = xi[:k]
+			}
+			xi = xi.norm()
+			t = t.mul(xi, y0)
+			addAt(z, t, i)
+			t = t.mul(xi, y1)
+			addAt(z, t, i+k)
+		}
+
+		putNat(tp)
+	}
+
+	return z.norm()
+}
+
+// basicSqr sets z = x*x and is asymptotically faster than basicMul
+// by about a factor of 2, but slower for small arguments due to overhead.
+// Requirements: len(x) > 0, len(z) == 2*len(x)
+// The (non-normalized) result is placed in z.
+// A temporary of 2*len(x) words is taken from (and returned to) the nat pool.
+func basicSqr(z, x nat) {
+	n := len(x)
+	tp := getNat(2 * n)
+	t := *tp // temporary variable to hold the products
+	t.clear()
+	z[1], z[0] = mulWW(x[0], x[0]) // the initial square
+	for i := 1; i < n; i++ {
+		d := x[i]
+		// z collects the squares x[i] * x[i]
+		z[2*i+1], z[2*i] = mulWW(d, d)
+		// t collects the products x[i] * x[j] where j < i
+		t[2*i] = addMulVVW(t[i:2*i], x[0:i], d)
+	}
+	// Each cross product x[i]*x[j] occurs twice in the square, so t is
+	// doubled by a 1-bit left shift; the bit shifted out goes into the top word.
+	t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
+	addVV(z, z, t)                              // combine the result
+	putNat(tp)
+}
+
+// karatsubaSqr squares x and leaves the result in z.
+// len(x) must be a power of 2 and len(z) >= 6*len(x).
+// The (non-normalized) result is placed in z[0 : 2*len(x)].
+//
+// The algorithm and the layout of z are the same as for karatsuba.
+func karatsubaSqr(z, x nat) {
+	n := len(x)
+
+	// Fall back to basicSqr if x is odd-length or small.
+	if n&1 != 0 || n < karatsubaSqrThreshold || n < 2 {
+		basicSqr(z[:2*n], x)
+		return
+	}
+
+	// split x into two halves: x = x1*b + x0
+	n2 := n >> 1
+	x1, x0 := x[n2:], x[0:n2]
+
+	karatsubaSqr(z, x0)     // z0 = x0*x0, placed in z[0:n]
+	karatsubaSqr(z[n:], x1) // z2 = x1*x1, placed in z[n:2*n]
+
+	// s = sign(xd*yd) == -1 for xd != 0; s == 1 for xd == 0
+	// xd = |x1 - x0|; the sign is irrelevant because xd is squared below.
+	xd := z[2*n : 2*n+n2]
+	if subVV(xd, x1, x0) != 0 {
+		subVV(xd, x0, x1)
+	}
+
+	// p = xd*xd
+	p := z[n*3:]
+	karatsubaSqr(p, xd)
+
+	// save original z2:z0 before accumulating into z
+	r := z[n*4:]
+	copy(r, z[:n*2])
+
+	// middle term: z0 + z2 - p, applied at word offset n2
+	karatsubaAdd(z[n2:], r, n)
+	karatsubaAdd(z[n2:], r[n:], n)
+	karatsubaSub(z[n2:], p, n) // s == -1 for p != 0; s == 1 for p == 0
+}
+
+// Operands that are shorter than basicSqrThreshold are squared using
+// "grade school" multiplication (basicMul); operands of at least
+// basicSqrThreshold but fewer than karatsubaSqrThreshold words are squared
+// using basicSqr; for operands of karatsubaSqrThreshold words or more
+// we use the Karatsuba algorithm optimized for x == y.
+var basicSqrThreshold = 20      // computed by calibrate_test.go
+var karatsubaSqrThreshold = 260 // computed by calibrate_test.go
+
+// sqr sets z = x*x and returns the result of z.norm().
+// The storage of z is reused unless z aliases x. The algorithm is chosen
+// by len(x): a single mulWW for one word, basicMul below basicSqrThreshold,
+// basicSqr below karatsubaSqrThreshold, and karatsubaSqr otherwise.
+func (z nat) sqr(x nat) nat {
+	n := len(x)
+	switch {
+	case n == 0:
+		return z[:0]
+	case n == 1:
+		d := x[0]
+		z = z.make(2)
+		z[1], z[0] = mulWW(d, d)
+		return z.norm()
+	}
+
+	if alias(z, x) {
+		z = nil // z is an alias for x - cannot reuse
+	}
+
+	if n < basicSqrThreshold {
+		z = z.make(2 * n)
+		basicMul(z, x, x)
+		return z.norm()
+	}
+	if n < karatsubaSqrThreshold {
+		z = z.make(2 * n)
+		basicSqr(z, x)
+		return z.norm()
+	}
+
+	// Use Karatsuba multiplication optimized for x == y.
+	// The algorithm and layout of z are the same as for mul.
+
+	// z = (x1*b + x0)^2 = x1^2*b^2 + 2*x1*x0*b + x0^2
+
+	k := karatsubaLen(n, karatsubaSqrThreshold)
+
+	x0 := x[0:k]
+	z = z.make(max(6*k, 2*n))
+	karatsubaSqr(z, x0) // z = x0^2
+	z = z[0 : 2*n]
+	z[2*k:].clear() // words above the 2*k-word square held scratch data
+
+	if k < n {
+		// Add the terms involving the high part x1 = x[k:].
+		tp := getNat(2 * k)
+		t := *tp
+		x0 := x0.norm()
+		x1 := x[k:]
+		t = t.mul(x0, x1)
+		addAt(z, t, k)
+		addAt(z, t, k) // z = 2*x1*x0*b + x0^2
+		t = t.sqr(x1)
+		addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2
+		putNat(tp)
+	}
+
+	return z.norm()
+}
+
+// mulRange computes the product of all the unsigned integers in the
+// range [a, b] inclusively. If a > b (empty range), the result is 1.
+// If a == 0 the result is 0. Larger ranges are split at their midpoint
+// and the two halves are multiplied recursively.
+func (z nat) mulRange(a, b uint64) nat {
+	switch {
+	case a == 0:
+		// cut long ranges short (optimization)
+		return z.setUint64(0)
+	case a > b:
+		return z.setUint64(1)
+	case a == b:
+		return z.setUint64(a)
+	case a+1 == b:
+		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
+	}
+	// Here a < b, so b-a cannot underflow. Computing the midpoint as
+	// a + (b-a)/2 instead of (a+b)/2 avoids uint64 overflow of a+b
+	// for large a and b; the two forms agree whenever a+b does not overflow.
+	m := a + (b-a)/2
+	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
+}
+
+// getNat returns a *nat of len n. The contents may not be zero.
+// The pool holds *nat to avoid allocation when converting to interface{}.
+// The returned pointer should be handed back with putNat when done.
+func getNat(n int) *nat {
+	var z *nat
+	// Reuse a pooled value if one is available.
+	if v := natPool.Get(); v != nil {
+		z = v.(*nat)
+	}
+	if z == nil {
+		z = new(nat)
+	}
+	*z = z.make(n)
+	if n > 0 {
+		// Deliberately store a non-zero sentinel so that callers which
+		// wrongly rely on zeroed memory misbehave visibly.
+		(*z)[0] = 0xfedcb // break code expecting zero
+	}
+	return z
+}
+
+// putNat returns x to natPool so that a later getNat call can reuse it.
+// The caller must not use x after calling putNat.
+func putNat(x *nat) {
+	natPool.Put(x)
+}
+
+// natPool caches *nat scratch values handed out by getNat and
+// returned by putNat.
+var natPool sync.Pool
+
+// bitLen returns the length of x in bits.
+// Unlike most methods, it works even if x is not normalized.
+// For an empty x the result is 0.
+func (x nat) bitLen() int {
+	// This function is used in cryptographic operations. It must not leak
+	// anything but the Int's sign and bit size through side-channels. Any
+	// changes must be reviewed by a security expert.
+	if i := len(x) - 1; i >= 0 {
+		// bits.Len uses a lookup table for the low-order bits on some
+		// architectures. Neutralize any input-dependent behavior by setting all
+		// bits after the first one bit.
+		top := uint(x[i])
+		top |= top >> 1
+		top |= top >> 2
+		top |= top >> 4
+		top |= top >> 8
+		top |= top >> 16
+		top |= top >> 16 >> 16 // ">> 32" doesn't compile on 32-bit architectures
+		// i full words below the top word, plus the bit length of the top word.
+		return i*_W + bits.Len(top)
+	}
+	return 0
+}
+
+// trailingZeroBits returns the number of consecutive least significant zero
+// bits of x. For an empty x the result is 0.
+// A non-empty x must contain at least one non-zero word, otherwise the
+// word scan below indexes past the end of x.
+func (x nat) trailingZeroBits() uint {
+	if len(x) == 0 {
+		return 0
+	}
+	// Skip over the low-order words that are entirely zero.
+	var i uint
+	for x[i] == 0 {
+		i++
+	}
+	// x[i] != 0
+	return i*_W + uint(bits.TrailingZeros(uint(x[i])))
+}
+
+// isPow2 returns i, true when x == 2**i and 0, false otherwise.
+// An empty x (the value 0) is not a power of two and yields 0, false.
+// A non-empty x must contain at least one non-zero word (as any
+// normalized value does), otherwise the word scan indexes past the end of x.
+func (x nat) isPow2() (uint, bool) {
+	// Guard against indexing x[0] of an empty nat.
+	if len(x) == 0 {
+		return 0, false
+	}
+	// Skip over the low-order words that are entirely zero.
+	var i uint
+	for x[i] == 0 {
+		i++
+	}
+	// x is a power of two iff its first non-zero word is also its top
+	// word and that word has exactly one bit set.
+	if i == uint(len(x))-1 && x[i]&(x[i]-1) == 0 {
+		return i*_W + uint(bits.TrailingZeros(uint(x[i]))), true
+	}
+	return 0, false
+}
+
+// same reports whether x and y are non-empty, have equal length,
+// and start at the same address.
+func same(x, y nat) bool {
+	if len(x) == 0 || len(x) != len(y) {
+		return false
+	}
+	return &x[0] == &y[0]
+}
+
+// shl sets z = x << s and returns the result of z.norm().
+func (z nat) shl(x nat, s uint) nat {
+	if s == 0 {
+		// Nothing to shift: return z unchanged if it already is x, or
+		// copy x if the two do not share storage. Otherwise (z and x
+		// overlap without being the same slice) fall through to the
+		// general code below.
+		if same(z, x) {
+			return z
+		}
+		if !alias(z, x) {
+			return z.set(x)
+		}
+	}
+
+	m := len(x)
+	if m == 0 {
+		return z[:0]
+	}
+	// m > 0
+
+	// The shift moves x up by s/_W whole words and s%_W bits.
+	n := m + int(s/_W)
+	z = z.make(n + 1)
+	// The value returned by shlVU is stored in the extra top word.
+	z[n] = shlVU(z[n-m:n], x, s%_W)
+	z[0 : n-m].clear()
+
+	return z.norm()
+}
+
+// shr sets z = x >> s and returns the result of z.norm().
+func (z nat) shr(x nat, s uint) nat {
+	if s == 0 {
+		// Nothing to shift: return z unchanged if it already is x, or
+		// copy x if the two do not share storage. Otherwise fall
+		// through to the general code below.
+		if same(z, x) {
+			return z
+		}
+		if !alias(z, x) {
+			return z.set(x)
+		}
+	}
+
+	m := len(x)
+	// s/_W whole words are dropped from the low end of x.
+	n := m - int(s/_W)
+	if n <= 0 {
+		// everything is shifted out
+		return z[:0]
+	}
+	// n > 0
+
+	z = z.make(n)
+	// Shift the remaining top n words right by the leftover s%_W bits.
+	shrVU(z, x[m-n:], s%_W)
+
+	return z.norm()
+}
+
+// setBit sets z to x with the i'th bit (lsb == bit 0) set to b and
+// returns z. b must be 0 or 1; any other value causes a panic.
+func (z nat) setBit(x nat, i uint, b uint) nat {
+	j := int(i / _W)         // index of the word containing bit i
+	m := Word(1) << (i % _W) // mask selecting bit i within that word
+	n := len(x)
+	switch b {
+	case 0:
+		z = z.make(n)
+		copy(z, x)
+		if j >= n {
+			// no need to grow
+			return z
+		}
+		z[j] &^= m
+		// clearing a bit may have zeroed the top word
+		return z.norm()
+	case 1:
+		if j >= n {
+			// grow z to hold word j and zero the words beyond x
+			z = z.make(j + 1)
+			z[n:].clear()
+		} else {
+			z = z.make(n)
+		}
+		copy(z, x)
+		z[j] |= m
+		// no need to normalize
+		return z
+	}
+	panic("set bit is not 0 or 1")
+}
+
+// bit returns the value of the i'th bit, with lsb == bit 0.
+// Bits beyond the words stored in x read as 0.
+func (x nat) bit(i uint) uint {
+	word := i / _W
+	if word < uint(len(x)) {
+		// 0 <= word < len(x)
+		return uint(x[word] >> (i % _W) & 1)
+	}
+	return 0
+}
+
+// sticky returns 1 if there's a 1 bit within the
+// i least significant bits, otherwise it returns 0.
+func (x nat) sticky(i uint) uint {
+	j := i / _W
+	if j >= uint(len(x)) {
+		// The i low bits cover all of x: the answer only depends on
+		// whether x has any words at all.
+		if len(x) == 0 {
+			return 0
+		}
+		return 1
+	}
+	// 0 <= j < len(x)
+	// Check the full words below word j.
+	for _, x := range x[:j] {
+		if x != 0 {
+			return 1
+		}
+	}
+	// Check the low i%_W bits of word j by shifting the higher bits out.
+	if x[j]<<(_W-i%_W) != 0 {
+		return 1
+	}
+	return 0
+}
+
+// and sets z to the bitwise AND of x and y and returns the result of
+// z.norm(). Only the words common to both operands are combined.
+func (z nat) and(x, y nat) nat {
+	// The result is as long as the shorter operand.
+	size := len(x)
+	if len(y) < size {
+		size = len(y)
+	}
+
+	z = z.make(size)
+	for k := 0; k < size; k++ {
+		z[k] = x[k] & y[k]
+	}
+
+	return z.norm()
+}
+
+// trunc returns z = x mod 2ⁿ.
+func (z nat) trunc(x nat, n uint) nat {
+	// w is the number of words needed to hold n bits.
+	w := (n + _W - 1) / _W
+	if uint(len(x)) < w {
+		// x has fewer than w words, so it is already < 2ⁿ
+		return z.set(x)
+	}
+	z = z.make(int(w))
+	copy(z, x)
+	if n%_W != 0 {
+		// mask off the bits above bit n in the top word
+		z[len(z)-1] &= 1<<(n%_W) - 1
+	}
+	return z.norm()
+}
+
+// andNot sets z to x &^ y (the bits of x that are not set in y) and
+// returns the result of z.norm().
+func (z nat) andNot(x, y nat) nat {
+	size := len(x)
+	// Only the words that y shares with x can clear bits.
+	common := len(y)
+	if common > size {
+		common = size
+	}
+
+	z = z.make(size)
+	for k := 0; k < common; k++ {
+		z[k] = x[k] &^ y[k]
+	}
+	// Words of x beyond the end of y are copied unchanged.
+	copy(z[common:size], x[common:size])
+
+	return z.norm()
+}
+
+// or sets z to the bitwise OR of x and y and returns the result of
+// z.norm().
+func (z nat) or(x, y nat) nat {
+	// Order the operands by length.
+	long, short := x, y
+	if len(x) < len(y) {
+		long, short = y, x
+	}
+	hi, lo := len(long), len(short)
+
+	z = z.make(hi)
+	for k := 0; k < lo; k++ {
+		z[k] = long[k] | short[k]
+	}
+	// Words that only the longer operand has are copied unchanged.
+	copy(z[lo:hi], long[lo:hi])
+
+	return z.norm()
+}
+
+// xor sets z to the bitwise XOR of x and y and returns the result of
+// z.norm().
+func (z nat) xor(x, y nat) nat {
+	// Order the operands by length.
+	long, short := x, y
+	if len(x) < len(y) {
+		long, short = y, x
+	}
+	hi, lo := len(long), len(short)
+
+	z = z.make(hi)
+	for k := 0; k < lo; k++ {
+		z[k] = long[k] ^ short[k]
+	}
+	// Words that only the longer operand has are copied unchanged.
+	copy(z[lo:hi], long[lo:hi])
+
+	return z.norm()
+}
+
+// random creates a random integer in [0..limit), using the space in z if
+// possible. n is the bit length of limit.
+// limit must not be empty: the top word z[len(limit)-1] is indexed below.
+func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
+	if alias(z, limit) {
+		z = nil // z is an alias for limit - cannot reuse
+	}
+	z = z.make(len(limit))
+
+	// mask keeps only the bits of the most significant word that lie
+	// within the n-bit length of limit.
+	bitLengthOfMSW := uint(n % _W)
+	if bitLengthOfMSW == 0 {
+		bitLengthOfMSW = _W
+	}
+	mask := Word((1 << bitLengthOfMSW) - 1)
+
+	// Draw n-bit candidates until one is below limit.
+	for {
+		switch _W {
+		case 32:
+			for i := range z {
+				z[i] = Word(rand.Uint32())
+			}
+		case 64:
+			for i := range z {
+				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
+			}
+		default:
+			panic("unknown word size")
+		}
+		z[len(limit)-1] &= mask
+		if z.cmp(limit) < 0 {
+			break
+		}
+	}
+
+	return z.norm()
+}
+
+// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
+// otherwise it sets z to x**y. The result is the value of z.
+// If slow is set, the Montgomery and windowed fast paths for a
+// multi-word exponent are skipped and the bit-by-bit
+// square-and-multiply loop below is always used.
+func (z nat) expNN(x, y, m nat, slow bool) nat {
+	if alias(z, x) || alias(z, y) {
+		// We cannot allow in-place modification of x or y.
+		z = nil
+	}
+
+	// x**y mod 1 == 0
+	if len(m) == 1 && m[0] == 1 {
+		return z.setWord(0)
+	}
+	// m == 0 || m > 1
+
+	// x**0 == 1
+	if len(y) == 0 {
+		return z.setWord(1)
+	}
+	// y > 0
+
+	// 0**y = 0
+	if len(x) == 0 {
+		return z.setWord(0)
+	}
+	// x > 0
+
+	// 1**y = 1
+	if len(x) == 1 && x[0] == 1 {
+		return z.setWord(1)
+	}
+	// x > 1
+
+	// x**1 == x
+	if len(y) == 1 && y[0] == 1 {
+		if len(m) != 0 {
+			return z.rem(x, m)
+		}
+		return z.set(x)
+	}
+	// y > 1
+
+	if len(m) != 0 {
+		// We likely end up being as long as the modulus.
+		z = z.make(len(m))
+
+		// If the exponent is large, we use the Montgomery method for odd values,
+		// and a 4-bit, windowed exponentiation for powers of two,
+		// and a CRT-decomposed Montgomery method for the remaining values
+		// (even values times non-trivial odd values, which decompose into one
+		// instance of each of the first two cases).
+		if len(y) > 1 && !slow {
+			if m[0]&1 == 1 {
+				return z.expNNMontgomery(x, y, m)
+			}
+			if logM, ok := m.isPow2(); ok {
+				return z.expNNWindowed(x, y, logM)
+			}
+			return z.expNNMontgomeryEven(x, y, m)
+		}
+	}
+
+	// The leading one bit of the exponent is accounted for by starting
+	// with z = x; v is shifted so that the bit after the leading one
+	// ends up in the most significant position.
+	z = z.set(x)
+	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
+	shift := nlz(v) + 1
+	v <<= shift
+	var q nat
+
+	// mask selects the most significant bit of a Word.
+	const mask = 1 << (_W - 1)
+
+	// We walk through the bits of the exponent one by one. Each time we
+	// see a bit, we square, thus doubling the power. If the bit is a one,
+	// we also multiply by x, thus adding one to the power.
+
+	// w is the number of bits of the top word that remain to be processed.
+	w := _W - int(shift)
+	// zz and r are used to avoid allocating in mul and div as
+	// otherwise the arguments would alias.
+	var zz, r nat
+	for j := 0; j < w; j++ {
+		zz = zz.sqr(z)
+		zz, z = z, zz
+
+		if v&mask != 0 {
+			zz = zz.mul(z, x)
+			zz, z = z, zz
+		}
+
+		if len(m) != 0 {
+			// Reduce mod m; the buffers are rotated so that z ends up
+			// holding the remainder and the others are kept for reuse.
+			zz, r = zz.div(r, z, m)
+			zz, r, q, z = q, z, zz, r
+		}
+
+		v <<= 1
+	}
+
+	// Process the remaining (lower) words of the exponent, all _W bits each.
+	for i := len(y) - 2; i >= 0; i-- {
+		v = y[i]
+
+		for j := 0; j < _W; j++ {
+			zz = zz.sqr(z)
+			zz, z = z, zz
+
+			if v&mask != 0 {
+				zz = zz.mul(z, x)
+				zz, z = z, zz
+			}
+
+			if len(m) != 0 {
+				zz, r = zz.div(r, z, m)
+				zz, r, q, z = q, z, zz, r
+			}
+
+			v <<= 1
+		}
+	}
+
+	return z.norm()
+}
+
+// expNNMontgomeryEven calculates x**y mod m where m = m1 × m2 for m1 = 2ⁿ and m2 odd.
+// It uses two recursive calls to expNN for x**y mod m1 and x**y mod m2
+// and then uses the Chinese Remainder Theorem to combine the results.
+// The recursive call using m1 will use expNNWindowed,
+// while the recursive call using m2 will use expNNMontgomery.
+// For more details, see Ç. K. Koç, “Montgomery Reduction with Even Modulus”,
+// IEE Proceedings: Computers and Digital Techniques, 141(5) 314-316, September 1994.
+// http://www.people.vcu.edu/~jwang3/CMSC691/j34monex.pdf
+func (z nat) expNNMontgomeryEven(x, y, m nat) nat {
+	// Split m = m₁ × m₂ where m₁ = 2ⁿ
+	// (n is the number of trailing zero bits of m, so m₂ = m >> n is odd).
+	n := m.trailingZeroBits()
+	m1 := nat(nil).shl(natOne, n)
+	m2 := nat(nil).shr(m, n)
+
+	// We want z = x**y mod m.
+	// z₁ = x**y mod m1 = (x**y mod m) mod m1 = z mod m1
+	// z₂ = x**y mod m2 = (x**y mod m) mod m2 = z mod m2
+	// (We are using the math/big convention for names here,
+	// where the computation is z = x**y mod m, so its parts are z1 and z2.
+	// The paper is computing x = a**e mod n; it refers to these as x2 and z1.)
+	z1 := nat(nil).expNN(x, y, m1, false)
+	z2 := nat(nil).expNN(x, y, m2, false)
+
+	// Reconstruct z from z₁, z₂ using CRT, using algorithm from paper,
+	// which uses only a single modInverse (and an easy one at that).
+	//	p = (z₁ - z₂) × m₂⁻¹ (mod m₁)
+	//	z = z₂ + p × m₂
+	// The final addition is in range because:
+	//	z = z₂ + p × m₂
+	//	  ≤ z₂ + (m₁-1) × m₂
+	//	  < m₂ + (m₁-1) × m₂
+	//	  = m₁ × m₂
+	//	  = m.
+	z = z.set(z2)
+
+	// Compute (z₁ - z₂) mod m1 [m1 == 2**n] into z1.
+	z1 = z1.subMod2N(z1, z2, n)
+
+	// Reuse z2 for p = (z₁ - z₂) [in z1] * m2⁻¹ (mod m₁ [= 2ⁿ]).
+	m2inv := nat(nil).modInverse(m2, m1)
+	z2 = z2.mul(z1, m2inv)
+	z2 = z2.trunc(z2, n)
+
+	// Reuse z1 for p * m2.
+	z = z.add(z, z1.mul(z2, m2))
+
+	return z
+}
+
+// expNNWindowed calculates x**y mod m using a fixed, 4-bit window,
+// where m = 2**logM.
+// It panics if len(y) <= 1.
+func (z nat) expNNWindowed(x, y nat, logM uint) nat {
+	if len(y) <= 1 {
+		panic("big: misuse of expNNWindowed")
+	}
+	if x[0]&1 == 0 {
+		// len(y) > 1, so y > logM.
+		// x is even, so x**y is a multiple of 2**y which is a multiple of 2**logM.
+		return z.setWord(0)
+	}
+	// x is odd from here on.
+	if logM == 1 {
+		// an odd number is 1 mod 2
+		return z.setWord(1)
+	}
+
+	// zz is used to avoid allocating in mul as otherwise
+	// the arguments would alias.
+	// w is the number of words needed to hold logM bits.
+	w := int((logM + _W - 1) / _W)
+	zzp := getNat(w)
+	zz := *zzp
+
+	const n = 4
+	// powers[i] contains x^i.
+	var powers [1 << n]*nat
+	for i := range powers {
+		powers[i] = getNat(w)
+	}
+	*powers[0] = powers[0].set(natOne)
+	*powers[1] = powers[1].trunc(x, logM)
+	// Fill the table two entries at a time: x^i = (x^(i/2))^2 for even i
+	// and x^(i+1) = x^i * x, each truncated to logM bits.
+	for i := 2; i < 1<<n; i += 2 {
+		p2, p, p1 := powers[i/2], powers[i], powers[i+1]
+		*p = p.sqr(*p2)
+		*p = p.trunc(*p, logM)
+		*p1 = p1.mul(*p, x)
+		*p1 = p1.trunc(*p1, logM)
+	}
+
+	// Because phi(2**logM) = 2**(logM-1), x**(2**(logM-1)) = 1,
+	// so we can compute x**(y mod 2**(logM-1)) instead of x**y.
+	// That is, we can throw away all but the bottom logM-1 bits of y.
+	// Instead of allocating a new y, we start reading y at the right word
+	// and truncate it appropriately at the start of the loop.
+	i := len(y) - 1
+	mtop := int((logM - 2) / _W) // -2 because the top word of N bits is the (N-1)/W'th word.
+	mmask := ^Word(0)
+	if mbits := (logM - 1) & (_W - 1); mbits != 0 {
+		mmask = (1 << mbits) - 1
+	}
+	if i > mtop {
+		i = mtop
+	}
+	// advance records whether a window has already been consumed, in
+	// which case z must be raised to the 16th power before the next one.
+	advance := false
+	z = z.setWord(1)
+	for ; i >= 0; i-- {
+		yi := y[i]
+		if i == mtop {
+			yi &= mmask
+		}
+		for j := 0; j < _W; j += n {
+			if advance {
+				// Account for use of 4 bits in previous iteration.
+				// Unrolled loop for significant performance
+				// gain. Use go test -bench=".*" in crypto/rsa
+				// to check performance before making changes.
+				zz = zz.sqr(z)
+				zz, z = z, zz
+				z = z.trunc(z, logM)
+
+				zz = zz.sqr(z)
+				zz, z = z, zz
+				z = z.trunc(z, logM)
+
+				zz = zz.sqr(z)
+				zz, z = z, zz
+				z = z.trunc(z, logM)
+
+				zz = zz.sqr(z)
+				zz, z = z, zz
+				z = z.trunc(z, logM)
+			}
+
+			// Multiply by the table entry selected by the top n bits of yi.
+			zz = zz.mul(z, *powers[yi>>(_W-n)])
+			zz, z = z, zz
+			z = z.trunc(z, logM)
+
+			yi <<= n
+			advance = true
+		}
+	}
+
+	// Hand the scratch values back to the pool.
+	*zzp = zz
+	putNat(zzp)
+	for i := range powers {
+		putNat(powers[i])
+	}
+
+	return z.norm()
+}
+
+// expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
+// Uses Montgomery representation.
+// The computation of k0 below relies on m being odd; expNN only calls
+// this function when m[0]&1 == 1.
+func (z nat) expNNMontgomery(x, y, m nat) nat {
+	numWords := len(m)
+
+	// We want the lengths of x and m to be equal.
+	// It is OK if x >= m as long as len(x) == len(m).
+	if len(x) > numWords {
+		_, x = nat(nil).div(nil, x, m)
+		// Note: now len(x) <= numWords, not guaranteed ==.
+	}
+	if len(x) < numWords {
+		// pad a copy of x with leading zero words
+		rr := make(nat, numWords)
+		copy(rr, x)
+		x = rr
+	}
+
+	// Ideally the precomputations would be performed outside, and reused
+	// k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson
+	// Iteration for Multiplicative Inverses Modulo Prime Powers".
+	k0 := 2 - m[0]
+	t := m[0] - 1
+	for i := 1; i < _W; i <<= 1 {
+		t *= t
+		k0 *= (t + 1)
+	}
+	k0 = -k0
+
+	// RR = 2**(2*_W*len(m)) mod m
+	RR := nat(nil).setWord(1)
+	zz := nat(nil).shl(RR, uint(2*numWords*_W))
+	_, RR = nat(nil).div(RR, zz, m)
+	if len(RR) < numWords {
+		// pad RR to numWords words, reusing zz's storage
+		zz = zz.make(numWords)
+		copy(zz, RR)
+		RR = zz
+	}
+	// one = 1, with equal length to that of m
+	one := make(nat, numWords)
+	one[0] = 1
+
+	const n = 4
+	// powers[i] contains x^i
+	// (in Montgomery representation: each entry is produced by a
+	// Montgomery multiplication, starting from one*RR and x*RR).
+	var powers [1 << n]nat
+	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
+	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
+	for i := 2; i < 1<<n; i++ {
+		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
+	}
+
+	// initialize z = 1 (Montgomery 1)
+	z = z.make(numWords)
+	copy(z, powers[0])
+
+	zz = zz.make(numWords)
+
+	// same windowed exponent, but with Montgomery multiplications
+	for i := len(y) - 1; i >= 0; i-- {
+		yi := y[i]
+		for j := 0; j < _W; j += n {
+			// Except for the very first window, raise z to the 16th
+			// power (four squarings) before multiplying in the next window.
+			if i != len(y)-1 || j != 0 {
+				zz = zz.montgomery(z, z, m, k0, numWords)
+				z = z.montgomery(zz, zz, m, k0, numWords)
+				zz = zz.montgomery(z, z, m, k0, numWords)
+				z = z.montgomery(zz, zz, m, k0, numWords)
+			}
+			// Multiply by the table entry selected by the top n bits of yi.
+			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
+			z, zz = zz, z
+			yi <<= n
+		}
+	}
+	// convert to regular number
+	zz = zz.montgomery(z, one, m, k0, numWords)
+
+	// One last reduction, just in case.
+	// See golang.org/issue/13907.
+	if zz.cmp(m) >= 0 {
+		// Common case is m has high bit set; in that case,
+		// since zz is the same length as m, there can be just
+		// one multiple of m to remove. Just subtract.
+		// We think that the subtract should be sufficient in general,
+		// so do that unconditionally, but double-check,
+		// in case our beliefs are wrong.
+		// The div is not expected to be reached.
+		zz = zz.sub(zz, m)
+		if zz.cmp(m) >= 0 {
+			_, zz = nat(nil).div(nil, zz, m)
+		}
+	}
+
+	return zz.norm()
+}
+
+// bytes writes the value of z into buf using big-endian encoding.
+// The value of z is encoded in the slice buf[i:]. If the value of z
+// cannot be represented in buf, bytes panics. The number i of unused
+// bytes at the beginning of buf is returned as result.
+func (z nat) bytes(buf []byte) (i int) {
+	// This function is used in cryptographic operations. It must not leak
+	// anything but the Int's sign and bit size through side-channels. Any
+	// changes must be reviewed by a security expert.
+	i = len(buf)
+	// Walk the words from least to most significant, filling buf from
+	// its end towards its start, _S bytes per word.
+	for _, d := range z {
+		for j := 0; j < _S; j++ {
+			i--
+			if i >= 0 {
+				buf[i] = byte(d)
+			} else if byte(d) != 0 {
+				// a non-zero byte does not fit into buf
+				panic("math/big: buffer too small to fit value")
+			}
+			d >>= 8
+		}
+	}
+
+	// i is negative if z has more bytes than buf (all excess bytes were zero).
+	if i < 0 {
+		i = 0
+	}
+	// Skip leading zero bytes so that buf[i:] starts at the first
+	// non-zero byte (or is empty).
+	for i < len(buf) && buf[i] == 0 {
+		i++
+	}
+
+	return
+}
+
+// bigEndianWord returns the contents of buf interpreted as a big-endian encoded Word value.
+// It reads 8 bytes when _W is 64 and 4 bytes otherwise.
+func bigEndianWord(buf []byte) Word {
+	switch _W {
+	case 64:
+		return Word(binary.BigEndian.Uint64(buf))
+	default:
+		return Word(binary.BigEndian.Uint32(buf))
+	}
+}
+
+// setBytes interprets buf as the bytes of a big-endian unsigned
+// integer, sets z to that value, and returns z.
+func (z nat) setBytes(buf []byte) nat {
+	// one word per _S bytes, rounded up
+	z = z.make((len(buf) + _S - 1) / _S)
+
+	// Consume buf from its end (least significant bytes) in full words.
+	i := len(buf)
+	for k := 0; i >= _S; k++ {
+		z[k] = bigEndianWord(buf[i-_S : i])
+		i -= _S
+	}
+	// Assemble the remaining i < _S leading bytes into the top word.
+	if i > 0 {
+		var d Word
+		for s := uint(0); i > 0; s += 8 {
+			d |= Word(buf[i-1]) << s
+			i--
+		}
+		z[len(z)-1] = d
+	}
+
+	return z.norm()
+}
+
+// sqrt sets z = ⌊√x⌋ and returns the result.
+func (z nat) sqrt(x nat) nat {
+	// 0 and 1 are their own square roots.
+	if x.cmp(natOne) <= 0 {
+		return z.set(x)
+	}
+	if alias(z, x) {
+		z = nil
+	}
+
+	// Start with value known to be too large and repeat "z = ⌊(z + ⌊x/z⌋)/2⌋" until it stops getting smaller.
+	// See Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 (SqrtInt).
+	// https://members.loria.fr/PZimmermann/mca/pub226.html
+	// If x is one less than a perfect square, the sequence oscillates between the correct z and z+1;
+	// otherwise it converges to the correct z and stays there.
+	var z1, z2 nat
+	z1 = z
+	z1 = z1.setUint64(1)
+	z1 = z1.shl(z1, uint(x.bitLen()+1)/2) // must be ≥ √x
+	for n := 0; ; n++ {
+		// z2 = ⌊(z1 + ⌊x/z1⌋)/2⌋
+		z2, _ = z2.div(nil, x, z1)
+		z2 = z2.add(z2, z1)
+		z2 = z2.shr(z2, 1)
+		if z2.cmp(z1) >= 0 {
+			// z1 is answer.
+			// Figure out whether z1 or z2 is currently aliased to z by looking at loop count.
+			if n&1 == 0 {
+				return z1
+			}
+			return z.set(z1)
+		}
+		// z1 and z2 trade roles on every iteration.
+		z1, z2 = z2, z1
+	}
+}
+
+// subMod2N returns z = (x - y) mod 2ⁿ.
+// x and y are first reduced mod 2ⁿ if they are longer than n bits; an
+// operand that shares storage with z is reduced in place.
+func (z nat) subMod2N(x, y nat, n uint) nat {
+	if uint(x.bitLen()) > n {
+		if alias(z, x) {
+			// ok to overwrite x in place
+			x = x.trunc(x, n)
+		} else {
+			x = nat(nil).trunc(x, n)
+		}
+	}
+	if uint(y.bitLen()) > n {
+		if alias(z, y) {
+			// ok to overwrite y in place
+			y = y.trunc(y, n)
+		} else {
+			y = nat(nil).trunc(y, n)
+		}
+	}
+	// No wraparound needed if the difference is non-negative.
+	if x.cmp(y) >= 0 {
+		return z.sub(x, y)
+	}
+	// x - y < 0; x - y mod 2ⁿ = x - y + 2ⁿ = 2ⁿ - (y - x) = 1 + 2ⁿ-1 - (y - x) = 1 + ^(y - x).
+	z = z.sub(y, x)
+	// Extend z with zero words to cover n bits so that the complement
+	// below sets all of the high bits.
+	for uint(len(z))*_W < n {
+		z = append(z, 0)
+	}
+	for i := range z {
+		z[i] = ^z[i]
+	}
+	z = z.trunc(z, n)
+	return z.add(z, natOne)
+}
diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go
new file mode 100644
index 0000000..b84a7be
--- /dev/null
+++ b/src/math/big/nat_test.go
@@ -0,0 +1,886 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"fmt"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+var cmpTests = []struct {
+	x, y nat
+	r    int
+}{
+	{nil, nil, 0},
+	{nil, nat(nil), 0},
+	{nat(nil), nil, 0},
+	{nat(nil), nat(nil), 0},
+	{nat{0}, nat{0}, 0},
+	{nat{0}, nat{1}, -1},
+	{nat{1}, nat{0}, 1},
+	{nat{1}, nat{1}, 0},
+	{nat{0, _M}, nat{1}, 1},
+	{nat{1}, nat{0, _M}, -1},
+	{nat{1, _M}, nat{0, _M}, 1},
+	{nat{0, _M}, nat{1, _M}, -1},
+	{nat{16, 571956, 8794, 68}, nat{837, 9146, 1, 754489}, -1},
+	{nat{34986, 41, 105, 1957}, nat{56, 7458, 104, 1957}, 1},
+}
+
+func TestCmp(t *testing.T) { // checks nat.cmp against every entry of cmpTests
+	for i, a := range cmpTests {
+		r := a.x.cmp(a.y)
+		if r != a.r {
+			t.Errorf("#%d got r = %v; want %v", i, r, a.r)
+		}
+	}
+}
+
+type funNN func(z, x, y nat) nat
+type argNN struct {
+	z, x, y nat
+}
+
+var sumNN = []argNN{
+	{},
+	{nat{1}, nil, nat{1}},
+	{nat{1111111110}, nat{123456789}, nat{987654321}},
+	{nat{0, 0, 0, 1}, nil, nat{0, 0, 0, 1}},
+	{nat{0, 0, 0, 1111111110}, nat{0, 0, 0, 123456789}, nat{0, 0, 0, 987654321}},
+	{nat{0, 0, 0, 1}, nat{0, 0, _M}, nat{0, 0, 1}},
+}
+
+var prodNN = []argNN{
+	{},
+	{nil, nil, nil},
+	{nil, nat{991}, nil},
+	{nat{991}, nat{991}, nat{1}},
+	{nat{991 * 991}, nat{991}, nat{991}},
+	{nat{0, 0, 991 * 991}, nat{0, 991}, nat{0, 991}},
+	{nat{1 * 991, 2 * 991, 3 * 991, 4 * 991}, nat{1, 2, 3, 4}, nat{991}},
+	{nat{4, 11, 20, 30, 20, 11, 4}, nat{1, 2, 3, 4}, nat{4, 3, 2, 1}},
+	// 3^100 * 3^28 = 3^128
+	{
+		natFromString("11790184577738583171520872861412518665678211592275841109096961"),
+		natFromString("515377520732011331036461129765621272702107522001"),
+		natFromString("22876792454961"),
+	},
+	// z = 111....1 (70000 digits)
+	// x = 10^(99*700) + ... + 10^1400 + 10^700 + 1
+	// y = 111....1 (700 digits, larger than Karatsuba threshold on 32-bit and 64-bit)
+	{
+		natFromString(strings.Repeat("1", 70000)),
+		natFromString("1" + strings.Repeat(strings.Repeat("0", 699)+"1", 99)),
+		natFromString(strings.Repeat("1", 700)),
+	},
+	// z = 111....1 (20000 digits)
+	// x = 10^10000 + 1
+	// y = 111....1 (10000 digits)
+	{
+		natFromString(strings.Repeat("1", 20000)),
+		natFromString("1" + strings.Repeat("0", 9999) + "1"),
+		natFromString(strings.Repeat("1", 10000)),
+	},
+}
+
+func natFromString(s string) nat { // test helper: scans s into a nat and panics if scan reports an error
+	x, _, _, err := nat(nil).scan(strings.NewReader(s), 0, false) // base argument 0; only the value and the error are used
+	if err != nil {
+		panic(err)
+	}
+	return x
+}
+
+func TestSet(t *testing.T) {
+	for _, a := range sumNN { // only the z field of each sumNN entry is used here
+		z := nat(nil).set(a.z)
+		if z.cmp(a.z) != 0 {
+			t.Errorf("got z = %v; want %v", z, a.z)
+		}
+	}
+}
+
+func testFunNN(t *testing.T, msg string, f funNN, a argNN) {
+	z := f(nil, a.x, a.y) // f is called with a nil z; its result must equal a.z
+	if z.cmp(a.z) != 0 {
+		t.Errorf("%s%+v\n\tgot z = %v; want %v", msg, a, z, a.z)
+	}
+}
+
+func TestFunNN(t *testing.T) {
+	for _, a := range sumNN {
+		arg := a
+		testFunNN(t, "add", nat.add, arg)
+
+		arg = argNN{a.z, a.y, a.x} // same sum with the operands swapped
+		testFunNN(t, "add symmetric", nat.add, arg)
+
+		arg = argNN{a.x, a.z, a.y} // expects a.z - a.y == a.x
+		testFunNN(t, "sub", nat.sub, arg)
+
+		arg = argNN{a.y, a.z, a.x} // expects a.z - a.x == a.y
+		testFunNN(t, "sub symmetric", nat.sub, arg)
+	}
+
+	for _, a := range prodNN {
+		arg := a
+		testFunNN(t, "mul", nat.mul, arg)
+
+		arg = argNN{a.z, a.y, a.x} // same product with the operands swapped
+		testFunNN(t, "mul symmetric", nat.mul, arg)
+	}
+}
+
+var mulRangesN = []struct {
+	a, b uint64
+	prod string
+}{
+	{0, 0, "0"},
+	{1, 1, "1"},
+	{1, 2, "2"},
+	{1, 3, "6"},
+	{10, 10, "10"},
+	{0, 100, "0"},
+	{0, 1e9, "0"},
+	{1, 0, "1"},                    // empty range
+	{100, 1, "1"},                  // empty range
+	{1, 10, "3628800"},             // 10!
+	{1, 20, "2432902008176640000"}, // 20!
+	{1, 100,
+		"933262154439441526816992388562667004907159682643816214685929" +
+			"638952175999932299156089414639761565182862536979208272237582" +
+			"51185210916864000000000000000000000000", // 100!
+	},
+}
+
+func TestMulRangeN(t *testing.T) {
+	for i, r := range mulRangesN {
+		prod := string(nat(nil).mulRange(r.a, r.b).utoa(10)) // base-10 text of mulRange(r.a, r.b)
+		if prod != r.prod {
+			t.Errorf("#%d: got %s; want %s", i, prod, r.prod)
+		}
+	}
+}
+
+// allocBytes returns how much runtime.MemStats.TotalAlloc grows across one call to f.
+func allocBytes(f func()) uint64 {
+	var stats runtime.MemStats
+	runtime.ReadMemStats(&stats)
+	t := stats.TotalAlloc // snapshot taken before f runs
+	f()
+	runtime.ReadMemStats(&stats)
+	return stats.TotalAlloc - t
+}
+
+// TestMulUnbalanced tests that multiplying numbers of different lengths
+// does not cause deep recursion and in turn allocate too much memory.
+// Test case for issue 3807.
+func TestMulUnbalanced(t *testing.T) {
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) // run with GOMAXPROCS=1; the deferred call restores the previous setting
+	x := rndNat(50000)
+	y := rndNat(40)
+	allocSize := allocBytes(func() {
+		nat(nil).mul(x, y)
+	})
+	inputSize := uint64(len(x)+len(y)) * _S // operand size in bytes, taking _S as bytes per Word — TODO confirm
+	if ratio := allocSize / uint64(inputSize); ratio > 10 { // integer ratio of allocated bytes to input bytes
+		t.Errorf("multiplication uses too much memory (%d > %d times the size of inputs)", allocSize, ratio)
+	}
+}
+
+// rndNat returns a random nat value >= 0 of (usually) n words in length,
+// obtained by normalizing rndV(n). In extremely unlikely cases it may be
+// smaller than n words if the top-most words are 0.
+func rndNat(n int) nat {
+	return nat(rndV(n)).norm()
+}
+
+// rndNat1 is like rndNat but the result is guaranteed to be > 0.
+func rndNat1(n int) nat {
+	x := nat(rndV(n)).norm()
+	if len(x) == 0 {
+		x = x.setWord(1) // keep the returned nat: calling setWord without assigning leaves x at length 0
+	}
+	return x
+}
+
+func BenchmarkMul(b *testing.B) {
+	mulx := rndNat(1e4)
+	muly := rndNat(1e4)
+	b.ResetTimer() // exclude operand generation from the measurement
+	for i := 0; i < b.N; i++ {
+		var z nat // a fresh nil z on every iteration
+		z.mul(mulx, muly)
+	}
+}
+
+func benchmarkNatMul(b *testing.B, nwords int) {
+	x := rndNat(nwords)
+	y := rndNat(nwords)
+	var z nat // declared once; the result of z.mul below is not assigned back to z
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		z.mul(x, y)
+	}
+}
+
+var mulBenchSizes = []int{10, 100, 1000, 10000, 100000}
+
+func BenchmarkNatMul(b *testing.B) {
+	for _, n := range mulBenchSizes {
+		if isRaceBuilder && n > 1e3 { // sizes above 1000 words are skipped when isRaceBuilder is set
+			continue
+		}
+		b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
+			benchmarkNatMul(b, n)
+		})
+	}
+}
+
+func TestNLZ(t *testing.T) {
+	var x Word = _B >> 1 // expected to have zero leading zeros (first check is i == 0)
+	for i := 0; i <= _W; i++ {
+		if int(nlz(x)) != i {
+			t.Errorf("failed at %x: got %d want %d", x, nlz(x), i)
+		}
+		x >>= 1 // each right shift should add exactly one leading zero
+	}
+}
+
+type shiftTest struct {
+	in    nat
+	shift uint
+	out   nat
+}
+
+var leftShiftTests = []shiftTest{
+	{nil, 0, nil},
+	{nil, 1, nil},
+	{natOne, 0, natOne},
+	{natOne, 1, natTwo},
+	{nat{1 << (_W - 1)}, 1, nat{0}},
+	{nat{1 << (_W - 1), 0}, 1, nat{0, 1}},
+}
+
+func TestShiftLeft(t *testing.T) {
+	for i, test := range leftShiftTests {
+		var z nat
+		z = z.shl(test.in, test.shift)
+		for j, d := range test.out { // only the first len(test.out) words are compared; extra words in z go unchecked
+			if j >= len(z) || z[j] != d {
+				t.Errorf("#%d: got: %v want: %v", i, z, test.out)
+				break
+			}
+		}
+	}
+}
+
+var rightShiftTests = []shiftTest{
+	{nil, 0, nil},
+	{nil, 1, nil},
+	{natOne, 0, natOne},
+	{natOne, 1, nil},
+	{natTwo, 1, natOne},
+	{nat{0, 1}, 1, nat{1 << (_W - 1)}},
+	{nat{2, 1, 1}, 1, nat{1<<(_W-1) + 1, 1 << (_W - 1)}},
+}
+
+func TestShiftRight(t *testing.T) {
+	for i, test := range rightShiftTests {
+		var z nat
+		z = z.shr(test.in, test.shift)
+		for j, d := range test.out { // only the first len(test.out) words are compared; extra words in z go unchecked
+			if j >= len(z) || z[j] != d {
+				t.Errorf("#%d: got: %v want: %v", i, z, test.out)
+				break
+			}
+		}
+	}
+}
+
+func BenchmarkZeroShifts(b *testing.B) { // times shl/shr with a shift count of 0
+	x := rndNat(800)
+
+	b.Run("Shl", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			var z nat // nil destination, distinct from x
+			z.shl(x, 0)
+		}
+	})
+	b.Run("ShlSame", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			x.shl(x, 0) // destination and source are the same nat
+		}
+	})
+
+	b.Run("Shr", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			var z nat // nil destination, distinct from x
+			z.shr(x, 0)
+		}
+	})
+	b.Run("ShrSame", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			x.shr(x, 0) // destination and source are the same nat
+		}
+	})
+}
+
+type modWTest struct {
+	in       string
+	dividend string
+	out      string
+}
+
+var modWTests32 = []modWTest{
+	{"23492635982634928349238759823742", "252341", "220170"},
+}
+
+var modWTests64 = []modWTest{
+	{"6527895462947293856291561095690465243862946", "524326975699234", "375066989628668"},
+}
+
+func runModWTests(t *testing.T, tests []modWTest) {
+	for i, test := range tests {
+		in, _ := new(Int).SetString(test.in, 10) // SetString's ok result is ignored for all three table values
+		d, _ := new(Int).SetString(test.dividend, 10) // despite the field name, this value is what in is reduced by
+		out, _ := new(Int).SetString(test.out, 10)
+
+		r := in.abs.modW(d.abs[0]) // only word 0 of d and of out takes part in the check
+		if r != out.abs[0] {
+			t.Errorf("#%d failed: got %d want %s", i, r, out)
+		}
+	}
+}
+
+func TestModW(t *testing.T) {
+	if _W >= 32 { // the 32-bit table also runs when _W is 64
+		runModWTests(t, modWTests32)
+	}
+	if _W >= 64 {
+		runModWTests(t, modWTests64)
+	}
+}
+
+var montgomeryTests = []struct {
+	x, y, m      string
+	k0           uint64
+	out32, out64 string
+}{
+	{
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffe",
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffe",
+		"0xfffffffffffffffffffffffffffffffffffffffffffffffff",
+		1,
+		"0x1000000000000000000000000000000000000000000",
+		"0x10000000000000000000000000000000000",
+	},
+	{
+		"0x000000000ffffff5",
+		"0x000000000ffffff0",
+		"0x0000000010000001",
+		0xff0000000fffffff,
+		"0x000000000bfffff4",
+		"0x0000000003400001",
+	},
+	{
+		"0x0000000080000000",
+		"0x00000000ffffffff",
+		"0x1000000000000001",
+		0xfffffffffffffff,
+		"0x0800000008000001",
+		"0x0800000008000001",
+	},
+	{
+		"0x0000000080000000",
+		"0x0000000080000000",
+		"0xffffffff00000001",
+		0xfffffffeffffffff,
+		"0xbfffffff40000001",
+		"0xbfffffff40000001",
+	},
+	{
+		"0x0000000080000000",
+		"0x0000000080000000",
+		"0x00ffffff00000001",
+		0xfffffeffffffff,
+		"0xbfffff40000001",
+		"0xbfffff40000001",
+	},
+	{
+		"0x0000000080000000",
+		"0x0000000080000000",
+		"0x0000ffff00000001",
+		0xfffeffffffff,
+		"0xbfff40000001",
+		"0xbfff40000001",
+	},
+	{
+		"0x3321ffffffffffffffffffffffffffff00000000000022222623333333332bbbb888c0",
+		"0x3321ffffffffffffffffffffffffffff00000000000022222623333333332bbbb888c0",
+		"0x33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1",
+		0xdecc8f1249812adf,
+		"0x04eb0e11d72329dc0915f86784820fc403275bf2f6620a20e0dd344c5cd0875e50deb5",
+		"0x0d7144739a7d8e11d72329dc0915f86784820fc403275bf2f61ed96f35dd34dbb3d6a0",
+	},
+	{
+		"0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff00000000000022222223333333333444444444",
+		"0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff999999999999999aaabbbbbbbbcccccccccccc",
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1",
+		0xdecc8f1249812adf,
+		"0x5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d7a11c7772cba02c22f9711078d51a3797eb18e691295293284d988e349fa6deba46b25a4ecd9f715",
+		"0x92fcad4b5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d799c32fe2f3cc5422f9711078d51a3797eb18e691295293284d8f5e69caf6decddfe1df6",
+	},
+}
+
+func TestMontgomery(t *testing.T) {
+	one := NewInt(1)
+	_B := new(Int).Lsh(one, _W) // 2**_W as an Int; this local shadows the package-level _B within the test
+	for i, test := range montgomeryTests {
+		x := natFromString(test.x)
+		y := natFromString(test.y)
+		m := natFromString(test.m)
+		for len(x) < len(m) { // pad x with zero words up to len(m)
+			x = append(x, 0)
+		}
+		for len(y) < len(m) { // pad y with zero words up to len(m)
+			y = append(y, 0)
+		}
+
+		if x.cmp(m) > 0 {
+			_, r := nat(nil).div(nil, x, m)
+			t.Errorf("#%d: x > m (0x%s > 0x%s; use 0x%s)", i, x.utoa(16), m.utoa(16), r.utoa(16))
+		}
+		if y.cmp(m) > 0 {
+			_, r := nat(nil).div(nil, y, m) // suggest y mod m (this branch previously reduced x by mistake)
+			t.Errorf("#%d: y > m (0x%s > 0x%s; use 0x%s)", i, y.utoa(16), m.utoa(16), r.utoa(16))
+		}
+
+		var out nat
+		if _W == 32 {
+			out = natFromString(test.out32)
+		} else {
+			out = natFromString(test.out64)
+		}
+
+		// t.Logf("#%d: len=%d\n", i, len(m))
+
+		// check output in table: p = x * y * (2**(len(m)*_W))⁻¹ mod m
+		xi := &Int{abs: x}
+		yi := &Int{abs: y}
+		mi := &Int{abs: m}
+		p := new(Int).Mod(new(Int).Mul(xi, new(Int).Mul(yi, new(Int).ModInverse(new(Int).Lsh(one, uint(len(m))*_W), mi))), mi)
+		if out.cmp(p.abs.norm()) != 0 {
+			t.Errorf("#%d: out in table=0x%s, computed=0x%s", i, out.utoa(16), p.abs.norm().utoa(16))
+		}
+
+		// check k0 in table: k0 = (-m)⁻¹ mod 2**_W
+		k := new(Int).Mod(&Int{abs: m}, _B)
+		k = new(Int).Sub(_B, k)
+		k = new(Int).Mod(k, _B)
+		k0 := Word(new(Int).ModInverse(k, _B).Uint64())
+		if k0 != Word(test.k0) {
+			t.Errorf("#%d: k0 in table=%#x, computed=%#x\n", i, test.k0, k0)
+		}
+
+		// check montgomery with correct k0 produces correct output
+		z := nat(nil).montgomery(x, y, m, k0, len(m))
+		z = z.norm()
+		if z.cmp(out) != 0 {
+			t.Errorf("#%d: got 0x%s want 0x%s", i, z.utoa(16), out.utoa(16))
+		}
+	}
+}
+
+var expNNTests = []struct {
+	x, y, m string
+	out     string
+}{
+	{"0", "0", "0", "1"},
+	{"0", "0", "1", "0"},
+	{"1", "1", "1", "0"},
+	{"2", "1", "1", "0"},
+	{"2", "2", "1", "0"},
+	{"10", "100000000000", "1", "0"},
+	{"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"},
+	{"0x8000000000000000", "2", "6719", "4944"},
+	{"0x8000000000000000", "3", "6719", "5447"},
+	{"0x8000000000000000", "1000", "6719", "1603"},
+	{"0x8000000000000000", "1000000", "6719", "3199"},
+	{
+		"2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347",
+		"298472983472983471903246121093472394872319615612417471234712061",
+		"29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464",
+		"23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291",
+	},
+	{
+		"11521922904531591643048817447554701904414021819823889996244743037378330903763518501116638828335352811871131385129455853417360623007349090150042001944696604737499160174391019030572483602867266711107136838523916077674888297896995042968746762200926853379",
+		"426343618817810911523",
+		"444747819283133684179",
+		"42",
+	},
+	{"375", "249", "388", "175"},
+	{"375", "18446744073709551801", "388", "175"},
+	{"0", "0x40000000000000", "0x200", "0"},
+	{"0xeffffff900002f00", "0x40000000000000", "0x200", "0"},
+	{"5", "1435700818", "72", "49"},
+	{"0xffff", "0x300030003000300030003000300030003000302a3000300030003000300030003000300030003000300030003000300030003030623066307f3030783062303430383064303630343036", "0x300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "0xa3f94c08b0b90e87af637cacc9383f7ea032352b8961fc036a52b659b6c9b33491b335ffd74c927f64ddd62cfca0001"},
+}
+
+func TestExpNN(t *testing.T) {
+	for i, test := range expNNTests {
+		x := natFromString(test.x)
+		y := natFromString(test.y)
+		out := natFromString(test.out)
+
+		var m nat // stays nil when the table entry has an empty m string
+		if len(test.m) > 0 {
+			m = natFromString(test.m)
+		}
+
+		z := nat(nil).expNN(x, y, m, false)
+		if z.cmp(out) != 0 {
+			t.Errorf("#%d got %s want %s", i, z.utoa(10), out.utoa(10))
+		}
+	}
+}
+
+func FuzzExpMont(f *testing.F) { // cross-checks Int.Exp against expSlow on three-word x, y, m
+	f.Fuzz(func(t *testing.T, x1, x2, x3, y1, y2, y3, m1, m2, m3 uint) {
+		if m1 == 0 && m2 == 0 && m3 == 0 { // a zero modulus is not exercised by this fuzz target
+			return
+		}
+		x := new(Int).SetBits([]Word{Word(x1), Word(x2), Word(x3)})
+		y := new(Int).SetBits([]Word{Word(y1), Word(y2), Word(y3)})
+		m := new(Int).SetBits([]Word{Word(m1), Word(m2), Word(m3)})
+		out := new(Int).Exp(x, y, m)
+		want := new(Int).expSlow(x, y, m)
+		if out.Cmp(want) != 0 {
+			t.Errorf("x = %#x\ny=%#x\nm=%#x\nout=%#x\nwant=%#x\ndc: 16o 16i %X %X %X |p", x, y, m, out, want, x, y, m) // third value is the modulus, so it is labelled m
+		}
+	})
+}
+
+func BenchmarkExp3Power(b *testing.B) { // times expWW(3, y) for a range of exponents y
+	const x = 3
+	for _, y := range []Word{
+		0x10, 0x40, 0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x40000, 0x100000, 0x400000,
+	} {
+		b.Run(fmt.Sprintf("%#x", y), func(b *testing.B) { // sub-benchmark is named after the exponent in hex
+			var z nat
+			for i := 0; i < b.N; i++ {
+				z.expWW(x, y)
+			}
+		})
+	}
+}
+
+func fibo(n int) nat { // returns the n-th Fibonacci number: fibo(0) is nil (zero), fibo(1) is 1
+	switch n {
+	case 0:
+		return nil
+	case 1:
+		return nat{1}
+	}
+	f0 := fibo(0)
+	f1 := fibo(1)
+	var f2 nat
+	for i := 1; i < n; i++ {
+		f2 = f2.add(f0, f1)
+		f0, f1, f2 = f1, f2, f0 // rotate: the new sum becomes f1, and the old f0 is handed to f2 as the next destination
+	}
+	return f1
+}
+
+var fiboNums = []string{
+	"0",
+	"55",
+	"6765",
+	"832040",
+	"102334155",
+	"12586269025",
+	"1548008755920",
+	"190392490709135",
+	"23416728348467685",
+	"2880067194370816120",
+	"354224848179261915075",
+}
+
+func TestFibo(t *testing.T) {
+	for i, want := range fiboNums {
+		n := i * 10 // entry i of fiboNums is compared against fibo(10*i)
+		got := string(fibo(n).utoa(10))
+		if got != want {
+			t.Errorf("fibo(%d) failed: got %s want %s", n, got, want)
+		}
+	}
+}
+
+func BenchmarkFibo(b *testing.B) {
+	for i := 0; i < b.N; i++ { // one iteration computes fibo for n = 1, 10, ..., 100000
+		fibo(1e0)
+		fibo(1e1)
+		fibo(1e2)
+		fibo(1e3)
+		fibo(1e4)
+		fibo(1e5)
+	}
+}
+
+var bitTests = []struct {
+	x    string
+	i    uint
+	want uint
+}{
+	{"0", 0, 0},
+	{"0", 1, 0},
+	{"0", 1000, 0},
+
+	{"0x1", 0, 1},
+	{"0x10", 0, 0},
+	{"0x10", 3, 0},
+	{"0x10", 4, 1},
+	{"0x10", 5, 0},
+
+	{"0x8000000000000000", 62, 0},
+	{"0x8000000000000000", 63, 1},
+	{"0x8000000000000000", 64, 0},
+
+	{"0x3" + strings.Repeat("0", 32), 127, 0},
+	{"0x3" + strings.Repeat("0", 32), 128, 1},
+	{"0x3" + strings.Repeat("0", 32), 129, 1},
+	{"0x3" + strings.Repeat("0", 32), 130, 0},
+}
+
+func TestBit(t *testing.T) { // checks nat.bit against every entry of bitTests
+	for i, test := range bitTests {
+		x := natFromString(test.x)
+		if got := x.bit(test.i); got != test.want {
+			t.Errorf("#%d: %s.bit(%d) = %v; want %v", i, test.x, test.i, got, test.want)
+		}
+	}
+}
+
+var stickyTests = []struct {
+	x    string
+	i    uint
+	want uint
+}{
+	{"0", 0, 0},
+	{"0", 1, 0},
+	{"0", 1000, 0},
+
+	{"0x1", 0, 0},
+	{"0x1", 1, 1},
+
+	{"0x1350", 0, 0},
+	{"0x1350", 4, 0},
+	{"0x1350", 5, 1},
+
+	{"0x8000000000000000", 63, 0},
+	{"0x8000000000000000", 64, 1},
+
+	{"0x1" + strings.Repeat("0", 100), 400, 0},
+	{"0x1" + strings.Repeat("0", 100), 401, 1},
+}
+
+func TestSticky(t *testing.T) {
+	for i, test := range stickyTests {
+		x := natFromString(test.x)
+		if got := x.sticky(test.i); got != test.want {
+			t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i, got, test.want)
+		}
+		if test.want == 1 {
+			// once sticky reports 1 at test.i, it should also report 1 at test.i+1 .. test.i+3
+			for d := uint(1); d <= 3; d++ {
+				if got := x.sticky(test.i + d); got != 1 {
+					t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i+d, got, 1)
+				}
+			}
+		}
+	}
+}
+
+func testSqr(t *testing.T, x nat) { // checks that sqr(x) equals mul(x, x)
+	got := make(nat, 2*len(x))
+	want := make(nat, 2*len(x))
+	got = got.sqr(x)
+	want = want.mul(x, x)
+	if got.cmp(want) != 0 {
+		t.Errorf("basicSqr(%v), got %v, want %v", x, got, want) // note: message says basicSqr, the call under test is nat.sqr
+	}
+}
+
+func TestSqr(t *testing.T) {
+	for _, a := range prodNN { // every non-nil x, y and z in prodNN is used as a squaring input
+		if a.x != nil {
+			testSqr(t, a.x)
+		}
+		if a.y != nil {
+			testSqr(t, a.y)
+		}
+		if a.z != nil {
+			testSqr(t, a.z)
+		}
+	}
+}
+
+func benchmarkNatSqr(b *testing.B, nwords int) {
+	x := rndNat(nwords)
+	var z nat // declared once; the result of z.sqr below is not assigned back to z
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		z.sqr(x)
+	}
+}
+
+var sqrBenchSizes = []int{
+	1, 2, 3, 5, 8, 10, 20, 30, 50, 80,
+	100, 200, 300, 500, 800,
+	1000, 10000, 100000,
+}
+
+func BenchmarkNatSqr(b *testing.B) {
+	for _, n := range sqrBenchSizes {
+		if isRaceBuilder && n > 1e3 { // sizes above 1000 words are skipped when isRaceBuilder is set
+			continue
+		}
+		b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
+			benchmarkNatSqr(b, n)
+		})
+	}
+}
+
+var subMod2NTests = []struct {
+	x string
+	y string
+	n uint
+	z string
+}{
+	{"1", "2", 0, "0"},
+	{"1", "0", 1, "1"},
+	{"0", "1", 1, "1"},
+	{"3", "5", 3, "6"},
+	{"5", "3", 3, "2"},
+	// 2^65, 2^66-1, 2^65 - (2^66-1) + 2^67
+	{"36893488147419103232", "73786976294838206463", 67, "110680464442257309697"},
+	// 2^66-1, 2^65, 2^65-1
+	{"73786976294838206463", "36893488147419103232", 67, "36893488147419103231"},
+}
+
+func TestNatSubMod2N(t *testing.T) {
+	for _, mode := range []string{"noalias", "aliasX", "aliasY"} {
+		t.Run(mode, func(t *testing.T) {
+			for _, tt := range subMod2NTests {
+				x0 := natFromString(tt.x)
+				y0 := natFromString(tt.y)
+				want := natFromString(tt.z)
+				x := nat(nil).set(x0) // working copies; x0 and y0 keep the original values for the checks below
+				y := nat(nil).set(y0)
+				var z nat // stays nil in "noalias" mode
+				switch mode {
+				case "aliasX":
+					z = x
+				case "aliasY":
+					z = y
+				}
+				z = z.subMod2N(x, y, tt.n)
+				if z.cmp(want) != 0 {
+					t.Fatalf("subMod2N(%d, %d, %d) = %d, want %d", x0, y0, tt.n, z, want)
+				}
+				if mode != "aliasX" && x.cmp(x0) != 0 { // x may change only when it is aliased to z
+					t.Fatalf("subMod2N(%d, %d, %d) modified x", x0, y0, tt.n)
+				}
+				if mode != "aliasY" && y.cmp(y0) != 0 { // y may change only when it is aliased to z
+					t.Fatalf("subMod2N(%d, %d, %d) modified y", x0, y0, tt.n)
+				}
+			}
+		})
+	}
+}
+
+func BenchmarkNatSetBytes(b *testing.B) { // times setBytes on inputs with and without a partial trailing word
+	const maxLength = 128
+	lengths := []int{
+		// No remainder:
+		8, 24, maxLength,
+		// With remainder:
+		7, 23, maxLength - 1,
+	}
+	n := make(nat, maxLength/_S) // maxLength is in bytes, so size n in words of _S bytes (not _W bits); n then never needs to grow
+	buf := make([]byte, maxLength)
+	for _, l := range lengths {
+		b.Run(fmt.Sprint(l), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				n.setBytes(buf[:l])
+			}
+		})
+	}
+}
+
+func TestNatDiv(t *testing.T) { // builds x = a*b + c with c < b and checks that div returns quotient a and remainder c
+	sizes := []int{
+		1, 2, 5, 8, 15, 25, 40, 65, 100,
+		200, 500, 800, 1500, 2500, 4000, 6500, 10000,
+	}
+	for _, i := range sizes {
+		for _, j := range sizes {
+			a := rndNat1(i)
+			b := rndNat1(j)
+			// the test requires b >= 2
+			if len(b) == 1 && b[0] == 1 {
+				b[0] = 2
+			}
+			// choose a remainder c < b: if c is as long as b and its top word is not smaller, clear that word
+			c := rndNat1(len(b))
+			if len(c) == len(b) && c[len(c)-1] >= b[len(b)-1] {
+				c[len(c)-1] = 0
+				c = c.norm()
+			}
+			// compute x = a*b+c
+			x := nat(nil).mul(a, b)
+			x = x.add(x, c)
+
+			var q, r nat
+			q, r = q.div(r, x, b)
+			if q.cmp(a) != 0 {
+				t.Fatalf("wrong quotient: got %s; want %s for %s/%s", q.utoa(10), a.utoa(10), x.utoa(10), b.utoa(10))
+			}
+			if r.cmp(c) != 0 {
+				t.Fatalf("wrong remainder: got %s; want %s for %s/%s", r.utoa(10), c.utoa(10), x.utoa(10), b.utoa(10))
+			}
+		}
+	}
+}
+
+// TestIssue37499 triggers the edge case of divBasic where
+// the inaccurate estimate of the first word's quotient
+// happens at the very beginning of the loop.
+func TestIssue37499(t *testing.T) {
+	// Choose u and v such that v is slightly larger than u >> N.
+	// This tricks divBasic into choosing 1 as the first word
+	// of the quotient. This works in both 32-bit and 64-bit settings.
+	u := natFromString("0x2b6c385a05be027f5c22005b63c42a1165b79ff510e1706b39f8489c1d28e57bb5ba4ef9fd9387a3e344402c0a453381")
+	v := natFromString("0x2b6c385a05be027f5c22005b63c42a1165b79ff510e1706c")
+
+	q := nat(nil).make(8)
+	q.divBasic(u, v) // called directly on the pre-sized q; the return value (if any) is not used
+	q = q.norm()
+	if s := string(q.utoa(16)); s != "fffffffffffffffffffffffffffffffffffffffffffffffb" {
+		t.Fatalf("incorrect quotient: %s", s)
+	}
+}
+
+// TestIssue42552 triggers an edge case of recursive division
+// where the first division loop is never entered, and correcting
+// the remainder takes exactly two iterations in the final loop.
+func TestIssue42552(t *testing.T) {
+	u := natFromString("0xc23b166884c3869092a520eceedeced2b00847bd256c9cf3b2c5e2227c15bd5e6ee7ef8a2f49236ad0eedf2c8a3b453cf6e0706f64285c526b372c4b1321245519d430540804a50b7ca8b6f1b34a2ec05cdbc24de7599af112d3e3c8db347e8799fe70f16e43c6566ba3aeb169463a3ecc486172deb2d9b80a3699c776e44fef20036bd946f1b4d054dd88a2c1aeb986199b0b2b7e58c42288824b74934d112fe1fc06e06b4d99fe1c5e725946b23210521e209cd507cce90b5f39a523f27e861f9e232aee50c3f585208b4573dcc0b897b6177f2ba20254fd5c50a033e849dee1b3a93bd2dc44ba8ca836cab2c2ae50e50b126284524fa0187af28628ff0face68d87709200329db1392852c8b8963fbe3d05fb1efe19f0ed5ca9fadc2f96f82187c24bb2512b2e85a66333a7e176605695211e1c8e0b9b9e82813e50654964945b1e1e66a90840396c7d10e23e47f364d2d3f660fa54598e18d1ca2ea4fe4f35a40a11f69f201c80b48eaee3e2e9b0eda63decf92bec08a70f731587d4ed0f218d5929285c8b2ccbc497e20db42de73885191fa453350335990184d8df805072f958d5354debda38f5421effaaafd6cb9b721ace74be0892d77679f62a4a126697cd35797f6858193da4ba1770c06aea2e5c59ec04b8ea26749e61b72ecdde403f3bc7e5e546cd799578cc939fa676dfd5e648576d4a06cbadb028adc2c0b461f145b2321f42e5e0f3b4fb898ecd461df07a6f5154067787bf74b5cc5c03704a1ce47494961931f0263b0aac32505102595957531a2de69dd71aac51f8a49902f81f21283dbe8e21e01e5d82517868826f86acf338d935aa6b4d5a25c8d540389b277dd9d64569d68baf0f71bd03dba45b92a7fc052601d1bd011a2fc6790a23f97c6fa5caeea040ab86841f268d39ce4f7caf01069df78bba098e04366492f0c2ac24f1bf16828752765fa523c9a4d42b71109d123e6be8c7b1ab3ccf8ea03404075fe1a9596f1bba1d267f9a7879ceece514818316c9c0583469d2367831fc42b517ea028a28df7c18d783d16ea2436cee2b15d52db68b5dfdee6b4d26f0905f9b030c911a04d078923a4136afea96eed6874462a482917353264cc9bee298f167ac65a6db4e4eda88044b39cc0b33183843eaa946564a00c3a0ab661f2c915e70bf0bb65bfbb6fa2eea20aed16bf2c1a1d00ec55fb4ff2f76b8e462ea70c19efa579c9ee78194b86708fdae66a9ce6e2cf3d366037798cfb50277ba6d2fd4866361022fd788ab7735b40b8b61d55e32243e06719e53992e9ac16c9c4b6e6933635c3c47c8f7e73e17dd54d0dd8aeba5d76de46894e7b3f9d3ec25ad78ee82297ba69905ea0fa094b8667faa2b8885e2187b3da80268aa116476
1d7b0d6de206b676777348152b8ae1d4afed753bc63c739a5ca8ce7afb2b241a226bd9e502baba391b5b13f5054f070b65a9cf3a67063bfaa803ba390732cd03888f664023f888741d04d564e0b5674b0a183ace81452001b3fbb4214c77d42ca75376742c471e58f67307726d56a1032bd236610cbcbcd03d0d7a452900136897dc55bb3ce959d10d4e6a10fb635006bd8c41cd9ded2d3dfdd8f2e229590324a7370cb2124210b2330f4c56155caa09a2564932ceded8d92c79664dcdeb87faad7d3da006cc2ea267ee3df41e9677789cc5a8cc3b83add6491561b3047919e0648b1b2e97d7ad6f6c2aa80cab8e9ae10e1f75b1fdd0246151af709d259a6a0ed0b26bd711024965ecad7c41387de45443defce53f66612948694a6032279131c257119ed876a8e805dfb49576ef5c563574115ee87050d92d191bc761ef51d966918e2ef925639400069e3959d8fe19f36136e947ff430bf74e71da0aa5923b00000000")
+	v := natFromString("0x838332321d443a3d30373d47301d47073847473a383d3030f25b3d3d3e00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002e00000000000000000041603038331c3d32f5303441e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e01c0a5459bfc7b9be9fcbb9d2383840464319434707303030f43a32f53034411c0a5459413820878787878787878787878787878787878787878787878787878787878787878787870630303a3a30334036605b923a6101f83638413943413960204337602043323801526040523241846038414143015238604060328452413841413638523c0240384141364036605b923a6101f83638413943413960204334602043323801526040523241846038414143015238604060328452413841413638523c02403841413638433030f25a8b83838383838383838383838383838383837d838383ffffffffffffffff838383838383838383000000000000000000030000007d26e27c7c8b83838383838383838383838383838383837d838383ffffffffffffffff83838383838383838383838383838383838383838383435960f535073030f3343200000000000000011881301938343030fa398383300000002300000000000000000000f11af4600c845252904141364138383c60406032414443095238010241414303364443434132305b595a15434160b042385341ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff47476043410536613603593a6005411c437405fcfcfcfcfcfcfc0000000000005a3b075815054359000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")
+	q := nat(nil).make(16)
+	q.div(q, u, v)
+}
diff --git a/src/math/big/natconv.go b/src/math/big/natconv.go
new file mode 100644
index 0000000..ce94f2c
--- /dev/null
+++ b/src/math/big/natconv.go
@@ -0,0 +1,511 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements nat-to-string conversion functions.
+
+package big
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"math/bits"
+	"sync"
+)
+
+const digits = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // digits[d] is the character for digit value d
+
+// Note: MaxBase = len(digits), but it must remain an untyped rune constant
+//       for API compatibility.
+
+// MaxBase is the largest number base accepted for string conversions.
+const MaxBase = 10 + ('z' - 'a' + 1) + ('Z' - 'A' + 1)
+const maxBaseSmall = 10 + ('z' - 'a' + 1) // largest base for which scan treats upper and lower case letters alike
+
+// maxPow determines the largest power of b that fits into a Word. It returns
+// that power p == b**n <= _M together with its exponent n; hence at most n
+// digits in base b fit into a Word, e.g. maxPow(10) == (1e19, 19) in a 64bit Word.
+// TODO(gri) replace this with a table, generated at build time.
+func maxPow(b Word) (p Word, n int) {
+	limit := _M / b // p*b <= _M holds exactly when p <= limit
+	for p, n = b, 1; p <= limit; n++ { // assuming b <= _M
+		// p == b**n && p <= limit
+		p *= b
+	}
+
+	// p == b**n && p <= _M
+	return p, n
+}
+
+// pow returns x**n for n > 0, and 1 otherwise. It uses binary
+// exponentiation (the Russian Peasant Method).
+func pow(x Word, n int) (p Word) {
+	// Write n as the sum of bi * 2**i with each bi being 0 or 1; then
+	// x**n is the product of those x**(2**i) for which bi == 1.
+	// In iteration i of the loop, x holds the original x**(2**i).
+	p = 1
+	for ; n > 0; n >>= 1 {
+		if n&1 == 1 {
+			p *= x
+		}
+		x *= x
+	}
+	return p
+}
+
+// Errors reported by scan; errInvalSep is reported only if there is no other error.
+var (
+	errNoDigits = errors.New("number has no digits")
+	errInvalSep = errors.New("'_' must separate successive digits")
+)
+
+// scan scans the number corresponding to the longest possible prefix
+// from r representing an unsigned number in a given conversion base.
+// scan returns the corresponding natural number res, the actual base b,
+// a digit count, and a read or syntax error err, if any.
+//
+// For base 0, an underscore character “_” may appear between a base
+// prefix and an adjacent digit, and between successive digits; such
+// underscores do not change the value of the number, or the returned
+// digit count. Incorrect placement of underscores is reported as an
+// error if there are no other errors. If base != 0, underscores are
+// not recognized and thus terminate scanning like any other character
+// that is not a valid radix point or digit.
+//
+//	number    = mantissa | prefix pmantissa .
+//	prefix    = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
+//	mantissa  = digits "." [ digits ] | digits | "." digits .
+//	pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
+//	digits    = digit { [ "_" ] digit } .
+//	digit     = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
+//
+// Unless fracOk is set, the base argument must be 0 or a value between
+// 2 and MaxBase. If fracOk is set, the base argument must be one of
+// 0, 2, 8, 10, or 16. Providing an invalid base argument leads to a run-
+// time panic.
+//
+// For base 0, the number prefix determines the actual base: A prefix of
+// “0b” or “0B” selects base 2, “0o” or “0O” selects base 8, and
+// “0x” or “0X” selects base 16. If fracOk is false, a “0” prefix
+// (immediately followed by digits) selects base 8 as well. Otherwise,
+// the selected base is 10 and no prefix is accepted.
+//
+// If fracOk is set, a period followed by a fractional part is permitted.
+// The result value is computed as if there were no period present; and
+// the count value is used to determine the fractional part.
+//
+// For bases <= 36, lower and upper case letters are considered the same:
+// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
+// For bases > 36, the upper case letters 'A' to 'Z' represent the digit
+// values 36 to 61.
+//
+// A result digit count > 0 corresponds to the number of (non-prefix) digits
+// parsed. A digit count <= 0 indicates the presence of a period (if fracOk
+// is set, only), and -count is the number of fractional digits found.
+// In this case, the actual value of the scanned number is res * b**count.
+func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
+	// reject invalid bases
+	baseOk := base == 0 ||
+		!fracOk && 2 <= base && base <= MaxBase ||
+		fracOk && (base == 2 || base == 8 || base == 10 || base == 16)
+	if !baseOk {
+		panic(fmt.Sprintf("invalid number base %d", base))
+	}
+
+	// prev encodes the previously seen char: it is one
+	// of '_', '0' (a digit), or '.' (anything else). A
+	// valid separator '_' may only occur after a digit
+	// and if base == 0.
+	prev := '.'
+	invalSep := false
+
+	// one char look-ahead
+	ch, err := r.ReadByte()
+
+	// determine actual base
+	b, prefix := base, 0
+	if base == 0 {
+		// actual base is 10 unless there's a base prefix
+		b = 10
+		if err == nil && ch == '0' {
+			prev = '0'
+			count = 1
+			ch, err = r.ReadByte()
+			if err == nil {
+				// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
+				switch ch {
+				case 'b', 'B':
+					b, prefix = 2, 'b'
+				case 'o', 'O':
+					b, prefix = 8, 'o'
+				case 'x', 'X':
+					b, prefix = 16, 'x'
+				default:
+					if !fracOk {
+						b, prefix = 8, '0' // tentatively octal; reverts to decimal 0 below if no octal digit follows
+					}
+				}
+				if prefix != 0 {
+					count = 0 // prefix is not counted
+					if prefix != '0' {
+						ch, err = r.ReadByte()
+					}
+				}
+			}
+		}
+	}
+
+	// convert string
+	// Algorithm: Collect digits in groups of at most n digits in di
+	// and then use mulAddWW for every such group to add them to the
+	// result.
+	z = z[:0]
+	b1 := Word(b)
+	bn, n := maxPow(b1) // at most n digits in base b1 fit into Word
+	di := Word(0)       // 0 <= di < b1**i < bn
+	i := 0              // 0 <= i < n
+	dp := -1            // position of decimal point
+	for err == nil {
+		if ch == '.' && fracOk {
+			fracOk = false // accept at most one period
+			if prev == '_' {
+				invalSep = true
+			}
+			prev = '.'
+			dp = count
+		} else if ch == '_' && base == 0 {
+			if prev != '0' {
+				invalSep = true
+			}
+			prev = '_'
+		} else {
+			// convert rune into digit value d1
+			var d1 Word
+			switch {
+			case '0' <= ch && ch <= '9':
+				d1 = Word(ch - '0')
+			case 'a' <= ch && ch <= 'z':
+				d1 = Word(ch - 'a' + 10)
+			case 'A' <= ch && ch <= 'Z':
+				if b <= maxBaseSmall {
+					d1 = Word(ch - 'A' + 10)
+				} else {
+					d1 = Word(ch - 'A' + maxBaseSmall)
+				}
+			default:
+				d1 = MaxBase + 1
+			}
+			if d1 >= b1 {
+				r.UnreadByte() // ch does not belong to number anymore
+				break // exits the enclosing for loop (the switch above has ended)
+			}
+			prev = '0'
+			count++
+
+			// collect d1 in di
+			di = di*b1 + d1
+			i++
+
+			// if di is "full", add it to the result
+			if i == n {
+				z = z.mulAddWW(z, bn, di)
+				di = 0
+				i = 0
+			}
+		}
+
+		ch, err = r.ReadByte()
+	}
+
+	if err == io.EOF {
+		err = nil // running out of input simply ends the number
+	}
+
+	// other errors take precedence over invalid separators
+	if err == nil && (invalSep || prev == '_') {
+		err = errInvalSep
+	}
+
+	if count == 0 {
+		// no digits found
+		if prefix == '0' {
+			// there was only the octal prefix 0 (possibly followed by separators and digits > 7);
+			// interpret as decimal 0
+			return z[:0], 10, 1, err
+		}
+		err = errNoDigits // fall through; result will be 0
+	}
+
+	// add remaining digits to result
+	if i > 0 {
+		z = z.mulAddWW(z, pow(b1, i), di)
+	}
+	res = z.norm()
+
+	// adjust count for fraction, if any
+	if dp >= 0 {
+		// 0 <= dp <= count
+		count = dp - count
+	}
+
+	return
+}
+
+// utoa converts x to an ASCII representation in the given base; base must
+// be between 2 and MaxBase, inclusive (itoa panics otherwise). No sign is added.
+func (x nat) utoa(base int) []byte {
+	return x.itoa(false, base)
+}
+
+// itoa is like utoa but it prepends a '-' if neg && x != 0.
+func (x nat) itoa(neg bool, base int) []byte {
+	if base < 2 || base > MaxBase {
+		panic("invalid base")
+	}
+
+	// x == 0
+	if len(x) == 0 {
+		return []byte("0")
+	}
+	// len(x) > 0
+
+	// allocate buffer for conversion
+	i := int(float64(x.bitLen())/math.Log2(float64(base))) + 1 // off by 1 at most
+	if neg {
+		i++
+	}
+	s := make([]byte, i)
+
+	// convert power of two and non power of two bases separately (b&-b isolates the lowest set bit of b)
+	if b := Word(base); b == b&-b {
+		// shift is base b digit size in bits
+		shift := uint(bits.TrailingZeros(uint(b))) // shift > 0 because b >= 2
+		mask := Word(1<<shift - 1)
+		w := x[0]         // current word
+		nbits := uint(_W) // number of unprocessed bits in w
+
+		// convert less-significant words (include leading zeros)
+		for k := 1; k < len(x); k++ {
+			// convert full digits
+			for nbits >= shift {
+				i--
+				s[i] = digits[w&mask]
+				w >>= shift
+				nbits -= shift
+			}
+
+			// convert any partial leading digit and advance to next word
+			if nbits == 0 {
+				// no partial digit remaining, just advance
+				w = x[k]
+				nbits = _W
+			} else {
+				// partial digit in current word w (== x[k-1]) and next word x[k]
+				w |= x[k] << nbits
+				i--
+				s[i] = digits[w&mask]
+
+				// advance
+				w = x[k] >> (shift - nbits)
+				nbits = _W - (shift - nbits)
+			}
+		}
+
+		// convert digits of most-significant word w (omit leading zeros)
+		for w != 0 {
+			i--
+			s[i] = digits[w&mask]
+			w >>= shift
+		}
+
+	} else {
+		bb, ndigits := maxPow(b)
+
+		// construct table of successive squares of bb*leafSize to use in subdivisions
+		// result (table != nil) <=> (len(x) > leafSize > 0)
+		table := divisors(len(x), b, ndigits, bb)
+
+		// preserve x, create local copy for use by convertWords
+		q := nat(nil).set(x)
+
+		// convert q to string s in base b
+		q.convertWords(s, b, ndigits, bb, table)
+
+		// strip leading zeros
+		// (x != 0; thus s must contain at least one non-zero digit
+		// and the loop will terminate)
+		i = 0
+		for s[i] == '0' {
+			i++
+		}
+	}
+
+	if neg {
+		i-- // the buffer was allocated with one extra byte for the sign
+		s[i] = '-'
+	}
+
+	return s[i:]
+}
+
+// convertWords converts the words of q to base b digits in s. If q is large, it is recursively
+// "split in half" by nat/nat division using tabulated divisors. Otherwise, it is converted
+// iteratively using repeated nat/Word division.
+//
+// The iterative method processes n Words by n divW() calls, each of which visits every Word in the
+// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
+// Recursive conversion divides q by its approximate square root, yielding two parts, each half
+// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
+// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
+// is made better by splitting the subblocks recursively. Best is to split blocks until one more
+// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
+// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
+// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
+// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
+// specific hardware.
+func (q nat) convertWords(s []byte, b Word, ndigits int, bb Word, table []divisor) {
+	// split larger blocks recursively
+	if table != nil {
+		// len(q) > leafSize > 0
+		var r nat
+		index := len(table) - 1
+		for len(q) > leafSize {
+			// find divisor close to sqrt(q) if possible, but in any case < q
+			maxLength := q.bitLen()     // ~= log2 q, or at least log2 of the largest possible q of this bit length
+			minLength := maxLength >> 1 // ~= log2 sqrt(q)
+			for index > 0 && table[index-1].nbits > minLength {
+				index-- // desired
+			}
+			if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
+				index--
+				if index < 0 {
+					panic("internal inconsistency")
+				}
+			}
+
+			// split q into the two digit number (q'*bbb + r) to form independent subblocks
+			q, r = q.div(r, q, table[index].bbb)
+
+			// convert subblocks and collect results in s[:h] and s[h:]
+			h := len(s) - table[index].ndigits
+			r.convertWords(s[h:], b, ndigits, bb, table[0:index])
+			s = s[:h] // == q.convertWords(s, b, ndigits, bb, table[0:index+1])
+		}
+	}
+
+	// having split any large blocks now process the remaining (small) block iteratively
+	i := len(s)
+	var r Word
+	if b == 10 {
+		// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
+		for len(q) > 0 {
+			// extract least significant, base bb "digit"
+			q, r = q.divW(q, bb)
+			for j := 0; j < ndigits && i > 0; j++ {
+				i--
+				// avoid % computation since r%10 == r - int(r/10)*10;
+				// this appears to be faster for BenchmarkString10000Base10
+				// and smaller strings (but a bit slower for larger ones)
+				t := r / 10
+				s[i] = '0' + byte(r-t*10)
+				r = t
+			}
+		}
+	} else {
+		for len(q) > 0 {
+			// extract least significant, base bb "digit"
+			q, r = q.divW(q, bb)
+			for j := 0; j < ndigits && i > 0; j++ {
+				i--
+				s[i] = digits[r%b]
+				r /= b
+			}
+		}
+	}
+
+	// prepend high-order zeros
+	for i > 0 { // while need more leading zeros
+		i--
+		s[i] = '0'
+	}
+}
+
+// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion).
+// Benchmark and configure leafSize using: go test -bench="Leaf"
+//
+//	8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
+//	8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
+var leafSize int = 8 // number of Word-size binary values treated as a monolithic block
+
+type divisor struct {
+	bbb     nat // divisor; as built by divisors it equals b**ndigits for the output base b
+	nbits   int // bit length of divisor (discounting leading zeros) ~= log2(bbb)
+	ndigits int // digit length of divisor in terms of output base digits
+}
+
+var cacheBase10 struct {
+	sync.Mutex
+	table [64]divisor // cached divisors for base 10; divisors holds the embedded Mutex while extending it
+}
+
+// expWW computes x**y by wrapping both Words in nats and calling expNN.
+func (z nat) expWW(x, y Word) nat {
+	return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil, false)
+}
+
+// divisors returns the table of successive squares of bb**leafSize used by convertWords, or nil if recursion is disabled or m <= leafSize.
+func divisors(m int, b Word, ndigits int, bb Word) []divisor {
+	// only compute table when recursive conversion is enabled and x is large
+	if leafSize == 0 || m <= leafSize {
+		return nil
+	}
+
+	// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
+	k := 1
+	for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
+		k++
+	}
+
+	// reuse and extend existing table of divisors or create new table as appropriate
+	var table []divisor // for b == 10, table overlaps with cacheBase10.table
+	if b == 10 {
+		cacheBase10.Lock()
+		table = cacheBase10.table[0:k] // reuse old table for this conversion
+	} else {
+		table = make([]divisor, k) // create new table for this conversion
+	}
+
+	// extend table
+	if table[k-1].ndigits == 0 {
+		// add new entries as needed
+		var larger nat
+		for i := 0; i < k; i++ {
+			if table[i].ndigits == 0 {
+				if i == 0 {
+					table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
+					table[0].ndigits = ndigits * leafSize
+				} else {
+					table[i].bbb = nat(nil).sqr(table[i-1].bbb)
+					table[i].ndigits = 2 * table[i-1].ndigits
+				}
+
+				// optimization: exploit aggregated extra bits in macro blocks by multiplying in
+				// further factors of b while mulAddVWW reports 0 (presumably: no carry out)
+				larger = nat(nil).set(table[i].bbb)
+				for mulAddVWW(larger, larger, b, 0) == 0 {
+					table[i].bbb = table[i].bbb.set(larger)
+					table[i].ndigits++
+				}
+
+				table[i].nbits = table[i].bbb.bitLen()
+			}
+		}
+	}
+
+	if b == 10 {
+		cacheBase10.Unlock()
+	}
+
+	return table
+}
diff --git a/src/math/big/natconv_test.go b/src/math/big/natconv_test.go
new file mode 100644
index 0000000..d390272
--- /dev/null
+++ b/src/math/big/natconv_test.go
@@ -0,0 +1,463 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"math/bits"
+	"strings"
+	"testing"
+)
+
+func TestMaxBase(t *testing.T) {
+	if n := len(digits); MaxBase != n {
+		t.Fatalf("%d != %d", MaxBase, n)
+	}
+}
+
+// log2 computes the integer binary logarithm of x.
+// The result is the integer n for which 2^n <= x < 2^(n+1).
+// If x == 0, the result is -1 (bits.Len reports 0 for 0).
+func log2(x Word) int {
+	return bits.Len(uint(x)) - 1
+}
+
+func itoa(x nat, base int) []byte {
+	// special cases
+	if base < 2 {
+		panic("illegal base")
+	}
+	if len(x) == 0 {
+		return []byte("0")
+	}
+
+	// allocate buffer for conversion; every digit in the given
+	// base accounts for at least log2(base) bits of x
+	n := x.bitLen()/log2(Word(base)) + 1 // +1: round up
+	buf := make([]byte, n)
+
+	// convert: divide a copy of x by base repeatedly (x itself is
+	// not destroyed); the remainders are the digits, least significant
+	// digit first, so buf is filled from its end towards its start
+	d := Word(base)
+	for q := nat(nil).set(x); len(q) > 0; n-- {
+		var r Word
+		q, r = q.divW(q, d)
+		buf[n-1] = digits[r]
+	}
+
+	return buf[n:]
+}
+
+var strTests = []struct {
+	x nat    // nat value to be converted
+	b int    // conversion base
+	s string // expected result
+}{
+	{nil, 2, "0"},
+	{nat{1}, 2, "1"},
+	{nat{0xc5}, 2, "11000101"},
+	{nat{03271}, 8, "3271"},
+	{nat{10}, 10, "10"},
+	{nat{1234567890}, 10, "1234567890"},
+	{nat{0xdeadbeef}, 16, "deadbeef"},
+	{nat{0x229be7}, 17, "1a2b3c"},
+	{nat{0x309663e6}, 32, "o9cov6"},
+	{nat{0x309663e6}, 62, "TakXI"},
+}
+
+func TestString(t *testing.T) {
+	// utoa must panic with "invalid base" for base 1; the comma-ok assertion keeps a missing panic from crashing the test
+	var panicStr string
+	func() {
+		defer func() {
+			panicStr, _ = recover().(string) // stays "" if utoa did not panic
+		}()
+		natOne.utoa(1)
+	}()
+	if panicStr != "invalid base" {
+		t.Errorf("expected panic for invalid base")
+	}
+
+	for _, a := range strTests { // round trip: convert a.x to a string, then scan that string back
+		s := string(a.x.utoa(a.b))
+		if s != a.s {
+			t.Errorf("string%+v\n\tgot s = %s; want %s", a, s, a.s)
+		}
+
+		x, b, _, err := nat(nil).scan(strings.NewReader(a.s), a.b, false)
+		if x.cmp(a.x) != 0 {
+			t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)
+		}
+		if b != a.b {
+			t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.b)
+		}
+		if err != nil {
+			t.Errorf("scan%+v\n\tgot error = %s", a, err)
+		}
+	}
+}
+
+var natScanTests = []struct {
+	s     string // string to be scanned
+	base  int    // input base
+	frac  bool   // fraction ok
+	x     nat    // expected nat
+	b     int    // expected base
+	count int    // expected digit count
+	err   error  // expected error
+	next  rune   // next character (or 0, if at EOF)
+}{
+	// invalid: no digits
+	{"", 0, false, nil, 10, 0, errNoDigits, 0},
+	{"_", 0, false, nil, 10, 0, errNoDigits, 0},
+	{"?", 0, false, nil, 10, 0, errNoDigits, '?'},
+	{"?", 10, false, nil, 10, 0, errNoDigits, '?'},
+	{"", 10, false, nil, 10, 0, errNoDigits, 0},
+	{"", 36, false, nil, 36, 0, errNoDigits, 0},
+	{"", 62, false, nil, 62, 0, errNoDigits, 0},
+	{"0b", 0, false, nil, 2, 0, errNoDigits, 0},
+	{"0o", 0, false, nil, 8, 0, errNoDigits, 0},
+	{"0x", 0, false, nil, 16, 0, errNoDigits, 0},
+	{"0x_", 0, false, nil, 16, 0, errNoDigits, 0},
+	{"0b2", 0, false, nil, 2, 0, errNoDigits, '2'},
+	{"0B2", 0, false, nil, 2, 0, errNoDigits, '2'},
+	{"0o8", 0, false, nil, 8, 0, errNoDigits, '8'},
+	{"0O8", 0, false, nil, 8, 0, errNoDigits, '8'},
+	{"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
+	{"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
+	{"345", 2, false, nil, 2, 0, errNoDigits, '3'},
+
+	// invalid: incorrect use of decimal point
+	{"._", 0, true, nil, 10, 0, errNoDigits, 0},
+	{".0", 0, false, nil, 10, 0, errNoDigits, '.'},
+	{".0", 10, false, nil, 10, 0, errNoDigits, '.'},
+	{".", 0, true, nil, 10, 0, errNoDigits, 0},
+	{"0x.", 0, true, nil, 16, 0, errNoDigits, 0},
+	{"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'},
+	{"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'},
+
+	// invalid: incorrect use of separators
+	{"_0", 0, false, nil, 10, 1, errInvalSep, 0},
+	{"0_", 0, false, nil, 10, 1, errInvalSep, 0},
+	{"0__0", 0, false, nil, 8, 1, errInvalSep, 0},
+	{"0x___0", 0, false, nil, 16, 1, errInvalSep, 0},
+	{"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'},
+	{"0_8", 0, false, nil, 10, 1, errInvalSep, '8'},
+	{"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0},
+	{"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0},
+	{"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0},
+	{"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0},
+	{"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0},
+	{"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'},
+	{"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0},
+	{"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0},
+	{"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0},
+	{"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0},
+
+	// valid: separators are not accepted for base != 0
+	{"0_", 10, false, nil, 10, 1, nil, '_'},
+	{"1__0", 10, false, nat{1}, 10, 1, nil, '_'},
+	{"0__8", 10, false, nil, 10, 1, nil, '_'},
+	{"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'},
+
+	// valid, no decimal point
+	{"0", 0, false, nil, 10, 1, nil, 0},
+	{"0", 36, false, nil, 36, 1, nil, 0},
+	{"0", 62, false, nil, 62, 1, nil, 0},
+	{"1", 0, false, nat{1}, 10, 1, nil, 0},
+	{"1", 10, false, nat{1}, 10, 1, nil, 0},
+	{"0 ", 0, false, nil, 10, 1, nil, ' '},
+	{"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0
+	{"0b1", 0, false, nat{1}, 2, 1, nil, 0},
+	{"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0},
+	{"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'},
+	{"07", 0, false, nat{7}, 8, 1, nil, 0},
+	{"08", 0, false, nil, 10, 1, nil, '8'},
+	{"08", 10, false, nat{8}, 10, 2, nil, 0},
+	{"018", 0, false, nat{1}, 8, 1, nil, '8'},
+	{"0o7", 0, false, nat{7}, 8, 1, nil, 0},
+	{"0o18", 0, false, nat{1}, 8, 1, nil, '8'},
+	{"0O17", 0, false, nat{017}, 8, 2, nil, 0},
+	{"03271", 0, false, nat{03271}, 8, 4, nil, 0},
+	{"10ab", 0, false, nat{10}, 10, 2, nil, 'a'},
+	{"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0},
+	{"A", 36, false, nat{10}, 36, 1, nil, 0},
+	{"A", 37, false, nat{36}, 37, 1, nil, 0},
+	{"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0},
+	{"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'},
+	{"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'},
+	{"0x", 16, false, nil, 16, 1, nil, 'x'},
+	{"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
+	{"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
+
+	// valid, with decimal point
+	{"0.", 0, false, nil, 10, 1, nil, '.'},
+	{"0.", 10, true, nil, 10, 0, nil, 0},
+	{"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'},
+	{".000", 10, true, nil, 10, -3, nil, 0},
+	{"12.3", 10, true, nat{123}, 10, -1, nil, 0},
+	{"012.345", 10, true, nat{12345}, 10, -3, nil, 0},
+	{"0.1", 0, true, nat{1}, 10, -1, nil, 0},
+	{"0.1", 2, true, nat{1}, 2, -1, nil, 0},
+	{"0.12", 2, true, nat{1}, 2, -1, nil, '2'},
+	{"0b0.1", 0, true, nat{1}, 2, -1, nil, 0},
+	{"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'},
+	{"0o0.7", 0, true, nat{7}, 8, -1, nil, 0},
+	{"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'},
+	{"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0},
+
+	// valid, with separators
+	{"1_000", 0, false, nat{1000}, 10, 4, nil, 0},
+	{"0_466", 0, false, nat{0466}, 8, 3, nil, 0},
+	{"0o_600", 0, false, nat{0600}, 8, 3, nil, 0},
+	{"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0},
+	{"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0},
+	{"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0},
+	{"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0},
+	{"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0},
+	{"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'},
+}
+
+func TestScanBase(t *testing.T) { // checks scan against every entry of natScanTests
+	for _, a := range natScanTests {
+		r := strings.NewReader(a.s)
+		x, b, count, err := nat(nil).scan(r, a.base, a.frac)
+		if err != a.err {
+			t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err)
+		}
+		if x.cmp(a.x) != 0 {
+			t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)
+		}
+		if b != a.b {
+			t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.b) // report the expected base a.b, not the input base
+		}
+		if count != a.count {
+			t.Errorf("scan%+v\n\tgot count = %d; want %d", a, count, a.count)
+		}
+		next, _, err := r.ReadRune() // first character that scan left unread
+		if err == io.EOF {
+			next = 0
+			err = nil
+		}
+		if err == nil && next != a.next {
+			t.Errorf("scan%+v\n\tgot next = %q; want %q", a, next, a.next)
+		}
+	}
+}
+
+var pi = "3" +
+	"14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651" +
+	"32823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461" +
+	"28475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920" +
+	"96282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179" +
+	"31051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798" +
+	"60943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901" +
+	"22495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837" +
+	"29780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083" +
+	"81420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909" +
+	"21642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151" +
+	"55748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035" +
+	"63707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104" +
+	"75216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992" +
+	"45863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818" +
+	"34797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548" +
+	"16136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179" +
+	"04946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886" +
+	"26945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645" +
+	"99581339047802759009946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745" +
+	"53050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382" +
+	"68683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244" +
+	"13654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767" +
+	"88952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288" +
+	"79710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821" +
+	"68299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610" +
+	"21359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435" +
+	"06430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675" +
+	"14269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672" +
+	"21825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539" +
+	"05796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007" +
+	"23055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816" +
+	"90915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398" +
+	"31501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064" +
+	"20467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325" +
+	"97463667305836041428138830320382490375898524374417029132765618093773444030707469211201913020330380197621101100" +
+	"44929321516084244485963766983895228684783123552658213144957685726243344189303968642624341077322697802807318915" +
+	"44110104468232527162010526522721116603966655730925471105578537634668206531098965269186205647693125705863566201" +
+	"85581007293606598764861179104533488503461136576867532494416680396265797877185560845529654126654085306143444318" +
+	"58676975145661406800700237877659134401712749470420562230538994561314071127000407854733269939081454664645880797" +
+	"27082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923" +
+	"09907965473761255176567513575178296664547791745011299614890304639947132962107340437518957359614589019389713111" +
+	"79042978285647503203198691514028708085990480109412147221317947647772622414254854540332157185306142288137585043" +
+	"06332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120" +
+	"91807638327166416274888800786925602902284721040317211860820419000422966171196377921337575114959501566049631862" +
+	"94726547364252308177036751590673502350728354056704038674351362222477158915049530984448933309634087807693259939" +
+	"78054193414473774418426312986080998886874132604721569516239658645730216315981931951673538129741677294786724229" +
+	"24654366800980676928238280689964004824354037014163149658979409243237896907069779422362508221688957383798623001" +
+	"59377647165122893578601588161755782973523344604281512627203734314653197777416031990665541876397929334419521541" +
+	"34189948544473456738316249934191318148092777710386387734317720754565453220777092120190516609628049092636019759" +
+	"88281613323166636528619326686336062735676303544776280350450777235547105859548702790814356240145171806246436267" +
+	"94561275318134078330336254232783944975382437205835311477119926063813346776879695970309833913077109870408591337"
+
+// Test case for BenchmarkScanPi: scanning the decimal digits of pi and
+// converting the result back to a string must reproduce those digits.
+func TestScanPi(t *testing.T) {
+	z, _, _, err := nat(nil).scan(strings.NewReader(pi), 10, false)
+	if err != nil {
+		t.Errorf("scanning pi: %s", err)
+	}
+	if got := z.utoa(10); string(got) != pi {
+		t.Errorf("scanning pi: got %s", got)
+	}
+}
+
+func TestScanPiParallel(t *testing.T) {
+	const workers = 2
+	done := make(chan struct{})
+	for w := 0; w < workers; w++ {
+		go func() {
+			TestScanPi(t)
+			done <- struct{}{}
+		}()
+	}
+	for w := 0; w < workers; w++ {
+		<-done
+	}
+}
+
+func BenchmarkScanPi(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		var x nat // start from a nil nat in every iteration
+		x.scan(strings.NewReader(pi), 10, false)
+	}
+}
+
+func BenchmarkStringPiParallel(b *testing.B) {
+	var x nat
+	x, _, _, _ = x.scan(strings.NewReader(pi), 0, false) // base 0: pi has no prefix and thus scans as decimal
+	if string(x.utoa(10)) != pi {
+		panic("benchmark incorrect: conversion failed")
+	}
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			x.utoa(10) // concurrent conversions of the same x
+		}
+	})
+}
+
+func BenchmarkScan(b *testing.B) {
+	const x = 10 // the scanned numbers are the powers x**y
+	for _, base := range []int{2, 8, 10, 16} {
+		for _, y := range []Word{10, 100, 1000, 10000, 100000} {
+			if isRaceBuilder && y > 1000 {
+				continue // skip the larger sizes when isRaceBuilder is set
+			}
+			b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) {
+				b.StopTimer() // keep the setup out of the measurement
+				var z nat
+				z = z.expWW(x, y)
+
+				s := z.utoa(base)
+				if t := itoa(z, base); !bytes.Equal(s, t) { // sanity check against the reference itoa
+					b.Fatalf("scanning: got %s; want %s", s, t)
+				}
+				b.StartTimer()
+
+				for i := 0; i < b.N; i++ {
+					z.scan(bytes.NewReader(s), base, false)
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkString(b *testing.B) {
+	const x = 10 // the converted numbers are the powers x**y
+	for _, base := range []int{2, 8, 10, 16} {
+		for _, y := range []Word{10, 100, 1000, 10000, 100000} {
+			if isRaceBuilder && y > 1000 {
+				continue // skip the larger sizes when isRaceBuilder is set
+			}
+			b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) {
+				b.StopTimer() // keep the setup out of the measurement
+				var z nat
+				z = z.expWW(x, y)
+				z.utoa(base) // warm divisor cache
+				b.StartTimer()
+
+				for i := 0; i < b.N; i++ {
+					_ = z.utoa(base)
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkLeafSize(b *testing.B) {
+	for n := 0; n <= 16; n++ { // a leaf size of 0 disables recursive conversion
+		b.Run(fmt.Sprint(n), func(b *testing.B) { LeafSizeHelper(b, 10, n) })
+	}
+	// Try some large lengths
+	for _, n := range []int{32, 64} {
+		b.Run(fmt.Sprint(n), func(b *testing.B) { LeafSizeHelper(b, 10, n) })
+	}
+}
+
+func LeafSizeHelper(b *testing.B, base, size int) {
+	b.StopTimer()
+	originalLeafSize := leafSize
+	resetTable(cacheBase10.table[:]) // drop cached divisors built for the previous leafSize
+	leafSize = size
+	b.StartTimer()
+
+	for d := 1; d <= 10000; d *= 10 {
+		b.StopTimer()
+		var z nat
+		z = z.expWW(Word(base), Word(d)) // build target number
+		_ = z.utoa(base)                 // warm divisor cache
+		b.StartTimer()
+
+		for i := 0; i < b.N; i++ {
+			_ = z.utoa(base)
+		}
+	}
+
+	b.StopTimer()
+	resetTable(cacheBase10.table[:]) // the cache must not keep divisors built for the benchmarked leafSize
+	leafSize = originalLeafSize
+	b.StartTimer()
+}
+
+func resetTable(table []divisor) { // zeroes every entry of table, provided its first entry has been computed
+	if len(table) > 0 && table[0].bbb != nil { // len check also covers empty non-nil slices
+		for i := 0; i < len(table); i++ {
+			table[i].bbb = nil
+			table[i].nbits = 0
+			table[i].ndigits = 0
+		}
+	}
+}
+
+func TestStringPowers(t *testing.T) {
+	// compare utoa against the simple reference itoa for b**p;
+	// short mode restricts the test to bases 2, 3 and exponents 0 to 10
+	maxBase, maxExp := 16, Word(512)
+	if testing.Short() {
+		maxBase, maxExp = 3, 10
+	}
+
+	for b := 2; b <= maxBase; b++ {
+		for p := Word(0); p <= maxExp; p++ {
+			x := nat(nil).expWW(Word(b), p)
+			got := x.utoa(b)
+			want := itoa(x, b)
+			if !bytes.Equal(got, want) {
+				t.Errorf("failed at %d ** %d in base %d: %s != %s", b, p, b, got, want)
+			}
+		}
+	}
+}
diff --git a/src/math/big/natdiv.go b/src/math/big/natdiv.go
new file mode 100644
index 0000000..14233a2
--- /dev/null
+++ b/src/math/big/natdiv.go
@@ -0,0 +1,897 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+
+Multi-precision division. Here be dragons.
+
+Given u and v, where u is n+m digits, and v is n digits (with no leading zeros),
+the goal is to return quo, rem such that u = quo*v + rem, where 0 ≤ rem < v.
+That is, quo = ⌊u/v⌋ where ⌊x⌋ denotes the floor (truncation to integer) of x,
+and rem = u - quo·v.
+
+
+Long Division
+
+Division in a computer proceeds the same as long division in elementary school,
+but computers are not as good as schoolchildren at following vague directions,
+so we have to be much more precise about the actual steps and what can happen.
+
+We work from most to least significant digit of the quotient, doing:
+
+ • Guess a digit q, the number of v to subtract from the current
+   section of u to zero out the topmost digit.
+ • Check the guess by multiplying q·v and comparing it against
+   the current section of u, adjusting the guess as needed.
+ • Subtract q·v from the current section of u.
+ • Add q to the corresponding section of the result quo.
+
+When all digits have been processed, the final remainder is left in u
+and returned as rem.
+
+For example, here is a sketch of dividing 5 digits by 3 digits (n=3, m=2).
+
+	                 q₂ q₁ q₀
+	         _________________
+	v₂ v₁ v₀ ) u₄ u₃ u₂ u₁ u₀
+	           ↓  ↓  ↓  |  |
+	          [u₄ u₃ u₂]|  |
+	        - [  q₂·v  ]|  |
+	        ----------- ↓  |
+	          [  rem  | u₁]|
+	        - [    q₁·v   ]|
+	           ----------- ↓
+	             [  rem  | u₀]
+	           - [    q₀·v   ]
+	              ------------
+	                [  rem   ]
+
+Instead of creating new storage for the remainders and copying digits from u
+as indicated by the arrows, we use u's storage directly as both the source
+and destination of the subtractions, so that the remainders overwrite
+successive overlapping sections of u as the division proceeds, using a slice
+of u to identify the current section. This avoids all the copying as well as
+shifting of remainders.
+
+Division of u with n+m digits by v with n digits (in base B) can in general
+produce at most m+1 digits, because:
+
+  • u < B^(n+m)               [B^(n+m) has n+m+1 digits]
+  • v ≥ B^(n-1)               [B^(n-1) is the smallest n-digit number]
+  • u/v < B^(n+m) / B^(n-1)   [divide bounds for u, v]
+  • u/v < B^(m+1)             [simplify]
+
+The first step is special: it takes the top n digits of u and divides them by
+the n digits of v, producing the first quotient digit and an n-digit remainder.
+In the example, q₂ = ⌊u₄u₃u₂ / v⌋.
+
+The first step divides n digits by n digits to ensure that it produces only a
+single digit.
+
+Each subsequent step appends the next digit from u to the remainder and divides
+those n+1 digits by the n digits of v, producing another quotient digit and a
+new n-digit remainder.
+
+Subsequent steps divide n+1 digits by n digits, an operation that in general
+might produce two digits. However, as used in the algorithm, that division is
+guaranteed to produce only a single digit. The dividend is of the form
+rem·B + d, where rem is a remainder from the previous step and d is a single
+digit, so:
+
+ • rem ≤ v - 1                 [rem is a remainder from dividing by v]
+ • rem·B ≤ v·B - B             [multiply by B]
+ • d ≤ B - 1                   [d is a single digit]
+ • rem·B + d ≤ v·B - 1         [add]
+ • rem·B + d < v·B             [change ≤ to <]
+ • (rem·B + d)/v < B           [divide by v]
+
+
+Guess and Check
+
+At each step we need to divide n+1 digits by n digits, but this is for the
+implementation of division by n digits, so we can't just invoke a division
+routine: we _are_ the division routine. Instead, we guess at the answer and
+then check it using multiplication. If the guess is wrong, we correct it.
+
+How can this guessing possibly be efficient? It turns out that the following
+statement (let's call it the Good Guess Guarantee) is true.
+
+If
+
+ • q = ⌊u/v⌋ where u is n+1 digits and v is n digits,
+ • q < B, and
+ • the topmost digit of v = vₙ₋₁ ≥ B/2,
+
+then q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ satisfies q ≤ q̂ ≤ q+2. (Proof below.)
+
+That is, if we know the answer has only a single digit and we guess an answer
+by ignoring the bottom n-1 digits of u and v, using a 2-by-1-digit division,
+then that guess is at least as large as the correct answer. It is also not
+too much larger: it is off by at most two from the correct answer.
+
+Note that in the first step of the overall division, which is an n-by-n-digit
+division, the 2-by-1 guess uses an implicit uₙ = 0.
+
+Note that using a 2-by-1-digit division here does not mean calling ourselves
+recursively. Instead, we use an efficient direct hardware implementation of
+that operation.
+
+Note that because q is u/v rounded down, q·v must not exceed u: u ≥ q·v.
+If a guess q̂ is too big, it will not satisfy this test. Viewed a different way,
+the remainder r̂ for a given q̂ is u - q̂·v, which must be non-negative. If it is
+negative, then the guess q̂ is too big.
+
+This gives us a way to compute q. First compute q̂ with 2-by-1-digit division.
+Then, while u < q̂·v, decrement q̂; this loop executes at most twice, because
+q̂ ≤ q+2.
+
+
+Scaling Inputs
+
+The Good Guess Guarantee requires that the top digit of v (vₙ₋₁) be at least B/2.
+For example in base 10, ⌊172/19⌋ = 9, but ⌊17/1⌋ = 17: the guess is wildly off
+because the first digit 1 is smaller than B/2 = 5.
+
+We can ensure that v has a large top digit by multiplying both u and v by the
+right amount. Continuing the example, if we multiply both 172 and 19 by 3, we
+now have ⌊516/57⌋, the leading digit of v is now ≥ 5, and sure enough
+⌊51/5⌋ = 10 is much closer to the correct answer 9. It would be easier here
+to multiply by 4, because that can be done with a shift. Specifically, we can
+always count the number of leading zeros i in the first digit of v and then
+shift both u and v left by i bits.
+
+Having scaled u and v, the value ⌊u/v⌋ is unchanged, but the remainder will
+be scaled: 172 mod 19 is 1, but 516 mod 57 is 3. We have to divide the remainder
+by the scaling factor (shifting right i bits) when we finish.
+
+Note that these shifts happen before and after the entire division algorithm,
+not at each step in the per-digit iteration.
+
+Note the effect of scaling inputs on the size of the possible quotient.
+In the scaled u/v, u can gain a digit from scaling; v never does, because we
+pick the scaling factor to make v's top digit larger but without overflowing.
+If u and v have n+m and n digits after scaling, then:
+
+  • u < B^(n+m)               [B^(n+m) has n+m+1 digits]
+  • v ≥ B^n / 2               [vₙ₋₁ ≥ B/2, so vₙ₋₁·B^(n-1) ≥ B^n/2]
+  • u/v < B^(n+m) / (B^n / 2) [divide bounds for u, v]
+  • u/v < 2 B^m               [simplify]
+
+The quotient can still have m+1 significant digits, but if so the top digit
+must be a 1. This provides a different way to handle the first digit of the
+result: compare the top n digits of u against v and fill in either a 0 or a 1.
+
+
+Refining Guesses
+
+Before we check whether u < q̂·v, we can adjust our guess to change it from
+q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ into the refined guess ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋.
+Although not mentioned above, the Good Guess Guarantee also promises that this
+3-by-2-digit division guess is more precise and at most one away from the real
+answer q. The improvement from the 2-by-1 to the 3-by-2 guess can also be done
+without n-digit math.
+
+If we have a guess q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ and we want to see if it is also equal to
+⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, we can use the same check we would for the full division:
+if uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂, then the guess is too large and should be reduced.
+
+Checking uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ < 0,
+and
+
+	uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ = (uₙuₙ₋₁·B + uₙ₋₂) - q̂·(vₙ₋₁·B + vₙ₋₂)
+	                          [splitting off the bottom digit]
+	                      = (uₙuₙ₋₁ - q̂·vₙ₋₁)·B + uₙ₋₂ - q̂·vₙ₋₂
+	                          [regrouping]
+
+The expression (uₙuₙ₋₁ - q̂·vₙ₋₁) is the remainder of uₙuₙ₋₁ / vₙ₋₁.
+If the initial guess returns both q̂ and its remainder r̂, then checking
+whether uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as checking r̂·B + uₙ₋₂ < q̂·vₙ₋₂.
+
+If we find that r̂·B + uₙ₋₂ < q̂·vₙ₋₂, then we can adjust the guess by
+decrementing q̂ and adding vₙ₋₁ to r̂. We repeat until r̂·B + uₙ₋₂ ≥ q̂·vₙ₋₂.
+(As before, this fixup is only needed at most twice.)
+
+Now that q̂ = ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, as mentioned above it is at most one
+away from the correct q, and we've avoided doing any n-digit math.
+(If we need the new remainder, it can be computed as r̂·B + uₙ₋₂ - q̂·vₙ₋₂.)
+
+The final check u < q̂·v and the possible fixup must be done at full precision.
+For random inputs, a fixup at this step is exceedingly rare: the 3-by-2 guess
+is not often wrong at all. But still we must do the check. Note that since the
+3-by-2 guess is off by at most 1, it can be convenient to perform the final
+u < q̂·v as part of the computation of the remainder r = u - q̂·v. If the
+subtraction underflows, decrementing q̂ and adding one v back to r is enough to
+arrive at the final q, r.
+
+That's the entirety of long division: scale the inputs, and then loop over
+each output position, guessing, checking, and correcting the next output digit.
+
+For a 2n-digit number divided by an n-digit number (the worst size-n case for
+division complexity), this algorithm uses n+1 iterations, each of which must do
+at least the 1-by-n-digit multiplication q̂·v. That's O(n) iterations of
+O(n) time each, so O(n²) time overall.
+
+
+Recursive Division
+
+For very large inputs, it is possible to improve on the O(n²) algorithm.
+Let's call a group of n/2 real digits a (very) “wide digit”. We can run the
+standard long division algorithm explained above over the wide digits instead of
+the actual digits. This will result in many fewer steps, but the math involved in
+each step is more work.
+
+Where basic long division uses a 2-by-1-digit division to guess the initial q̂,
+the new algorithm must use a 2-by-1-wide-digit division, which is of course
+really an n-by-n/2-digit division. That's OK: if we implement n-digit division
+in terms of n/2-digit division, the recursion will terminate when the divisor
+becomes small enough to handle with standard long division or even with the
+2-by-1 hardware instruction.
+
+For example, here is a sketch of dividing 10 digits by 4, proceeding with
+wide digits corresponding to two regular digits. The first step, still special,
+must leave off a (regular) digit, dividing 5 by 4 and producing a 4-digit
+remainder less than v. The middle steps divide 6 digits by 4, guaranteed to
+produce two output digits each (one wide digit) with 4-digit remainders.
+The final step must use what it has: the 4-digit remainder plus one more,
+5 digits to divide by 4.
+
+	                       q₆ q₅ q₄ q₃ q₂ q₁ q₀
+	            _______________________________
+	v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀
+	              ↓  ↓  ↓  ↓  ↓  |  |  |  |  |
+	             [u₉ u₈ u₇ u₆ u₅]|  |  |  |  |
+	           - [    q₆q₅·v    ]|  |  |  |  |
+	           ----------------- ↓  ↓  |  |  |
+	                [    rem    |u₄ u₃]|  |  |
+	              - [     q₄q₃·v      ]|  |  |
+	              -------------------- ↓  ↓  |
+	                      [    rem    |u₂ u₁]|
+	                    - [     q₂q₁·v      ]|
+	                    -------------------- ↓
+	                            [    rem    |u₀]
+	                          - [     q₀·v     ]
+	                          ------------------
+	                               [    rem    ]
+
+An alternative would be to look ahead to how well n/2 divides into n+m and
+adjust the first step to use fewer digits as needed, making the first step
+more special to make the last step not special at all. For example, using the
+same input, we could choose to use only 4 digits in the first step, leaving
+a full wide digit for the last step:
+
+	                       q₆ q₅ q₄ q₃ q₂ q₁ q₀
+	            _______________________________
+	v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀
+	              ↓  ↓  ↓  ↓  |  |  |  |  |  |
+	             [u₉ u₈ u₇ u₆]|  |  |  |  |  |
+	           - [    q₆·v   ]|  |  |  |  |  |
+	           -------------- ↓  ↓  |  |  |  |
+	             [    rem    |u₅ u₄]|  |  |  |
+	           - [     q₅q₄·v      ]|  |  |  |
+	           -------------------- ↓  ↓  |  |
+	                   [    rem    |u₃ u₂]|  |
+	                 - [     q₃q₂·v      ]|  |
+	                 -------------------- ↓  ↓
+	                         [    rem    |u₁ u₀]
+	                       - [     q₁q₀·v      ]
+	                       ---------------------
+	                               [    rem    ]
+
+Today, the code in divRecursiveStep works like the first example. Perhaps in
+the future we will make it work like the alternative, to avoid a special case
+in the final iteration.
+
+Either way, each step is a 3-by-2-wide-digit division approximated first by
+a 2-by-1-wide-digit division, just as we did for regular digits in long division.
+Because the actual answer we want is a 3-by-2-wide-digit division, instead of
+multiplying q̂·v directly during the fixup, we can use the quick refinement
+from long division (an n/2-by-n/2 multiply) to correct q to its actual value
+and also compute the remainder (as mentioned above), and then stop after that,
+never doing a full n-by-n multiply.
+
+Instead of using an n-by-n/2-digit division to produce n/2 digits, we can add
+(not discard) one more real digit, doing an (n+1)-by-(n/2+1)-digit division that
+produces n/2+1 digits. That single extra digit tightens the Good Guess Guarantee
+to q ≤ q̂ ≤ q+1 and lets us drop long division's special treatment of the first
+digit. These benefits are discussed more after the Good Guess Guarantee proof
+below.
+
+
+How Fast is Recursive Division?
+
+For a 2n-by-n-digit division, this algorithm runs a 4-by-2 long division over
+wide digits, producing two wide digits plus a possible leading regular digit 1,
+which can be handled without a recursive call. That is, the algorithm uses two
+full iterations, each using an n-by-n/2-digit division and an n/2-by-n/2-digit
+multiplication, along with a few n-digit additions and subtractions. The standard
+n-by-n-digit multiplication algorithm requires O(n²) time, making the overall
+algorithm require time T(n) where
+
+	T(n) = 2T(n/2) + O(n) + O(n²)
+
+which, by the Bentley-Haken-Saxe theorem, ends up reducing to T(n) = O(n²).
+This is not an improvement over regular long division.
+
+When the number of digits n becomes large enough, Karatsuba's algorithm for
+multiplication can be used instead, which takes O(n^log₂3) = O(n^1.6) time.
+(Karatsuba multiplication is implemented in func karatsuba in nat.go.)
+That makes the overall recursive division algorithm take O(n^1.6) time as well,
+which is an improvement, but again only for large enough numbers.
+
+It is not critical to make sure that every recursion does only two recursive
+calls. While in general the number of recursive calls can change the time
+analysis, in this case doing three calls does not change the analysis:
+
+	T(n) = 3T(n/2) + O(n) + O(n^log₂3)
+
+ends up being T(n) = O(n^log₂3). Because the Karatsuba multiplication taking
+time O(n^log₂3) is itself doing 3 half-sized recursions, doing three for the
+division does not hurt the asymptotic performance. Of course, it is likely
+still faster in practice to do two.
+
+
+Proof of the Good Guess Guarantee
+
+Given numbers x, y, let us break them into the quotients and remainders when
+divided by some scaling factor S, with the added constraints that the quotient
+x/y and the high part of y are both less than some limit T, and that the high
+part of y is at least half as big as T.
+
+	x₁ = ⌊x/S⌋        y₁ = ⌊y/S⌋
+	x₀ = x mod S      y₀ = y mod S
+
+	x  = x₁·S + x₀    0 ≤ x₀ < S    x/y < T
+	y  = y₁·S + y₀    0 ≤ y₀ < S    T/2 ≤ y₁ < T
+
+And consider the two truncated quotients:
+
+	q = ⌊x/y⌋
+	q̂ = ⌊x₁/y₁⌋
+
+We will prove that q ≤ q̂ ≤ q+2.
+
+The guarantee makes no real demands on the scaling factor S: it is simply the
+magnitude of the digits cut from both x and y to produce x₁ and y₁.
+The guarantee makes only limited demands on T: it must be large enough to hold
+the quotient x/y, and y₁ must have roughly the same size.
+
+To apply to the earlier discussion of 2-by-1 guesses in long division,
+we would choose:
+
+	S  = Bⁿ⁻¹
+	T  = B
+	x  = u
+	x₁ = uₙuₙ₋₁
+	x₀ = uₙ₋₂...u₀
+	y  = v
+	y₁ = vₙ₋₁
+	y₀ = vₙ₋₂...v₀
+
+These simpler variables avoid repeating those longer expressions in the proof.
+
+Note also that, by definition, truncating division ⌊x/y⌋ satisfies
+
+	x/y - 1 < ⌊x/y⌋ ≤ x/y.
+
+This fact will be used a few times in the proofs.
+
+Proof that q ≤ q̂:
+
+	q̂·y₁ = ⌊x₁/y₁⌋·y₁                      [by definition, q̂ = ⌊x₁/y₁⌋]
+	     > (x₁/y₁ - 1)·y₁                  [x₁/y₁ - 1 < ⌊x₁/y₁⌋]
+	     = x₁ - y₁                         [distribute y₁]
+
+	So q̂·y₁ > x₁ - y₁.
+	Since q̂·y₁ is an integer, q̂·y₁ ≥ x₁ - y₁ + 1.
+
+	q̂ - q = q̂ - ⌊x/y⌋                      [by definition, q = ⌊x/y⌋]
+	      ≥ q̂ - x/y                        [⌊x/y⌋ ≤ x/y]
+	      = (1/y)·(q̂·y - x)                [factor out 1/y]
+	      ≥ (1/y)·(q̂·y₁·S - x)             [y = y₁·S + y₀ ≥ y₁·S]
+	      ≥ (1/y)·((x₁ - y₁ + 1)·S - x)    [above: q̂·y₁ ≥ x₁ - y₁ + 1]
+	      = (1/y)·(x₁·S - y₁·S + S - x)    [distribute S]
+	      = (1/y)·(S - x₀ - y₁·S)          [-x = -x₁·S - x₀]
+	      > -y₁·S / y                      [x₀ < S, so S - x₀ > 0; drop it]
+	      ≥ -1                             [y₁·S ≤ y]
+
+	So q̂ - q > -1.
+	Since q̂ - q is an integer, q̂ - q ≥ 0, or equivalently q ≤ q̂.
+
+Proof that q̂ ≤ q+2:
+
+	x₁/y₁ - x/y = x₁·S/y₁·S - x/y          [multiply left term by S/S]
+	            ≤ x/y₁·S - x/y             [x₁S ≤ x]
+	            = (x/y)·(y/y₁·S - 1)       [factor out x/y]
+	            = (x/y)·((y - y₁·S)/y₁·S)  [move -1 into y/y₁·S fraction]
+	            = (x/y)·(y₀/y₁·S)          [y - y₁·S = y₀]
+	            = (x/y)·(1/y₁)·(y₀/S)      [factor out 1/y₁]
+	            < (x/y)·(1/y₁)             [y₀ < S, so y₀/S < 1]
+	            ≤ (x/y)·(2/T)              [y₁ ≥ T/2, so 1/y₁ ≤ 2/T]
+	            < T·(2/T)                  [x/y < T]
+	            = 2                        [T·(2/T) = 2]
+
+	So x₁/y₁ - x/y < 2.
+
+	q̂ - q = ⌊x₁/y₁⌋ - q                    [by definition, q̂ = ⌊x₁/y₁⌋]
+	      = ⌊x₁/y₁⌋ - ⌊x/y⌋                [by definition, q = ⌊x/y⌋]
+	      ≤ x₁/y₁ - ⌊x/y⌋                  [⌊x₁/y₁⌋ ≤ x₁/y₁]
+	      < x₁/y₁ - (x/y - 1)              [⌊x/y⌋ > x/y - 1]
+	      = (x₁/y₁ - x/y) + 1              [regrouping]
+	      < 2 + 1                          [above: x₁/y₁ - x/y < 2]
+	      = 3
+
+	So q̂ - q < 3.
+	Since q̂ - q is an integer, q̂ - q ≤ 2.
+
+Note that when x/y < T/2, the bounds tighten to x₁/y₁ - x/y < 1 and therefore
+q̂ - q ≤ 1.
+
+Note also that in the general case 2n-by-n division where we don't know that
+x/y < T, we do know that x/y < 2T, yielding the bound q̂ - q ≤ 4. So we could
+remove the special case first step of long division as long as we allow the
+first fixup loop to run up to four times. (Using a simple comparison to decide
+whether the first digit is 0 or 1 is still more efficient, though.)
+
+Finally, note that when dividing three leading base-B digits by two (scaled),
+we have T = B² and x/y < B = T/B, a much tighter bound than x/y < T.
+This in turn yields the much tighter bound x₁/y₁ - x/y < 2/B. This means that
+⌊x₁/y₁⌋ and ⌊x/y⌋ can only differ when x/y is less than 2/B greater than an
+integer. For random x and y, the chance of this is 2/B, or, for large B,
+approximately zero. This means that after we produce the 3-by-2 guess in the
+long division algorithm, the fixup loop essentially never runs.
+
+In the recursive algorithm, the extra digit in (2·⌊n/2⌋+1)-by-(⌊n/2⌋+1)-digit
+division has exactly the same effect: the probability of needing a fixup is the
+same 2/B. Even better, we can allow the general case x/y < 2T and the fixup
+probability only grows to 4/B, still essentially zero.
+
+
+References
+
+There are no great references for implementing long division; thus this comment.
+Here are some notes about what to expect from the obvious references.
+
+Knuth Volume 2 (Seminumerical Algorithms) section 4.3.1 is the usual canonical
+reference for long division, but that entire series is highly compressed, never
+repeating a necessary fact and leaving important insights to the exercises.
+For example, no rationale whatsoever is given for the calculation that extends
+q̂ from a 2-by-1 to a 3-by-2 guess, nor why it reduces the error bound.
+The proof that the calculation even has the desired effect is left to exercises.
+The solutions to those exercises provided at the back of the book are entirely
+calculations, still with no explanation as to what is going on or how you would
+arrive at the idea of doing those exact calculations. Nowhere is it mentioned
+that this test extends the 2-by-1 guess into a 3-by-2 guess. The proof of the
+Good Guess Guarantee is only for the 2-by-1 guess and argues by contradiction,
+making it difficult to understand how modifications like adding another digit
+or adjusting the quotient range affect the overall bound.
+
+All that said, Knuth remains the canonical reference. It is dense but packed
+full of information and references, and the proofs are simpler than many other
+presentations. The proofs above are reworkings of Knuth's to remove the
+arguments by contradiction and add explanations or steps that Knuth omitted.
+But beware of errors in older printings. Take the published errata with you.
+
+Brinch Hansen's “Multiple-length Division Revisited: a Tour of the Minefield”
+starts with a blunt critique of Knuth's presentation (among others) and then
+presents a more detailed and easier to follow treatment of long division,
+including an implementation in Pascal. But the algorithm and implementation
+work entirely in terms of 3-by-2 division, which is much less useful on modern
+hardware than an algorithm using 2-by-1 division. The proofs are a bit too
+focused on digit counting and seem needlessly complex, especially compared to
+the ones given above.
+
+Burnikel and Ziegler's “Fast Recursive Division” introduced the key insight of
+implementing division by an n-digit divisor using recursive calls to division
+by an n/2-digit divisor, relying on Karatsuba multiplication to yield a
+sub-quadratic run time. However, the presentation decisions are made almost
+entirely for the purpose of simplifying the run-time analysis, rather than
+simplifying the presentation. Instead of a single algorithm that loops over
+quotient digits, the paper presents two mutually-recursive algorithms, for
+2n-by-n and 3n-by-2n. The paper also does not present any general (n+m)-by-n
+algorithm.
+
+The proofs in the paper are remarkably complex, especially considering that
+the algorithm is at its core just long division on wide digits, so that the
+usual long division proofs apply essentially unaltered.
+*/
+
+package big
+
+import "math/bits"
+
+// rem returns r such that r = u%v.
+// It uses z as the storage for r; the quotient is computed into a scratch nat and discarded.
+func (z nat) rem(u, v nat) (r nat) {
+	if alias(z, u) {
+		z = nil // z overlaps the input u (per alias): don't use it as storage for r
+	}
+	qp := getNat(0)
+	q, r := qp.div(z, u, v)
+	*qp = q // store q back so putNat receives whatever storage div ended up using
+	putNat(qp)
+	return r
+}
+
+// div returns q, r such that q = ⌊u/v⌋ and r = u%v = u - q·v.
+// It uses z and z2 as the storage for q and r. It panics if len(v) == 0 (v is zero).
+func (z nat) div(z2, u, v nat) (q, r nat) {
+	if len(v) == 0 {
+		panic("division by zero")
+	}
+
+	if u.cmp(v) < 0 { // u < v: the quotient is 0 and the remainder is u itself
+		q = z[:0]
+		r = z2.set(u)
+		return
+	}
+
+	if len(v) == 1 {
+		// Short division: long division optimized for a single-word divisor.
+		// In that case, the 2-by-1 guess is all we need at each step.
+		var r2 Word
+		q, r2 = z.divW(u, v[0])
+		r = z2.setWord(r2)
+		return
+	}
+
+	q, r = z.divLarge(z2, u, v) // here len(v) ≥ 2 and u ≥ v
+	return
+}
+
+// divW returns q, r such that q = ⌊x/y⌋ and r = x%y = x - q·y.
+// It uses z as the storage for q. It panics if y is 0.
+// Note that y is a single digit (Word), not a big number.
+func (z nat) divW(x nat, y Word) (q nat, r Word) {
+	m := len(x)
+	switch {
+	case y == 0:
+		panic("division by zero")
+	case y == 1:
+		q = z.set(x) // result is x; r keeps its zero value
+		return
+	case m == 0:
+		q = z[:0] // result is 0; r keeps its zero value
+		return
+	}
+	// m > 0 and y > 1
+	z = z.make(m)
+	r = divWVW(z, 0, x, y)
+	q = z.norm()
+	return
+}
+
+// modW returns x % d.
+func (x nat) modW(d Word) (r Word) {
+	// TODO(agl): we don't actually need to store the q value.
+	var q nat
+	q = q.make(len(x)) // scratch quotient, discarded; divWVW requires len(z) = len(x)
+	return divWVW(q, 0, x, d)
+}
+
+// divWVW overwrites z with ⌊x/y⌋, returning the remainder r.
+// The caller must ensure that len(z) = len(x). xn seeds the running remainder: it is used as the high word of the first 2-by-1 division.
+func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) {
+	r = xn
+	if len(x) == 1 {
+		qq, rr := bits.Div(uint(r), uint(x[0]), uint(y)) // single word: one direct 2-by-1 division, no reciprocal needed
+		z[0] = Word(qq)
+		return Word(rr)
+	}
+	rec := reciprocalWord(y)
+	for i := len(z) - 1; i >= 0; i-- { // highest index first, feeding each remainder into the next step
+		z[i], r = divWW(r, x[i], y, rec)
+	}
+	return r
+}
+
+// divLarge returns q, r such that q = ⌊uIn/vIn⌋ and r = uIn%vIn = uIn - q·vIn.
+// It uses z and u as the storage for q and r.
+// The caller must ensure that len(vIn) ≥ 2 (use divW otherwise)
+// and that len(uIn) ≥ len(vIn) (the answer is 0, uIn otherwise).
+func (z nat) divLarge(u, uIn, vIn nat) (q, r nat) {
+	n := len(vIn)
+	m := len(uIn) - n
+
+	// Scale the inputs so vIn's top bit is 1 (see “Scaling Inputs” above).
+	// vIn is treated as a read-only input (it may be in use by another
+	// goroutine), so we must make a copy.
+	// uIn is copied to u.
+	shift := nlz(vIn[n-1])
+	vp := getNat(n)
+	v := *vp
+	shlVU(v, vIn, shift)
+	u = u.make(len(uIn) + 1) // one extra word to receive what shlVU returns for uIn's top word
+	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
+
+	// The caller should not pass aliased z and u, since those are
+	// the two different outputs, but correct just in case.
+	if alias(z, u) {
+		z = nil
+	}
+	q = z.make(m + 1) // room for up to m+1 quotient digits
+
+	// Use basic or recursive long division depending on size.
+	if n < divRecursiveThreshold {
+		q.divBasic(u, v)
+	} else {
+		q.divRecursive(u, v)
+	}
+	putNat(vp) // the scaled copy of vIn is no longer needed
+
+	q = q.norm()
+
+	// Undo scaling of remainder.
+	shrVU(u, u, shift)
+	r = u.norm()
+
+	return q, r
+}
+
+// divBasic implements long division as described above.
+// It overwrites q with ⌊u/v⌋ and overwrites u with the remainder r.
+// q must be large enough to hold ⌊u/v⌋.
+func (q nat) divBasic(u, v nat) {
+	n := len(v)
+	m := len(u) - n
+
+	qhatvp := getNat(n + 1)
+	qhatv := *qhatvp
+
+	// Set up for divWW below, precomputing reciprocal argument.
+	vn1 := v[n-1]
+	rec := reciprocalWord(vn1)
+
+	// Compute each digit of quotient.
+	for j := m; j >= 0; j-- {
+		// Compute the 2-by-1 guess q̂.
+		// The first iteration must invent a leading 0 for u.
+		qhat := Word(_M)
+		var ujn Word
+		if j+n < len(u) {
+			ujn = u[j+n]
+		}
+
+		// ujn ≤ vn1, or else q̂ would be more than one digit.
+		// For ujn == vn1, we set q̂ to the max digit M above.
+		// Otherwise, we compute the 2-by-1 guess.
+		if ujn != vn1 {
+			var rhat Word
+			qhat, rhat = divWW(ujn, u[j+n-1], vn1, rec)
+
+			// Refine q̂ to a 3-by-2 guess. See “Refining Guesses” above.
+			vn2 := v[n-2]
+			x1, x2 := mulWW(qhat, vn2)
+			ujn2 := u[j+n-2]
+			for greaterThan(x1, x2, rhat, ujn2) { // x1x2 > r̂ u[j+n-2]
+				qhat--
+				prevRhat := rhat
+				rhat += vn1
+				// If r̂ overflows a Word, then the two-digit value
+				// r̂ u[j+n-2] is now definitely > x1 x2, so stop refining.
+				if rhat < prevRhat {
+					break
+				}
+				// TODO(rsc): No need for a full mulWW; q̂ dropped by one, so the
+				// product drops by vn2: x2 -= vn2; if x2 underflows, x1--
+				x1, x2 = mulWW(qhat, vn2)
+			}
+		}
+
+		// Compute q̂·v.
+		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
+		qhl := len(qhatv)
+		if j+qhl > len(u) && qhatv[n] == 0 {
+			qhl--
+		}
+
+		// Subtract q̂·v from the current section of u.
+		// If it underflows, q̂·v > u, which we fix up
+		// by decrementing q̂ and adding v back.
+		c := subVV(u[j:j+qhl], u[j:], qhatv)
+		if c != 0 {
+			c := addVV(u[j:j+n], u[j:], v)
+			// If n == qhl, the carry from subVV and the carry from addVV
+			// cancel out and don't affect u[j+n].
+			if n < qhl {
+				u[j+n] += c
+			}
+			qhat--
+		}
+
+		// Save quotient digit.
+		// Caller may know the top digit is zero and not leave room for it.
+		if j == m && m == len(q) && qhat == 0 {
+			continue
+		}
+		q[j] = qhat
+	}
+
+	putNat(qhatvp)
+}
+
+// greaterThan reports whether the two-digit numbers x1 x2 > y1 y2.
+// TODO(rsc): In contradiction to most of this file, x1 is the high
+// digit and x2 is the low digit. This should be fixed.
+func greaterThan(x1, x2, y1, y2 Word) bool {
+	return x1 > y1 || x1 == y1 && x2 > y2 // high digits decide; low digits break a tie
+}
+
+// divRecursiveThreshold is the number of divisor digits (it is compared against len(v))
+// at which point divRecursive is faster than divBasic.
+const divRecursiveThreshold = 100
+
+// divRecursive implements recursive division as described above.
+// It overwrites z with ⌊u/v⌋ and overwrites u with the remainder r.
+// z must be large enough to hold ⌊u/v⌋.
+// This function is just for allocating and freeing temporaries
+// around divRecursiveStep, the real implementation.
+func (z nat) divRecursive(u, v nat) {
+	// Recursion depth is (much) less than 2 log₂(len(v)).
+	// Allocate a slice of temporaries to be reused across recursion,
+	// plus one extra temporary (tmp) not live across the recursion.
+	recDepth := 2 * bits.Len(uint(len(v)))
+	tmp := getNat(3 * len(v))
+	temps := make([]*nat, recDepth) // one slot per recursion depth, filled in by divRecursiveStep on first use
+
+	z.clear() // divRecursiveStep adds into z, so start from zero
+	z.divRecursiveStep(u, v, 0, tmp, temps)
+
+	// Free temporaries.
+	for _, n := range temps {
+		if n != nil { // slots for depths that were never reached stay nil
+			putNat(n)
+		}
+	}
+	putNat(tmp)
+}
+
+// divRecursiveStep is the actual implementation of recursive division.
+// It adds ⌊u/v⌋ to z and overwrites u with the remainder r.
+// z must be large enough to hold ⌊u/v⌋.
+// It uses temps[depth] (allocating if needed) as a temporary live across
+// the recursive call. It also uses tmp, but not live across the recursion.
+func (z nat) divRecursiveStep(u, v nat, depth int, tmp *nat, temps []*nat) {
+	// u is a subsection of the original and may have leading zeros.
+	// TODO(rsc): The v = v.norm() is useless and should be removed.
+	// We know (and require) that v's top digit is ≥ B/2.
+	u = u.norm()
+	v = v.norm()
+	if len(u) == 0 {
+		z.clear()
+		return
+	}
+
+	// Fall back to basic division if the problem is now small enough.
+	n := len(v)
+	if n < divRecursiveThreshold {
+		z.divBasic(u, v)
+		return
+	}
+
+	// Nothing to do if u is shorter than v (implies u < v).
+	m := len(u) - n
+	if m < 0 {
+		return
+	}
+
+	// We consider B digits in a row as a single wide digit.
+	// (See “Recursive Division” above.)
+	//
+	// TODO(rsc): rename B to Wide, to avoid confusion with _B,
+	// which is something entirely different.
+	// TODO(rsc): Look into whether using ⌈n/2⌉ is better than ⌊n/2⌋.
+	B := n / 2
+
+	// Allocate a nat for qhat below (first use at this depth), or resize the existing one.
+	if temps[depth] == nil {
+		temps[depth] = getNat(n) // TODO(rsc): Can be just B+1.
+	} else {
+		*temps[depth] = temps[depth].make(B + 1)
+	}
+
+	// Compute each wide digit of the quotient.
+	//
+	// TODO(rsc): Change the loop to be
+	//	for j := (m+B-1)/B*B; j > 0; j -= B {
+	// which will make the final step a regular step, letting us
+	// delete what amounts to an extra copy of the loop body below.
+	j := m
+	for j > B {
+		// Divide u[j-B:j+n] (3 wide digits) by v (2 wide digits).
+		// First make the 2-by-1-wide-digit guess using a recursive call.
+		// Then extend the guess to the full 3-by-2 (see “Refining Guesses”).
+		//
+		// For the 2-by-1-wide-digit guess, instead of doing 2B-by-B-digit,
+		// we use a (2B+1)-by-(B+1) digit, which handles the possibility that
+		// the result has an extra leading 1 digit as well as guaranteeing
+		// that the computed q̂ will be off by at most 1 instead of 2.
+
+		// s is the number of digits to drop from the 3B- and 2B-digit chunks.
+		// We drop B-1 to be left with 2B+1 and B+1.
+		s := (B - 1)
+
+		// uu is the up-to-3B-digit section of u we are working on.
+		uu := u[j-B:]
+
+		// Compute the 2-by-1 guess q̂, leaving r̂ in uu[s:B+n].
+		qhat := *temps[depth]
+		qhat.clear()
+		qhat.divRecursiveStep(uu[s:B+n], v[s:], depth+1, tmp, temps)
+		qhat = qhat.norm()
+
+		// Extend to a 3-by-2 quotient and remainder.
+		// Because divRecursiveStep overwrote the top part of uu with
+		// the remainder r̂, the full uu already contains the equivalent
+		// of r̂·B + uₙ₋₂ from the “Refining Guesses” discussion.
+		// Subtracting q̂·vₙ₋₂ from it will compute the full-length remainder.
+		// If that subtraction underflows, q̂·v > u, which we fix up
+		// by decrementing q̂ and adding v back, same as in long division.
+
+		// TODO(rsc): Instead of subtract and fix-up, this code is computing
+		// q̂·vₙ₋₂ and decrementing q̂ until that product is ≤ u.
+		// But we can do the subtraction directly, as in the comment above
+		// and in long division, because we know that q̂ is wrong by at most one.
+		qhatv := tmp.make(3 * n)
+		qhatv.clear()
+		qhatv = qhatv.mul(qhat, v[:s]) // q̂ times the low s digits of v (the part dropped for the guess)
+		for i := 0; i < 2; i++ {
+			e := qhatv.cmp(uu.norm())
+			if e <= 0 {
+				break
+			}
+			subVW(qhat, qhat, 1) // q̂--
+			c := subVV(qhatv[:s], qhatv[:s], v[:s])
+			if len(qhatv) > s {
+				subVW(qhatv[s:], qhatv[s:], c) // propagate the borrow: overall qhatv -= v[:s]
+			}
+			addAt(uu[s:], v[s:], 0)
+		}
+		if qhatv.cmp(uu.norm()) > 0 {
+			panic("impossible")
+		}
+		c := subVV(uu[:len(qhatv)], uu[:len(qhatv)], qhatv)
+		if c > 0 {
+			subVW(uu[len(qhatv):], uu[len(qhatv):], c)
+		}
+		addAt(z, qhat, j-B) // accumulate this wide digit into z at digit offset j-B
+		j -= B
+	}
+
+	// TODO(rsc): Rewrite loop as described above and delete all this code.
+
+	// Now u < (v<<B), where the shift is by B digits; compute the low part of the quotient in the same way.
+	// Choose shift = B-1 again.
+	s := B - 1
+	qhat := *temps[depth]
+	qhat.clear()
+	qhat.divRecursiveStep(u[s:].norm(), v[s:], depth+1, tmp, temps)
+	qhat = qhat.norm()
+	qhatv := tmp.make(3 * n)
+	qhatv.clear()
+	qhatv = qhatv.mul(qhat, v[:s])
+	// Set the correct remainder as before.
+	for i := 0; i < 2; i++ {
+		if e := qhatv.cmp(u.norm()); e > 0 {
+			subVW(qhat, qhat, 1) // q̂--
+			c := subVV(qhatv[:s], qhatv[:s], v[:s])
+			if len(qhatv) > s {
+				subVW(qhatv[s:], qhatv[s:], c)
+			}
+			addAt(u[s:], v[s:], 0)
+		}
+	}
+	if qhatv.cmp(u.norm()) > 0 {
+		panic("impossible")
+	}
+	c := subVV(u[0:len(qhatv)], u[0:len(qhatv)], qhatv)
+	if c > 0 {
+		c = subVW(u[len(qhatv):], u[len(qhatv):], c)
+	}
+	if c > 0 { // a borrow out of the top of u would mean q̂·v > u, ruled out by the check above
+		panic("impossible")
+	}
+
+	// Done!
+	addAt(z, qhat.norm(), 0)
+}
diff --git a/src/math/big/prime.go b/src/math/big/prime.go
new file mode 100644
index 0000000..26688bb
--- /dev/null
+++ b/src/math/big/prime.go
@@ -0,0 +1,320 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import "math/rand"
+
+// ProbablyPrime reports whether x is probably prime. It applies the
+// Miller-Rabin test with n pseudorandomly chosen bases and, in addition,
+// a Baillie-PSW test.
+//
+// If x is prime, ProbablyPrime returns true.
+// If x is chosen randomly and not prime, ProbablyPrime probably returns false:
+// the probability of returning true for a randomly chosen non-prime is at most ¼ⁿ.
+//
+// ProbablyPrime is 100% accurate for inputs less than 2⁶⁴.
+// See Menezes et al., Handbook of Applied Cryptography, 1997, pp. 145-149,
+// and FIPS 186-4 Appendix F for further discussion of the error probabilities.
+//
+// ProbablyPrime is not suitable for judging primes that an adversary may
+// have crafted to fool the test.
+//
+// As of Go 1.8, ProbablyPrime(0) is allowed and applies only a Baillie-PSW test; negative n panics.
+// Before Go 1.8, ProbablyPrime applied only the Miller-Rabin tests, and ProbablyPrime(0) panicked.
+func (x *Int) ProbablyPrime(n int) bool {
+	// A remark on the doc comment above: it would be more precise to say
+	// that the Lucas step of the Baillie-PSW test is the extra strong Lucas
+	// test. But no one knows how to tell the Lucas variants apart inside a
+	// Baillie-PSW test (empirically they all work equally well), so that
+	// detail need not be documented or implicitly guaranteed. This general
+	// ambiguity is also why the comment avoids saying "the" Baillie-PSW test.
+
+	switch {
+	case n < 0:
+		panic("negative n for ProbablyPrime")
+	case x.neg || len(x.abs) == 0:
+		return false // negative numbers and zero are never prime
+	}
+
+	// smallPrimes has bit p set exactly when p is a prime below 64.
+	const smallPrimes uint64 = 1<<2 | 1<<3 | 1<<5 | 1<<7 |
+		1<<11 | 1<<13 | 1<<17 | 1<<19 | 1<<23 | 1<<29 | 1<<31 |
+		1<<37 | 1<<41 | 1<<43 | 1<<47 | 1<<53 | 1<<59 | 1<<61
+
+	low := x.abs[0]
+	if low < 64 && len(x.abs) == 1 {
+		return smallPrimes>>low&1 == 1
+	}
+	if low%2 == 0 {
+		return false // x is even
+	}
+
+	// Trial division by the odd primes up to 53, using two products that each fit in 32 bits.
+	const primesA = 3 * 5 * 7 * 11 * 13 * 17 * 19 * 23 * 37
+	const primesB = 29 * 31 * 41 * 43 * 47 * 53
+
+	var remA, remB uint32
+	switch _W {
+	case 32:
+		remA = uint32(x.abs.modW(primesA))
+		remB = uint32(x.abs.modW(primesB))
+	case 64:
+		rem := x.abs.modW((primesA * primesB) & _M)
+		remA = uint32(rem % primesA)
+		remB = uint32(rem % primesB)
+	default:
+		panic("math/big: invalid word size")
+	}
+
+	switch {
+	case remA%3 == 0, remA%5 == 0, remA%7 == 0, remA%11 == 0, remA%13 == 0, remA%17 == 0, remA%19 == 0, remA%23 == 0, remA%37 == 0:
+		return false
+	case remB%29 == 0, remB%31 == 0, remB%41 == 0, remB%43 == 0, remB%47 == 0, remB%53 == 0:
+		return false
+	}
+
+	return x.abs.probablyPrimeMillerRabin(n+1, true) && x.abs.probablyPrimeLucas()
+}
+
+// probablyPrimeMillerRabin reports whether n passes reps rounds of the
+// Miller-Rabin primality test with pseudo-randomly chosen bases.
+// When force2 is set, the final round uses base 2 instead of a random base.
+// See Handbook of Applied Cryptography, p. 139, Algorithm 4.24.
+// The caller must ensure that n is non-zero.
+func (n nat) probablyPrimeMillerRabin(reps int, force2 bool) bool {
+	// Write n-1 as odd << twos.
+	nMinus1 := nat(nil).sub(n, natOne)
+	twos := nMinus1.trailingZeroBits()
+	odd := nat(nil).shr(nMinus1, twos)
+
+	// Each random base is random(...) + 2; the generator is seeded with the low word of n.
+	nMinus3 := nat(nil).sub(nMinus1, natTwo)
+	limitBits := nMinus3.bitLen()
+	rng := rand.New(rand.NewSource(int64(n[0])))
+
+	var base, y, quo nat
+
+Rounds:
+	for round := 0; round < reps; round++ {
+		if force2 && round == reps-1 {
+			base = base.set(natTwo)
+		} else {
+			base = base.random(rng, nMinus3, limitBits)
+			base = base.add(base, natTwo)
+		}
+		y = y.expNN(base, odd, n, false)
+		if y.cmp(natOne) == 0 || y.cmp(nMinus1) == 0 {
+			continue
+		}
+		for j := uint(1); j < twos; j++ {
+			y = y.sqr(y)
+			quo, y = quo.div(y, y, n)
+			switch {
+			case y.cmp(nMinus1) == 0:
+				continue Rounds
+			case y.cmp(natOne) == 0:
+				return false
+			}
+		}
+		return false
+	}
+	return true
+}
+
+// probablyPrimeLucas reports whether n passes the "almost extra strong" Lucas probable prime test,
+// using Baillie-OEIS parameter selection. This corresponds to "AESLPSP" on Jacobsen's tables (link below).
+// The combination of this test and a Miller-Rabin/Fermat test with base 2 gives a Baillie-PSW test.
+//
+// References:
+//
+// Baillie and Wagstaff, "Lucas Pseudoprimes", Mathematics of Computation 35(152),
+// October 1980, pp. 1391-1417, especially page 1401.
+// https://www.ams.org/journals/mcom/1980-35-152/S0025-5718-1980-0583518-6/S0025-5718-1980-0583518-6.pdf
+//
+// Grantham, "Frobenius Pseudoprimes", Mathematics of Computation 70(234),
+// March 2000, pp. 873-891.
+// https://www.ams.org/journals/mcom/2001-70-234/S0025-5718-00-01197-2/S0025-5718-00-01197-2.pdf
+//
+// Baillie, "Extra strong Lucas pseudoprimes", OEIS A217719, https://oeis.org/A217719.
+//
+// Jacobsen, "Pseudoprime Statistics, Tables, and Data", http://ntheory.org/pseudoprimes.html.
+//
+// Nicely, "The Baillie-PSW Primality Test", https://web.archive.org/web/20191121062007/http://www.trnicely.net/misc/bpsw.html.
+// (Note that Nicely's definition of the "extra strong" test gives the wrong Jacobi condition,
+// as pointed out by Jacobsen.)
+//
+// Crandall and Pomerance, Prime Numbers: A Computational Perspective, 2nd ed.
+// Springer, 2005.
+func (n nat) probablyPrimeLucas() bool {
+	// Discard 0, 1.
+	if len(n) == 0 || n.cmp(natOne) == 0 {
+		return false
+	}
+	// Two is the only even prime.
+	// Already checked by caller, but here to allow testing in isolation.
+	if n[0]&1 == 0 {
+		return n.cmp(natTwo) == 0
+	}
+
+	// Baillie-OEIS "method C" for choosing D, P, Q,
+	// as in https://oeis.org/A217719/a217719.txt:
+	// try increasing P ≥ 3 such that D = P² - 4 (so Q = 1)
+	// until Jacobi(D, n) = -1.
+	// The search is expected to succeed for non-square n after just a few trials.
+	// After more than expected failures, check whether n is square
+	// (which would cause Jacobi(D, n) = 1 for all D not dividing n).
+	p := Word(3)
+	d := nat{1}
+	t1 := nat(nil) // temp
+	intD := &Int{abs: d}
+	intN := &Int{abs: n}
+	for ; ; p++ {
+		if p > 10000 {
+			// This is widely believed to be impossible.
+			// If we get a report, we'll want the exact number n.
+			panic("math/big: internal error: cannot find (D/n) = -1 for " + intN.String())
+		}
+		d[0] = p*p - 4 // D = P² - 4; intD shares this storage
+		j := Jacobi(intD, intN)
+		if j == -1 {
+			break
+		}
+		if j == 0 {
+			// d = p²-4 = (p-2)(p+2).
+			// If (d/n) == 0 then d shares a prime factor with n.
+			// Since the loop proceeds in increasing p and starts with p-2==1,
+			// the shared prime factor must be p+2.
+			// If p+2 == n, then n is prime; otherwise p+2 is a proper factor of n.
+			return len(n) == 1 && n[0] == p+2
+		}
+		if p == 40 {
+			// We'll never find (d/n) = -1 if n is a square.
+			// If n is a non-square we expect to find a d in just a few attempts on average.
+			// After 40 attempts, take a moment to check if n is indeed a square.
+			t1 = t1.sqrt(n)
+			t1 = t1.sqr(t1)
+			if t1.cmp(n) == 0 {
+				return false
+			}
+		}
+	}
+
+	// Grantham definition of "extra strong Lucas pseudoprime", after Thm 2.3 on p. 876
+	// (D, P, Q above have become Δ, b, 1):
+	//
+	// Let U_n = U_n(b, 1), V_n = V_n(b, 1), and Δ = b²-4.
+	// An extra strong Lucas pseudoprime to base b is a composite n = 2^r s + Jacobi(Δ, n),
+	// where s is odd and gcd(n, 2*Δ) = 1, such that either (i) U_s ≡ 0 mod n and V_s ≡ ±2 mod n,
+	// or (ii) V_{2^t s} ≡ 0 mod n for some 0 ≤ t < r-1.
+	//
+	// We know gcd(n, Δ) = 1 or else we'd have found Jacobi(d, n) == 0 above.
+	// We know gcd(n, 2) = 1 because n is odd.
+	//
+	// Arrange s = (n - Jacobi(Δ, n)) / 2^r = (n+1) / 2^r.
+	s := nat(nil).add(n, natOne)
+	r := int(s.trailingZeroBits()) // number of factors of two in n+1
+	s = s.shr(s, uint(r))
+	nm2 := nat(nil).sub(n, natTwo) // n-2
+
+	// We apply the "almost extra strong" test, which checks the above conditions
+	// except for U_s ≡ 0 mod n, which allows us to avoid computing any U_k values.
+	// Jacobsen points out that maybe we should just do the full extra strong test:
+	// "It is also possible to recover U_n using Crandall and Pomerance equation 3.13:
+	// U_n = D^-1 (2V_{n+1} - PV_n) allowing us to run the full extra-strong test
+	// at the cost of a single modular inversion. This computation is easy and fast in GMP,
+	// so we can get the full extra-strong test at essentially the same performance as the
+	// almost extra strong test."
+
+	// Compute Lucas sequence V_s(b, 1), where:
+	//
+	//	V(0) = 2
+	//	V(1) = P
+	//	V(k) = P V(k-1) - Q V(k-2).
+	//
+	// (Remember that due to method C above, P = b, Q = 1.)
+	//
+	// In general V(k) = α^k + β^k, where α and β are roots of x² - Px + Q.
+	// Crandall and Pomerance (p.147) observe that for 0 ≤ j ≤ k,
+	//
+	//	V(j+k) = V(j)V(k) - V(k-j).
+	//
+	// So in particular, to quickly double the subscript:
+	//
+	//	V(2k) = V(k)² - 2
+	//	V(2k+1) = V(k) V(k+1) - P
+	//
+	// We can therefore start with k=0 and build up to k=s in log₂(s) steps.
+	natP := nat(nil).setWord(p)
+	vk := nat(nil).setWord(2)
+	vk1 := nat(nil).setWord(p)
+	t2 := nat(nil) // temp
+	for i := int(s.bitLen()); i >= 0; i-- { // walk the bits of s from the top down
+		if s.bit(uint(i)) != 0 {
+			// k' = 2k+1
+			// V(k') = V(2k+1) = V(k) V(k+1) - P.
+			t1 = t1.mul(vk, vk1)
+			t1 = t1.add(t1, n)
+			t1 = t1.sub(t1, natP)
+			t2, vk = t2.div(vk, t1, n)
+			// V(k'+1) = V(2k+2) = V(k+1)² - 2.
+			t1 = t1.sqr(vk1)
+			t1 = t1.add(t1, nm2)
+			t2, vk1 = t2.div(vk1, t1, n)
+		} else {
+			// k' = 2k
+			// V(k'+1) = V(2k+1) = V(k) V(k+1) - P.
+			t1 = t1.mul(vk, vk1)
+			t1 = t1.add(t1, n)
+			t1 = t1.sub(t1, natP)
+			t2, vk1 = t2.div(vk1, t1, n)
+			// V(k') = V(2k) = V(k)² - 2
+			t1 = t1.sqr(vk)
+			t1 = t1.add(t1, nm2)
+			t2, vk = t2.div(vk, t1, n)
+		}
+	}
+
+	// Now k=s, so vk = V(s). Check V(s) ≡ ±2 (mod n).
+	if vk.cmp(natTwo) == 0 || vk.cmp(nm2) == 0 {
+		// Check U(s) ≡ 0.
+		// As suggested by Jacobsen, apply Crandall and Pomerance equation 3.13:
+		//
+		//	U(k) = D⁻¹ (2 V(k+1) - P V(k))
+		//
+		// Since we are checking for U(k) == 0 it suffices to check 2 V(k+1) == P V(k) mod n,
+		// or P V(k) - 2 V(k+1) == 0 mod n.
+		t1 := t1.mul(vk, natP)
+		t2 := t2.shl(vk1, 1)
+		if t1.cmp(t2) < 0 {
+			t1, t2 = t2, t1
+		}
+		t1 = t1.sub(t1, t2)
+		t3 := vk1 // steal vk1, no longer needed below
+		vk1 = nil
+		_ = vk1
+		t2, t3 = t2.div(t3, t1, n)
+		if len(t3) == 0 {
+			return true
+		}
+	}
+
+	// Check V(2^t s) ≡ 0 mod n for some 0 ≤ t < r-1.
+	for t := 0; t < r-1; t++ {
+		if len(vk) == 0 { // vk == 0
+			return true
+		}
+		// Optimization: V(k) = 2 is a fixed point for V(k') = V(k)² - 2,
+		// so if V(k) = 2, we can stop: we will never find a future V(k) == 0.
+		if len(vk) == 1 && vk[0] == 2 { // vk == 2
+			return false
+		}
+		// k' = 2k
+		// V(k') = V(2k) = V(k)² - 2
+		t1 = t1.sqr(vk)
+		t1 = t1.sub(t1, natTwo)
+		t2, vk = t2.div(vk, t1, n)
+	}
+	return false
+}
diff --git a/src/math/big/prime_test.go b/src/math/big/prime_test.go
new file mode 100644
index 0000000..8596e33
--- /dev/null
+++ b/src/math/big/prime_test.go
@@ -0,0 +1,222 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"unicode"
+)
+
+var primes = []string{ // decimal primes; TestProbablyPrime requires ProbablyPrime to accept every entry
+	"2",
+	"3",
+	"5",
+	"7",
+	"11",
+
+	"13756265695458089029",
+	"13496181268022124907",
+	"10953742525620032441",
+	"17908251027575790097",
+
+	// https://golang.org/issue/638
+	"18699199384836356663",
+
+	"98920366548084643601728869055592650835572950932266967461790948584315647051443",
+	"94560208308847015747498523884063394671606671904944666360068158221458669711639",
+
+	// https://primes.utm.edu/lists/small/small3.html
+	"449417999055441493994709297093108513015373787049558499205492347871729927573118262811508386655998299074566974373711472560655026288668094291699357843464363003144674940345912431129144354948751003607115263071543163",
+	"230975859993204150666423538988557839555560243929065415434980904258310530753006723857139742334640122533598517597674807096648905501653461687601339782814316124971547968912893214002992086353183070342498989426570593",
+	"5521712099665906221540423207019333379125265462121169655563495403888449493493629943498064604536961775110765377745550377067893607246020694972959780839151452457728855382113555867743022746090187341871655890805971735385789993",
+	"203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123",
+
+	// ECC primes: https://tools.ietf.org/html/draft-ladd-safecurves-02
+	"3618502788666131106986593281521497120414687020801267626233049500247285301239",                                                                                  // Curve1174: 2^251-9
+	"57896044618658097711785492504343953926634992332820282019728792003956564819949",                                                                                 // Curve25519: 2^255-19
+	"9850501549098619803069760025035903451269934817616361666987073351061430442874302652853566563721228910201656997576599",                                           // E-382: 2^382-105
+	"42307582002575910332922579714097346549017899709713998034217522897561970639123926132812109468141778230245837569601494931472367",                                 // Curve41417: 2^414-17
+	"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", // E-521: 2^521-1
+}
+
+var composites = []string{ // decimal non-primes; TestProbablyPrime strips white space, then requires rejection
+	"0",
+	"1",
+	"21284175091214687912771199898307297748211672914763848041968395774954376176754",
+	"6084766654921918907427900243509372380954290099172559290432744450051395395951",
+	"84594350493221918389213352992032324280367711247940675652888030554255915464401",
+	"82793403787388584738507275144194252681",
+
+	// Arnault, "Rabin-Miller Primality Test: Composite Numbers Which Pass It",
+	// Mathematics of Computation, 64(209) (January 1995), pp. 335-361.
+	"1195068768795265792518361315725116351898245581", // strong pseudoprime to prime bases 2 through 29
+	// strong pseudoprime to all prime bases up to 200
+	`
+     80383745745363949125707961434194210813883768828755814583748891752229
+      74273765333652186502336163960045457915042023603208766569966760987284
+       0439654082329287387918508691668573282677617710293896977394701670823
+        0428687109997439976544144845341155872450633409279022275296229414984
+         2306881685404326457534018329786111298960644845216191652872597534901`,
+
+	// Extra-strong Lucas pseudoprimes. https://oeis.org/A217719
+	"989",
+	"3239",
+	"5777",
+	"10877",
+	"27971",
+	"29681",
+	"30739",
+	"31631",
+	"39059",
+	"72389",
+	"73919",
+	"75077",
+	"100127",
+	"113573",
+	"125249",
+	"137549",
+	"137801",
+	"153931",
+	"155819",
+	"161027",
+	"162133",
+	"189419",
+	"218321",
+	"231703",
+	"249331",
+	"370229",
+	"429479",
+	"430127",
+	"459191",
+	"473891",
+	"480689",
+	"600059",
+	"621781",
+	"632249",
+	"635627",
+
+	"3673744903",
+	"3281593591",
+	"2385076987",
+	"2738053141",
+	"2009621503",
+	"1502682721",
+	"255866131",
+	"117987841",
+	"587861",
+
+	"6368689",
+	"8725753",
+	"80579735209",
+	"105919633",
+}
+
+func cutSpace(r rune) rune {
+	if !unicode.IsSpace(r) {
+		return r // keep everything that is not white space
+	}
+	return -1 // a negative result makes strings.Map drop the rune
+}
+
+func TestProbablyPrime(t *testing.T) {
+	nreps := 20 // Miller-Rabin rounds requested from ProbablyPrime
+	if testing.Short() {
+		nreps = 1
+	}
+	for i, s := range primes {
+		p, _ := new(Int).SetString(s, 10)
+		if !p.ProbablyPrime(nreps) || nreps != 1 && !p.ProbablyPrime(1) || !p.ProbablyPrime(0) {
+			t.Errorf("#%d prime found to be non-prime (%s)", i, s)
+		}
+	}
+
+	for i, s := range composites {
+		s = strings.Map(cutSpace, s) // one entry is split across several lines
+		c, _ := new(Int).SetString(s, 10)
+		if c.ProbablyPrime(nreps) || nreps != 1 && c.ProbablyPrime(1) || c.ProbablyPrime(0) {
+			t.Errorf("#%d composite found to be prime (%s)", i, s)
+		}
+	}
+
+	// check that ProbablyPrime panics if n < 0 and accepts n == 0 and n > 0
+	c := NewInt(11) // a prime
+	for _, n := range []int{-1, 0, 1} {
+		func() {
+			defer func() {
+				if n < 0 && recover() == nil { // only a negative n must panic
+					t.Fatalf("expected panic from ProbablyPrime(%d)", n)
+				}
+			}()
+			if !c.ProbablyPrime(n) {
+				t.Fatalf("%v should be a prime", c)
+			}
+		}()
+	}
+}
+
+func BenchmarkProbablyPrime(b *testing.B) {
+	p, _ := new(Int).SetString("203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", 10)
+	for _, reps := range []int{0, 1, 5, 10, 20} {
+		b.Run(fmt.Sprintf("n=%d", reps), func(b *testing.B) {
+			for iter := b.N; iter > 0; iter-- {
+				p.ProbablyPrime(reps)
+			}
+		})
+	}
+
+	b.Run("Lucas", func(b *testing.B) {
+		for iter := b.N; iter > 0; iter-- {
+			p.abs.probablyPrimeLucas()
+		}
+	})
+	b.Run("MillerRabinBase2", func(b *testing.B) {
+		for iter := b.N; iter > 0; iter-- {
+			p.abs.probablyPrimeMillerRabin(1, true)
+		}
+	})
+}
+
+func TestMillerRabinPseudoprimes(t *testing.T) {
+	// Expected values: https://oeis.org/A001262
+	want := []int{2047, 3277, 4033, 4681, 8321, 15841, 29341, 42799, 49141, 52633, 65281, 74665, 80581, 85489, 88357, 90751}
+	onlyMR := func(n nat) bool { return n.probablyPrimeMillerRabin(1, true) && !n.probablyPrimeLucas() }
+	testPseudoprimes(t, "probablyPrimeMillerRabin", onlyMR, want)
+}
+
+func TestLucasPseudoprimes(t *testing.T) {
+	// Expected values: https://oeis.org/A217719
+	want := []int{989, 3239, 5777, 10877, 27971, 29681, 30739, 31631, 39059, 72389, 73919, 75077}
+	onlyLucas := func(n nat) bool { return n.probablyPrimeLucas() && !n.probablyPrimeMillerRabin(1, true) }
+	testPseudoprimes(t, "probablyPrimeLucas", onlyLucas, want)
+}
+
+func testPseudoprimes(t *testing.T, name string, cond func(nat) bool, want []int) {
+	n := nat{1}
+	for i := 3; i < 100000; i += 2 { // every odd candidate below 100000
+		if testing.Short() { // short mode: skip ahead to just before the next expected value
+			if len(want) == 0 {
+				break
+			}
+			if i < want[0]-2 {
+				i = want[0] - 2
+			}
+		}
+		n[0] = Word(i)
+		expected := len(want) > 0 && i == want[0]
+		switch got := cond(n); {
+		case got && !expected:
+			t.Errorf("%s(%v, base=2) = true, want false", name, i)
+		case !got && expected:
+			t.Errorf("%s(%v, base=2) = false, want true", name, i)
+		}
+		if expected {
+			want = want[1:]
+		}
+	}
+	if len(want) > 0 {
+		t.Fatalf("forgot to test %v", want)
+	}
+}
diff --git a/src/math/big/rat.go b/src/math/big/rat.go
new file mode 100644
index 0000000..700a643
--- /dev/null
+++ b/src/math/big/rat.go
@@ -0,0 +1,542 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements multi-precision rational numbers.
+
+package big
+
+import (
+	"fmt"
+	"math"
+)
+
+// A Rat represents a quotient a/b of arbitrary precision.
+// The zero value for a Rat represents the value 0.
+//
+// Operations always take pointer arguments (*Rat) rather
+// than Rat values, and each unique Rat value requires
+// its own unique *Rat pointer. To "copy" a Rat value,
+// an existing (or newly allocated) Rat must be set to
+// a new value using the Rat.Set method; shallow copies
+// of Rats are not supported and may lead to errors.
+type Rat struct {
+	// To make zero values for Rat work without initialization,
+	// a zero value of b (len(b.abs) == 0) acts like b == 1. At
+	// the earliest opportunity (when an assignment to the Rat
+	// is made), such uninitialized denominators are set to 1.
+	// a.neg determines the sign of the Rat; b.neg is ignored.
+	a, b Int // numerator and denominator
+}
+
+// NewRat returns a new Rat set to the fraction a/b; it panics if b == 0.
+func NewRat(a, b int64) *Rat {
+	return (&Rat{}).SetFrac64(a, b)
+}
+
+// SetFloat64 sets z to exactly f and returns z.
+// If f is not finite, SetFloat64 returns nil.
+func (z *Rat) SetFloat64(f float64) *Rat {
+	const expMask = 1<<11 - 1
+	bits := math.Float64bits(f)
+	mantissa := bits & (1<<52 - 1)
+	exp := int((bits >> 52) & expMask)
+	switch exp {
+	case expMask: // non-finite
+		return nil
+	case 0: // denormal
+		exp -= 1022
+	default: // normal
+		mantissa |= 1 << 52
+		exp -= 1023
+	}
+
+	shift := 52 - exp // |f| == mantissa * 2^-shift
+
+	// Optimization (?): partially pre-normalise.
+	for mantissa&1 == 0 && shift > 0 {
+		mantissa >>= 1
+		shift--
+	}
+
+	z.a.SetUint64(mantissa)
+	z.a.neg = f < 0
+	z.b.Set(intOne)
+	if shift > 0 {
+		z.b.Lsh(&z.b, uint(shift))
+	} else {
+		z.a.Lsh(&z.a, uint(-shift))
+	}
+	return z.norm()
+}
+
+// quotToFloat32 returns the non-negative float32 value
+// nearest to the quotient a/b, using round-to-even in
+// halfway cases. It does not mutate its arguments.
+// Preconditions: b is non-zero; a and b have no common factors.
+func quotToFloat32(a, b nat) (f float32, exact bool) {
+	const (
+		// float size in bits
+		Fsize = 32
+
+		// mantissa
+		Msize  = 23
+		Msize1 = Msize + 1 // incl. implicit 1
+		Msize2 = Msize1 + 1
+
+		// exponent
+		Esize = Fsize - Msize1
+		Ebias = 1<<(Esize-1) - 1
+		Emin  = 1 - Ebias
+		Emax  = Ebias
+	)
+
+	// TODO(adonovan): specialize common degenerate cases: 1.0, integers.
+	alen := a.bitLen()
+	if alen == 0 {
+		return 0, true
+	}
+	blen := b.bitLen()
+	if blen == 0 {
+		panic("division by zero")
+	}
+
+	// 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1))
+	// (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B).
+	// This is 2 or 3 more than the float32 mantissa field width of Msize:
+	// - the optional extra bit is shifted away in step 3 below.
+	// - the high-order 1 is omitted in "normal" representation;
+	// - the low-order 1 will be used during rounding then discarded.
+	exp := alen - blen
+	var a2, b2 nat
+	a2 = a2.set(a)
+	b2 = b2.set(b)
+	if shift := Msize2 - exp; shift > 0 {
+		a2 = a2.shl(a2, uint(shift))
+	} else if shift < 0 {
+		b2 = b2.shl(b2, uint(-shift))
+	}
+
+	// 2. Compute quotient and remainder (q, r).  NB: due to the
+	// extra shift, the low-order bit of q is logically the
+	// high-order bit of r.
+	var q nat
+	q, r := q.div(a2, a2, b2) // (recycle a2)
+	mantissa := low32(q)
+	haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half
+
+	// 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1
+	// (in effect---we accomplish this incrementally).
+	if mantissa>>Msize2 == 1 {
+		if mantissa&1 == 1 {
+			haveRem = true
+		}
+		mantissa >>= 1
+		exp++
+	}
+	if mantissa>>Msize1 != 1 {
+		panic(fmt.Sprintf("expected exactly %d bits of result", Msize2))
+	}
+
+	// 4. Rounding.
+	if Emin-Msize <= exp && exp <= Emin {
+		// Denormal case; lose 'shift' bits of precision.
+		shift := uint(Emin - (exp - 1)) // [1..Esize1)
+		lostbits := mantissa & (1<<shift - 1)
+		haveRem = haveRem || lostbits != 0
+		mantissa >>= shift
+		exp = 2 - Ebias // == exp + shift
+	}
+	// Round q using round-half-to-even.
+	exact = !haveRem
+	if mantissa&1 != 0 {
+		exact = false
+		if haveRem || mantissa&2 != 0 {
+			if mantissa++; mantissa >= 1<<Msize2 {
+				// Complete rollover 11...1 => 100...0, so shift is safe
+				mantissa >>= 1
+				exp++
+			}
+		}
+	}
+	mantissa >>= 1 // discard rounding bit.  Mantissa now scaled by 1<<Msize1.
+
+	f = float32(math.Ldexp(float64(mantissa), exp-Msize1))
+	if math.IsInf(float64(f), 0) {
+		exact = false
+	}
+	return
+}
+
+// quotToFloat64 returns the non-negative float64 value
+// nearest to the quotient a/b, using round-to-even in
+// halfway cases. It does not mutate its arguments.
+// Preconditions: b is non-zero; a and b have no common factors.
+func quotToFloat64(a, b nat) (f float64, exact bool) {
+	const (
+		// float size in bits
+		Fsize = 64
+
+		// mantissa
+		Msize  = 52
+		Msize1 = Msize + 1 // incl. implicit 1
+		Msize2 = Msize1 + 1
+
+		// exponent
+		Esize = Fsize - Msize1
+		Ebias = 1<<(Esize-1) - 1
+		Emin  = 1 - Ebias
+		Emax  = Ebias
+	)
+
+	// TODO(adonovan): specialize common degenerate cases: 1.0, integers.
+	alen := a.bitLen()
+	if alen == 0 {
+		return 0, true
+	}
+	blen := b.bitLen()
+	if blen == 0 {
+		panic("division by zero")
+	}
+
+	// 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1))
+	// (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B).
+	// This is 2 or 3 more than the float64 mantissa field width of Msize:
+	// - the optional extra bit is shifted away in step 3 below.
+	// - the high-order 1 is omitted in "normal" representation;
+	// - the low-order 1 will be used during rounding then discarded.
+	exp := alen - blen
+	var a2, b2 nat
+	a2 = a2.set(a)
+	b2 = b2.set(b)
+	if shift := Msize2 - exp; shift > 0 {
+		a2 = a2.shl(a2, uint(shift))
+	} else if shift < 0 {
+		b2 = b2.shl(b2, uint(-shift))
+	}
+
+	// 2. Compute quotient and remainder (q, r).  NB: due to the
+	// extra shift, the low-order bit of q is logically the
+	// high-order bit of r.
+	var q nat
+	q, r := q.div(a2, a2, b2) // (recycle a2)
+	mantissa := low64(q)
+	haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half
+
+	// 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1
+	// (in effect---we accomplish this incrementally).
+	if mantissa>>Msize2 == 1 {
+		if mantissa&1 == 1 {
+			haveRem = true
+		}
+		mantissa >>= 1
+		exp++
+	}
+	if mantissa>>Msize1 != 1 {
+		panic(fmt.Sprintf("expected exactly %d bits of result", Msize2))
+	}
+
+	// 4. Rounding.
+	if Emin-Msize <= exp && exp <= Emin {
+		// Denormal case; lose 'shift' bits of precision.
+		shift := uint(Emin - (exp - 1)) // [1..Esize1)
+		lostbits := mantissa & (1<<shift - 1)
+		haveRem = haveRem || lostbits != 0
+		mantissa >>= shift
+		exp = 2 - Ebias // == exp + shift
+	}
+	// Round q using round-half-to-even.
+	exact = !haveRem
+	if mantissa&1 != 0 {
+		exact = false
+		if haveRem || mantissa&2 != 0 {
+			if mantissa++; mantissa >= 1<<Msize2 {
+				// Complete rollover 11...1 => 100...0, so shift is safe
+				mantissa >>= 1
+				exp++
+			}
+		}
+	}
+	mantissa >>= 1 // discard rounding bit.  Mantissa now scaled by 1<<Msize1.
+
+	f = math.Ldexp(float64(mantissa), exp-Msize1)
+	if math.IsInf(f, 0) {
+		exact = false
+	}
+	return
+}
+
+// Float32 returns the float32 value nearest to x, together with a bool
+// reporting whether f represents x exactly. If the magnitude of x is too
+// large to be represented by a float32, f is an infinity and exact is false.
+// The sign of f always matches the sign of x, even if f == 0.
+func (x *Rat) Float32() (f float32, exact bool) {
+	den := natOne // an unset denominator stands for 1
+	if len(x.b.abs) > 0 {
+		den = x.b.abs
+	}
+	f, exact = quotToFloat32(x.a.abs, den)
+	if !x.a.neg {
+		return f, exact
+	}
+	return -f, exact
+}
+
+// Float64 returns the float64 value nearest to x, together with a bool
+// reporting whether f represents x exactly. If the magnitude of x is too
+// large to be represented by a float64, f is an infinity and exact is false.
+// The sign of f always matches the sign of x, even if f == 0.
+func (x *Rat) Float64() (f float64, exact bool) {
+	den := natOne // an unset denominator stands for 1
+	if len(x.b.abs) > 0 {
+		den = x.b.abs
+	}
+	f, exact = quotToFloat64(x.a.abs, den)
+	if !x.a.neg {
+		return f, exact
+	}
+	return -f, exact
+}
+
+// SetFrac sets z to the fraction a/b and returns z.
+// It panics if b == 0.
+func (z *Rat) SetFrac(a, b *Int) *Rat {
+	z.a.neg = a.neg != b.neg
+	den := b.abs
+	switch {
+	case len(den) == 0:
+		panic("division by zero")
+	case &z.a == b, alias(z.a.abs, den):
+		den = nat(nil).set(den) // copy: writing the numerator below would clobber it
+	}
+	z.a.abs = z.a.abs.set(a.abs)
+	z.b.abs = z.b.abs.set(den)
+	return z.norm()
+}
+
+// SetFrac64 sets z to the fraction a/b and returns z.
+// It panics if b == 0.
+func (z *Rat) SetFrac64(a, b int64) *Rat {
+	if b == 0 {
+		panic("division by zero")
+	}
+	den := uint64(b)
+	z.a.SetInt64(a)
+	if b < 0 {
+		// Move the sign to the numerator. Unsigned negation yields |b| even for the minimum int64.
+		den, z.a.neg = -den, !z.a.neg
+	}
+	z.b.abs = z.b.abs.setUint64(den)
+	return z.norm()
+}
+
+// SetInt sets z to x (by making a copy of x) and returns z.
+func (z *Rat) SetInt(x *Int) *Rat {
+	z.a.Set(x) // copy x before touching z.b: x may be z's own denominator (see Denom)
+	z.b.abs = z.b.abs.setWord(1)
+	return z
+}
+
+// SetInt64 sets z to the integer x and returns z.
+func (z *Rat) SetInt64(x int64) *Rat {
+	z.a.SetInt64(x)
+	z.b.abs = z.b.abs.setWord(1) // an integer has denominator 1
+	return z
+}
+
+// SetUint64 sets z to the integer x and returns z.
+func (z *Rat) SetUint64(x uint64) *Rat {
+	z.a.SetUint64(x)
+	z.b.abs = z.b.abs.setWord(1) // an integer has denominator 1
+	return z
+}
+
+// Set sets z to x (by making a copy of x) and returns z.
+func (z *Rat) Set(x *Rat) *Rat {
+	if z != x { // nothing to copy when z and x are the same Rat
+		z.a.Set(&x.a)
+		z.b.Set(&x.b)
+	}
+	if len(z.b.abs) == 0 { // materialize an unset denominator as 1, even when z == x
+		z.b.abs = z.b.abs.setWord(1)
+	}
+	return z
+}
+
+// Abs sets z to |x| (the absolute value of x)
+// and returns z.
+func (z *Rat) Abs(x *Rat) *Rat {
+	z.Set(x).a.neg = false // Set returns z, so this clears z's sign
+	return z
+}
+
+// Neg sets z to the negation of x and returns z.
+func (z *Rat) Neg(x *Rat) *Rat {
+	z.Set(x)
+	z.a.neg = !z.a.neg && len(z.a.abs) != 0 // zero stays unsigned
+	return z
+}
+
+// Inv sets z to 1/x and returns z. If x == 0, Inv panics.
+func (z *Rat) Inv(x *Rat) *Rat {
+	if len(x.a.abs) == 0 {
+		panic("division by zero")
+	}
+	z.Set(x)
+	num, den := z.a.abs, z.b.abs
+	z.a.abs, z.b.abs = den, num // the sign stays in z.a.neg
+	return z
+}
+
+// Sign returns:
+//
+//	-1 if x <  0
+//	 0 if x == 0
+//	+1 if x >  0
+func (x *Rat) Sign() int {
+	return x.a.Sign() // the numerator carries the sign of the Rat
+}
+
+// IsInt reports whether the denominator of x is 1.
+func (x *Rat) IsInt() bool {
+	return len(x.b.abs) == 0 || x.b.abs.cmp(natOne) == 0 // an unset denominator counts as 1
+}
+
+// Num returns the numerator of x; it may be <= 0.
+// The result is a reference to x's numerator; it
+// may change if a new value is assigned to x, and vice versa.
+// The sign of the numerator corresponds to the sign of x.
+func (x *Rat) Num() *Int {
+	return &x.a // no copy is made
+}
+
+// Denom returns the denominator of x, which is always > 0.
+// The result refers to x's own denominator, except when x is
+// an uninitialized (zero value) Rat: then the result is a
+// newly allocated Int of value 1. (Any operation that sets x
+// initializes it, including x.Set(x).)
+// When the result refers to x's denominator, it may change
+// if a new value is assigned to x, and vice versa.
+func (x *Rat) Denom() *Int {
+	// Note that x.b.neg is guaranteed false.
+	if len(x.b.abs) > 0 {
+		return &x.b
+	}
+	// x has no explicit denominator yet, so hand out a fresh 1.
+	// Note: should this prove problematic, we could panic instead
+	// and require the Rat to be explicitly initialized.
+	return &Int{abs: nat{1}}
+}
+
+// norm brings z into canonical form and returns z: zero is unsigned with
+// denominator 1, an unset denominator becomes 1, and fractions are reduced.
+func (z *Rat) norm() *Rat {
+	if len(z.a.abs) == 0 {
+		z.a.neg = false
+		z.b.abs = z.b.abs.setWord(1)
+		return z
+	}
+	if len(z.b.abs) == 0 {
+		z.b.abs = z.b.abs.setWord(1)
+		return z
+	}
+	sign := z.a.neg
+	z.a.neg, z.b.neg = false, false
+	g := NewInt(0).lehmerGCD(nil, nil, &z.a, &z.b)
+	if g.Cmp(intOne) != 0 {
+		z.a.abs, _ = z.a.abs.div(nil, z.a.abs, g.abs)
+		z.b.abs, _ = z.b.abs.div(nil, z.b.abs, g.abs)
+	}
+	z.a.neg = sign
+	return z
+}
+
+// mulDenom sets z to the product of the denominators x and y and
+// returns z. An empty x or y is the zero-value denominator and is
+// treated as 1.
+func mulDenom(z, x, y nat) nat {
+	switch xSet, ySet := len(x) > 0, len(y) > 0; {
+	case xSet && ySet:
+		return z.mul(x, y)
+	case xSet:
+		return z.set(x)
+	case ySet:
+		return z.set(y)
+	}
+	return z.setWord(1)
+}
+
+// scaleDenom sets z to the product x*f.
+// An empty f is the zero-value denominator; z then becomes a copy of x.
+func (z *Int) scaleDenom(x *Int, f nat) {
+	if len(f) > 0 {
+		z.abs = z.abs.mul(x.abs, f)
+		z.neg = x.neg
+		return
+	}
+	z.Set(x)
+}
+
+// Cmp compares x and y. The result is:
+//
+//	-1 if x <  y
+//	 0 if x == y
+//	+1 if x >  y
+func (x *Rat) Cmp(y *Rat) int {
+	var lhs, rhs Int // cross products x.a*y.b and y.a*x.b
+	lhs.scaleDenom(&x.a, y.b.abs)
+	rhs.scaleDenom(&y.a, x.b.abs)
+	return lhs.Cmp(&rhs)
+}
+
+// Add sets z to x+y and returns z.
+func (z *Rat) Add(x, y *Rat) *Rat {
+	var xNum, yNum Int // numerators over the common denominator x.b*y.b
+	xNum.scaleDenom(&x.a, y.b.abs)
+	yNum.scaleDenom(&y.a, x.b.abs)
+	z.a.Add(&xNum, &yNum)
+	z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs)
+	return z.norm()
+}
+
+// Sub sets z to x-y and returns z.
+func (z *Rat) Sub(x, y *Rat) *Rat {
+	var xNum, yNum Int // numerators over the common denominator x.b*y.b
+	xNum.scaleDenom(&x.a, y.b.abs)
+	yNum.scaleDenom(&y.a, x.b.abs)
+	z.a.Sub(&xNum, &yNum)
+	z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs)
+	return z.norm()
+}
+
+// Mul sets z to x*y and returns z.
+func (z *Rat) Mul(x, y *Rat) *Rat {
+	if x != y {
+		z.a.Mul(&x.a, &y.a)
+		z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs)
+		return z.norm()
+	}
+	// Squaring: the result is positive and can't be reduced, so norm() is not needed.
+	z.a.neg = false
+	z.a.abs = z.a.abs.sqr(x.a.abs)
+	if len(x.b.abs) == 0 {
+		z.b.abs = z.b.abs.setWord(1)
+	} else {
+		z.b.abs = z.b.abs.sqr(x.b.abs)
+	}
+	return z
+}
+
+// Quo sets z to x/y and returns z.
+// It panics if y == 0.
+func (z *Rat) Quo(x, y *Rat) *Rat {
+	if len(y.a.abs) == 0 {
+		panic("division by zero")
+	}
+	// (x.a/x.b) / (y.a/y.b) == (x.a*y.b) / (y.a*x.b)
+	var num, den Int
+	num.scaleDenom(&x.a, y.b.abs)
+	den.scaleDenom(&y.a, x.b.abs)
+	z.a.abs, z.b.abs = num.abs, den.abs
+	z.a.neg = num.neg != den.neg
+	return z.norm()
+}
diff --git a/src/math/big/rat_test.go b/src/math/big/rat_test.go
new file mode 100644
index 0000000..d98c89b
--- /dev/null
+++ b/src/math/big/rat_test.go
@@ -0,0 +1,746 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"math"
+	"testing"
+)
+
+// TestZeroRat checks that zero-valued Rats compare equal, format as zero,
+// stay zero under Add, Sub and Mul, and that dividing by one of them panics
+// with "division by zero".
+func TestZeroRat(t *testing.T) {
+	var x, y, z Rat
+	y.SetFrac64(0, 42)
+
+	if c := x.Cmp(&y); c != 0 {
+		t.Errorf("x and y should be both equal and zero")
+	}
+
+	if got := x.String(); got != "0/1" {
+		t.Errorf("got x = %s, want 0/1", got)
+	}
+
+	if got := x.RatString(); got != "0" {
+		t.Errorf("got x = %s, want 0", got)
+	}
+
+	z.Add(&x, &y)
+	if got := z.RatString(); got != "0" {
+		t.Errorf("got x+y = %s, want 0", got)
+	}
+
+	z.Sub(&x, &y)
+	if got := z.RatString(); got != "0" {
+		t.Errorf("got x-y = %s, want 0", got)
+	}
+
+	z.Mul(&x, &y)
+	if got := z.RatString(); got != "0" {
+		t.Errorf("got x*y = %s, want 0", got)
+	}
+
+	// The division below must panic with exactly this message; a missing
+	// panic or a different message is re-raised.
+	defer func() {
+		if r := recover(); r == nil || r.(string) != "division by zero" {
+			panic(r)
+		}
+	}()
+	z.Quo(&x, &y)
+}
+
+// TestRatSign checks that Sign agrees with the result of comparing against
+// zero for every setStringTests input that SetString accepts.
+func TestRatSign(t *testing.T) {
+	zero := NewRat(0, 1)
+	for _, a := range setStringTests {
+		x, ok := new(Rat).SetString(a.in)
+		if !ok {
+			continue
+		}
+		s := x.Sign()
+		e := x.Cmp(zero)
+		if s != e {
+			// x is already a *Rat. Passing &x would hand fmt a **Rat, which
+			// prints as an address rather than the value under test.
+			t.Errorf("got %d; want %d for z = %v", s, e, x)
+		}
+	}
+}
+
+// ratCmpTests lists pairs of Rat strings together with the result
+// TestRatCmp expects from comparing the first against the second.
+var ratCmpTests = []struct {
+	rat1, rat2 string
+	out        int
+}{
+	{"0", "0/1", 0},
+	{"1/1", "1", 0},
+	{"-1", "-2/2", 0},
+	{"1", "0", 1},
+	{"0/1", "1/1", -1},
+	{"-5/1434770811533343057144", "-5/1434770811533343057145", -1},
+	{"49832350382626108453/8964749413", "49832350382626108454/8964749413", -1},
+	{"-37414950961700930/7204075375675961", "37414950961700930/7204075375675961", -1},
+	{"37414950961700930/7204075375675961", "74829901923401860/14408150751351922", 0},
+}
+
+// TestRatCmp runs Cmp over every entry of ratCmpTests.
+func TestRatCmp(t *testing.T) {
+	for i, tc := range ratCmpTests {
+		lhs, _ := new(Rat).SetString(tc.rat1)
+		rhs, _ := new(Rat).SetString(tc.rat2)
+
+		if got := lhs.Cmp(rhs); got != tc.out {
+			t.Errorf("#%d got out = %v; want %v", i, got, tc.out)
+		}
+	}
+}
+
+// TestIsInt checks that IsInt is true exactly when the denominator
+// compares equal to 1, for every parsable setStringTests input.
+func TestIsInt(t *testing.T) {
+	one := NewInt(1)
+	for _, tc := range setStringTests {
+		x, ok := new(Rat).SetString(tc.in)
+		if !ok {
+			continue
+		}
+		got := x.IsInt()
+		want := x.Denom().Cmp(one) == 0
+		if got != want {
+			t.Errorf("got IsInt(%v) == %v; want %v", x, got, want)
+		}
+	}
+}
+
+// TestRatAbs checks Abs against a reference computed by negating
+// (via 0 - x) every parsable setStringTests input that is below zero.
+func TestRatAbs(t *testing.T) {
+	zero := new(Rat)
+	for _, tc := range setStringTests {
+		x, ok := new(Rat).SetString(tc.in)
+		if !ok {
+			continue
+		}
+		want := new(Rat).Set(x)
+		if want.Cmp(zero) < 0 {
+			want.Sub(zero, want)
+		}
+		if got := new(Rat).Abs(x); got.Cmp(want) != 0 {
+			t.Errorf("got Abs(%v) = %v; want %v", x, got, want)
+		}
+	}
+}
+
+// TestRatNeg checks Neg against 0 - x for every parsable
+// setStringTests input.
+func TestRatNeg(t *testing.T) {
+	zero := new(Rat)
+	for _, tc := range setStringTests {
+		x, ok := new(Rat).SetString(tc.in)
+		if !ok {
+			continue
+		}
+		want := new(Rat).Sub(zero, x)
+		if got := new(Rat).Neg(x); got.Cmp(want) != 0 {
+			t.Errorf("got Neg(%v) = %v; want %v", x, got, want)
+		}
+	}
+}
+
+// TestRatInv checks Inv against the fraction built from the swapped
+// denominator and numerator, for every parsable nonzero setStringTests input.
+func TestRatInv(t *testing.T) {
+	zero := new(Rat)
+	for _, tc := range setStringTests {
+		x, ok := new(Rat).SetString(tc.in)
+		if !ok || x.Cmp(zero) == 0 {
+			// Unparsable inputs are skipped; zero is skipped to avoid
+			// division by zero.
+			continue
+		}
+		want := new(Rat).SetFrac(x.Denom(), x.Num())
+		if got := new(Rat).Inv(x); got.Cmp(want) != 0 {
+			t.Errorf("got Inv(%v) = %v; want %v", x, got, want)
+		}
+	}
+}
+
+// ratBinFun is the shape of a binary Rat operation such as (*Rat).Add:
+// it takes a receiver z and operands x, y, and returns a *Rat.
+type ratBinFun func(z, x, y *Rat) *Rat
+
+// ratBinArg holds one test case for testRatBin: the operands x and y and
+// the expected result z, all as strings to be parsed with SetString.
+type ratBinArg struct {
+	x, y, z string
+}
+
+// testRatBin applies f to the operands of a and reports an error, labeled
+// with name and index i, if the result does not compare equal to a.z.
+func testRatBin(t *testing.T, i int, name string, f ratBinFun, a ratBinArg) {
+	x, _ := new(Rat).SetString(a.x)
+	y, _ := new(Rat).SetString(a.y)
+	want, _ := new(Rat).SetString(a.z)
+
+	if got := f(new(Rat), x, y); got.Cmp(want) != 0 {
+		t.Errorf("%s #%d got %s want %s", name, i, got, want)
+	}
+}
+
+// ratBinTests lists operand pairs x, y together with their expected sum
+// and product. TestRatBin uses each entry for Add and Mul directly and
+// derives the Sub and Quo cases from the same values.
+var ratBinTests = []struct {
+	x, y      string
+	sum, prod string
+}{
+	{"0", "0", "0", "0"},
+	{"0", "1", "1", "0"},
+	{"-1", "0", "-1", "0"},
+	{"-1", "1", "0", "-1"},
+	{"1", "1", "2", "1"},
+	{"1/2", "1/2", "1", "1/4"},
+	{"1/4", "1/3", "7/12", "1/12"},
+	{"2/5", "-14/3", "-64/15", "-28/15"},
+	{"4707/49292519774798173060", "-3367/70976135186689855734", "84058377121001851123459/1749296273614329067191168098769082663020", "-1760941/388732505247628681598037355282018369560"},
+	{"-61204110018146728334/3", "-31052192278051565633/2", "-215564796870448153567/6", "950260896245257153059642991192710872711/3"},
+	{"-854857841473707320655/4237645934602118692642972629634714039", "-18/31750379913563777419", "-27/133467566250814981", "15387441146526731771790/134546868362786310073779084329032722548987800600710485341"},
+	{"618575745270541348005638912139/19198433543745179392300736", "-19948846211000086/637313996471", "27674141753240653/30123979153216", "-6169936206128396568797607742807090270137721977/6117715203873571641674006593837351328"},
+	{"-3/26206484091896184128", "5/2848423294177090248", "15310893822118706237/9330894968229805033368778458685147968", "-5/24882386581946146755650075889827061248"},
+	{"26946729/330400702820", "41563965/225583428284", "1238218672302860271/4658307703098666660055", "224002580204097/14906584649915733312176"},
+	{"-8259900599013409474/7", "-84829337473700364773/56707961321161574960", "-468402123685491748914621885145127724451/396955729248131024720", "350340947706464153265156004876107029701/198477864624065512360"},
+	{"575775209696864/1320203974639986246357", "29/712593081308", "410331716733912717985762465/940768218243776489278275419794956", "808/45524274987585732633"},
+	{"1786597389946320496771/2066653520653241", "6269770/1992362624741777", "3559549865190272133656109052308126637/4117523232840525481453983149257", "8967230/3296219033"},
+	{"-36459180403360509753/32150500941194292113930", "9381566963714/9633539", "301622077145533298008420642898530153/309723104686531919656937098270", "-3784609207827/3426986245"},
+}
+
+// TestRatBin checks Add, Sub, Mul and Quo (each in both operand orders)
+// against every entry of ratBinTests.
+func TestRatBin(t *testing.T) {
+	for i, tc := range ratBinTests {
+		// x + y == y + x == sum
+		testRatBin(t, i, "Add", (*Rat).Add, ratBinArg{tc.x, tc.y, tc.sum})
+		testRatBin(t, i, "Add symmetric", (*Rat).Add, ratBinArg{tc.y, tc.x, tc.sum})
+
+		// sum - x == y and sum - y == x
+		testRatBin(t, i, "Sub", (*Rat).Sub, ratBinArg{tc.sum, tc.x, tc.y})
+		testRatBin(t, i, "Sub symmetric", (*Rat).Sub, ratBinArg{tc.sum, tc.y, tc.x})
+
+		// x * y == y * x == prod
+		testRatBin(t, i, "Mul", (*Rat).Mul, ratBinArg{tc.x, tc.y, tc.prod})
+		testRatBin(t, i, "Mul symmetric", (*Rat).Mul, ratBinArg{tc.y, tc.x, tc.prod})
+
+		// prod / x == y and prod / y == x, skipping zero divisors
+		if tc.x != "0" {
+			testRatBin(t, i, "Quo", (*Rat).Quo, ratBinArg{tc.prod, tc.x, tc.y})
+		}
+		if tc.y != "0" {
+			testRatBin(t, i, "Quo symmetric", (*Rat).Quo, ratBinArg{tc.prod, tc.y, tc.x})
+		}
+	}
+}
+
+// TestIssue820 checks Quo when the receiver is also one (or both) of
+// the operands.
+func TestIssue820(t *testing.T) {
+	// receiver is the divisor: 3 / 2
+	x, y := NewRat(3, 1), NewRat(2, 1)
+	got := y.Quo(x, y)
+	want := NewRat(3, 2)
+	if got.Cmp(want) != 0 {
+		t.Errorf("got %s want %s", got, want)
+	}
+
+	// receiver is the divisor: 2 / 3
+	y, x = NewRat(3, 1), NewRat(2, 1)
+	got = y.Quo(x, y)
+	want = NewRat(2, 3)
+	if got.Cmp(want) != 0 {
+		t.Errorf("got %s want %s", got, want)
+	}
+
+	// receiver, dividend and divisor are all the same value: 3 / 3
+	x = NewRat(3, 1)
+	got = x.Quo(x, x)
+	want = NewRat(3, 3)
+	if got.Cmp(want) != 0 {
+		t.Errorf("got %s want %s", got, want)
+	}
+}
+
+// setFrac64Tests lists numerator/denominator pairs a, b together with the
+// RatString output that TestRatSetFrac64Rat expects from SetFrac64(a, b).
+var setFrac64Tests = []struct {
+	a, b int64
+	out  string
+}{
+	{0, 1, "0"},
+	{0, -1, "0"},
+	{1, 1, "1"},
+	{-1, 1, "-1"},
+	{1, -1, "-1"},
+	{-1, -1, "1"},
+	{-9223372036854775808, -9223372036854775808, "1"},
+}
+
+// TestRatSetFrac64Rat runs SetFrac64 over every entry of setFrac64Tests
+// and compares the RatString form of the result.
+func TestRatSetFrac64Rat(t *testing.T) {
+	for i, tc := range setFrac64Tests {
+		got := new(Rat).SetFrac64(tc.a, tc.b).RatString()
+		if got != tc.out {
+			t.Errorf("#%d got %s want %s", i, got, tc.out)
+		}
+	}
+}
+
+// TestIssue2379 checks SetFrac when its arguments alias the receiver's own
+// numerator and/or denominator, and when both arguments are the same Int.
+func TestIssue2379(t *testing.T) {
+	want := NewRat(3, 2)
+
+	// 1) arguments are independent of the receiver
+	x := new(Rat)
+	x.SetFrac(NewInt(3), NewInt(2))
+	if x.Cmp(want) != 0 {
+		t.Errorf("1) got %s want %s", x, want)
+	}
+
+	// 2) the receiver's numerator is passed as the new denominator
+	x = NewRat(2, 3)
+	x.SetFrac(NewInt(3), x.Num())
+	if x.Cmp(want) != 0 {
+		t.Errorf("2) got %s want %s", x, want)
+	}
+
+	// 3) the receiver's denominator is passed as the new numerator
+	x = NewRat(2, 3)
+	x.SetFrac(x.Denom(), NewInt(2))
+	if x.Cmp(want) != 0 {
+		t.Errorf("3) got %s want %s", x, want)
+	}
+
+	// 4) the receiver's numerator and denominator are swapped
+	x = NewRat(2, 3)
+	x.SetFrac(x.Denom(), x.Num())
+	if x.Cmp(want) != 0 {
+		t.Errorf("4) got %s want %s", x, want)
+	}
+
+	// 5) the same Int serves as numerator and denominator
+	want = NewRat(1, 1)
+	seven := NewInt(7)
+	x = new(Rat)
+	x.SetFrac(seven, seven)
+	if x.Cmp(want) != 0 {
+		t.Errorf("5) got %s want %s", x, want)
+	}
+}
+
+// TestIssue3521 checks how a Rat relates to the *Int values returned by
+// Num and Denom: whether they are shared with the Rat, and how writing
+// through them changes the Rat's value.
+func TestIssue3521(t *testing.T) {
+	a := new(Int)
+	b := new(Int)
+	a.SetString("64375784358435883458348587", 0)
+	b.SetString("4789759874531", 0)
+
+	// 0) a raw zero value has 1 as denominator
+	zero := new(Rat)
+	one := NewInt(1)
+	if zero.Denom().Cmp(one) != 0 {
+		t.Errorf("0) got %s want %s", zero.Denom(), one)
+	}
+
+	// 1a) the denominator of an (uninitialized) zero value is not shared with the value
+	s := &zero.b
+	d := zero.Denom()
+	if d == s {
+		t.Errorf("1a) got %s (%p) == %s (%p) want different *Int values", d, d, s, s)
+	}
+
+	// 1b) the denominator of an (uninitialized) value is a new 1 each time
+	d1 := zero.Denom()
+	d2 := zero.Denom()
+	if d1 == d2 {
+		t.Errorf("1b) got %s (%p) == %s (%p) want different *Int values", d1, d1, d2, d2)
+	}
+
+	// 1c) the denominator of an initialized zero value is shared with the value
+	x := new(Rat)
+	x.Set(x) // initialize x (any operation that sets x explicitly will do)
+	s = &x.b
+	d = x.Denom()
+	if d != s {
+		t.Errorf("1c) got %s (%p) != %s (%p) want identical *Int values", d, d, s, s)
+	}
+
+	// 1d) a zero value remains zero independent of denominator
+	// (the denominator is overwritten with -b through the shared *Int)
+	x.Denom().Set(new(Int).Neg(b))
+	if x.Cmp(zero) != 0 {
+		t.Errorf("1d) got %s want %s", x, zero)
+	}
+
+	// 1e) a zero value may have a denominator != 0 and != 1
+	// (after 1d the denominator was set to -b; with numerator a the value
+	// is expected to compare equal to a/b)
+	x.Num().Set(a)
+	qab := new(Rat).SetFrac(a, b)
+	if x.Cmp(qab) != 0 {
+		t.Errorf("1e) got %s want %s", x, qab)
+	}
+
+	// 2a) an integral value becomes a fraction depending on denominator
+	x.SetFrac64(10, 2)
+	x.Denom().SetInt64(3)
+	q53 := NewRat(5, 3)
+	if x.Cmp(q53) != 0 {
+		t.Errorf("2a) got %s want %s", x, q53)
+	}
+
+	// 2b) an integral value becomes a fraction depending on denominator
+	// (same as 2a, but starting from a Rat created by NewRat)
+	x = NewRat(10, 2)
+	x.Denom().SetInt64(3)
+	if x.Cmp(q53) != 0 {
+		t.Errorf("2b) got %s want %s", x, q53)
+	}
+
+	// 3) changing the numerator/denominator of a Rat changes the Rat
+	// (a and b are rebound to x's own numerator and denominator here)
+	x.SetFrac(a, b)
+	a = x.Num()
+	b = x.Denom()
+	a.SetInt64(5)
+	b.SetInt64(3)
+	if x.Cmp(q53) != 0 {
+		t.Errorf("3) got %s want %s", x, q53)
+	}
+}
+
+func TestFloat32Distribution(t *testing.T) {
+	// Generate a distribution of (sign, mantissa, exp) values
+	// broader than the float32 range, and check Rat.Float32()
+	// always picks the closest float32 approximation.
+	var add = []int64{
+		0,
+		1,
+		3,
+		5,
+		7,
+		9,
+		11,
+	}
+	var winc, einc = uint64(5), 15 // quick test (~60ms on x86-64)
+	if *long {
+		winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64)
+	}
+
+	for _, sign := range "+-" {
+		for _, a := range add {
+			for wid := uint64(0); wid < 30; wid += winc {
+				b := 1<<wid + a
+				if sign == '-' {
+					b = -b
+				}
+				for exp := -150; exp < 150; exp += einc {
+					num, den := NewInt(b), NewInt(1)
+					if exp > 0 {
+						num.Lsh(num, uint(exp))
+					} else {
+						den.Lsh(den, uint(-exp))
+					}
+					r := new(Rat).SetFrac(num, den)
+					f, _ := r.Float32()
+
+					if !checkIsBestApprox32(t, f, r) {
+						// Append context information.
+						t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)",
+							b, exp, f, f, math.Ldexp(float64(b), exp), r)
+					}
+
+					checkNonLossyRoundtrip32(t, f)
+				}
+			}
+		}
+	}
+}
+
+func TestFloat64Distribution(t *testing.T) {
+	// Generate a distribution of (sign, mantissa, exp) values
+	// broader than the float64 range, and check Rat.Float64()
+	// always picks the closest float64 approximation.
+	var add = []int64{
+		0,
+		1,
+		3,
+		5,
+		7,
+		9,
+		11,
+	}
+	var winc, einc = uint64(10), 500 // quick test (~12ms on x86-64)
+	if *long {
+		winc, einc = uint64(1), 1 // soak test (~75s on x86-64)
+	}
+
+	for _, sign := range "+-" {
+		for _, a := range add {
+			for wid := uint64(0); wid < 60; wid += winc {
+				b := 1<<wid + a
+				if sign == '-' {
+					b = -b
+				}
+				for exp := -1100; exp < 1100; exp += einc {
+					num, den := NewInt(b), NewInt(1)
+					if exp > 0 {
+						num.Lsh(num, uint(exp))
+					} else {
+						den.Lsh(den, uint(-exp))
+					}
+					r := new(Rat).SetFrac(num, den)
+					f, _ := r.Float64()
+
+					if !checkIsBestApprox64(t, f, r) {
+						// Append context information.
+						t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)",
+							b, exp, f, f, math.Ldexp(float64(b), exp), r)
+					}
+
+					checkNonLossyRoundtrip64(t, f)
+				}
+			}
+		}
+	}
+}
+
+// TestSetFloat64NonFinite verifies that SetFloat64 returns nil when given
+// NaN or an infinity.
+func TestSetFloat64NonFinite(t *testing.T) {
+	nonFinite := []float64{math.NaN(), math.Inf(+1), math.Inf(-1)}
+	for _, f := range nonFinite {
+		var r Rat
+		got := r.SetFloat64(f)
+		if got != nil {
+			t.Errorf("SetFloat64(%g) was %v, want nil", f, got)
+		}
+	}
+}
+
+// checkNonLossyRoundtrip32 verifies, for finite f, that converting f to a
+// Rat and back to float32 yields exactly f and is reported as exact.
+// Non-finite values are ignored.
+func checkNonLossyRoundtrip32(t *testing.T, f float32) {
+	wide := float64(f)
+	if !isFinite(wide) {
+		return
+	}
+	r := new(Rat).SetFloat64(wide)
+	if r == nil {
+		t.Errorf("Rat.SetFloat64(float64(%g) (%b)) == nil", f, f)
+		return
+	}
+	if back, exact := r.Float32(); back != f || !exact {
+		t.Errorf("Rat.SetFloat64(float64(%g)).Float32() = %g (%b), %v, want %g (%b), %v; delta = %b",
+			f, back, back, exact, f, f, true, back-f)
+	}
+}
+
+// checkNonLossyRoundtrip64 verifies, for finite f, that converting f to a
+// Rat and back to float64 yields exactly f and is reported as exact.
+// Non-finite values are ignored.
+func checkNonLossyRoundtrip64(t *testing.T, f float64) {
+	if !isFinite(f) {
+		return
+	}
+	r := new(Rat).SetFloat64(f)
+	if r == nil {
+		t.Errorf("Rat.SetFloat64(%g (%b)) == nil", f, f)
+		return
+	}
+	if back, exact := r.Float64(); back != f || !exact {
+		t.Errorf("Rat.SetFloat64(%g).Float64() = %g (%b), %v, want %g (%b), %v; delta = %b",
+			f, back, back, exact, f, f, true, back-f)
+	}
+}
+
+// delta returns |r - f| as a Rat.
+func delta(r *Rat, f float64) *Rat {
+	fr := new(Rat).SetFloat64(f)
+	diff := new(Rat).Sub(r, fr)
+	return diff.Abs(diff)
+}
+
+// checkIsBestApprox32 reports whether f is the best possible float32
+// approximation of r. On failure it logs an error on t and returns false.
+func checkIsBestApprox32(t *testing.T, f float32, r *Rat) bool {
+	if math.Abs(float64(f)) >= math.MaxFloat32 {
+		// Cannot check +Inf, -Inf, nor the float next to them (MaxFloat32).
+		// But we have tests for these special cases.
+		return true
+	}
+
+	// prev and next are the float32 values bracketing f.
+	prev := math.Nextafter32(f, float32(math.Inf(-1)))
+	next := math.Nextafter32(f, float32(math.Inf(+1)))
+
+	// f is correct only if r is no farther from f than from either
+	// neighbor; an exact tie is acceptable only when f is even.
+	dist := delta(r, float64(f))
+	distPrev := delta(r, float64(prev))
+	distNext := delta(r, float64(next))
+	cmpPrev := dist.Cmp(distPrev)
+	cmpNext := dist.Cmp(distNext)
+
+	switch {
+	case cmpPrev > 0:
+		t.Errorf("Rat(%v).Float32() = %g (%b), but previous float32 %g (%b) is closer", r, f, f, prev, prev)
+		return false
+	case cmpNext > 0:
+		t.Errorf("Rat(%v).Float32() = %g (%b), but next float32 %g (%b) is closer", r, f, f, next, next)
+		return false
+	case cmpPrev == 0 && !isEven32(f):
+		t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, prev, prev)
+		return false
+	case cmpNext == 0 && !isEven32(f):
+		t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, next, next)
+		return false
+	}
+	return true
+}
+
+// checkIsBestApprox64 reports whether f is the best possible float64
+// approximation of r. On failure it logs an error on t and returns false.
+func checkIsBestApprox64(t *testing.T, f float64, r *Rat) bool {
+	if math.Abs(f) >= math.MaxFloat64 {
+		// Cannot check +Inf, -Inf, nor the float next to them (MaxFloat64).
+		// But we have tests for these special cases.
+		return true
+	}
+
+	// prev and next are the float64 values bracketing f.
+	prev := math.Nextafter(f, math.Inf(-1))
+	next := math.Nextafter(f, math.Inf(+1))
+
+	// f is correct only if r is no farther from f than from either
+	// neighbor; an exact tie is acceptable only when f is even.
+	dist := delta(r, f)
+	distPrev := delta(r, prev)
+	distNext := delta(r, next)
+	cmpPrev := dist.Cmp(distPrev)
+	cmpNext := dist.Cmp(distNext)
+
+	switch {
+	case cmpPrev > 0:
+		t.Errorf("Rat(%v).Float64() = %g (%b), but previous float64 %g (%b) is closer", r, f, f, prev, prev)
+		return false
+	case cmpNext > 0:
+		t.Errorf("Rat(%v).Float64() = %g (%b), but next float64 %g (%b) is closer", r, f, f, next, next)
+		return false
+	case cmpPrev == 0 && !isEven64(f):
+		t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, prev, prev)
+		return false
+	case cmpNext == 0 && !isEven64(f):
+		t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, next, next)
+		return false
+	}
+	return true
+}
+
+// isEven32 reports whether the lowest bit of f's IEEE 754 bit pattern is 0.
+func isEven32(f float32) bool {
+	bits := math.Float32bits(f)
+	return bits&1 == 0
+}
+
+// isEven64 reports whether the lowest bit of f's IEEE 754 bit pattern is 0.
+func isEven64(f float64) bool {
+	bits := math.Float64bits(f)
+	return bits&1 == 0
+}
+
+// TestIsFinite checks isFinite on a set of finite values and on NaN and
+// both infinities.
+func TestIsFinite(t *testing.T) {
+	for _, f := range []float64{
+		1.0 / 3,
+		4891559871276714924261e+222,
+		math.MaxFloat64,
+		math.SmallestNonzeroFloat64,
+		-math.MaxFloat64,
+		-math.SmallestNonzeroFloat64,
+	} {
+		if !isFinite(f) {
+			t.Errorf("!IsFinite(%g (%b))", f, f)
+		}
+	}
+
+	for _, f := range []float64{
+		math.NaN(),
+		math.Inf(-1),
+		math.Inf(+1),
+	} {
+		if isFinite(f) {
+			t.Errorf("IsFinite(%g, (%b))", f, f)
+		}
+	}
+}
+
+func TestRatSetInt64(t *testing.T) {
+	var testCases = []int64{
+		0,
+		1,
+		-1,
+		12345,
+		-98765,
+		math.MaxInt64,
+		math.MinInt64,
+	}
+	var r = new(Rat)
+	for i, want := range testCases {
+		r.SetInt64(want)
+		if !r.IsInt() {
+			t.Errorf("#%d: Rat.SetInt64(%d) is not an integer", i, want)
+		}
+		num := r.Num()
+		if !num.IsInt64() {
+			t.Errorf("#%d: Rat.SetInt64(%d) numerator is not an int64", i, want)
+		}
+		got := num.Int64()
+		if got != want {
+			t.Errorf("#%d: Rat.SetInt64(%d) = %d, but expected %d", i, want, got, want)
+		}
+	}
+}
+
+func TestRatSetUint64(t *testing.T) {
+	var testCases = []uint64{
+		0,
+		1,
+		12345,
+		^uint64(0),
+	}
+	var r = new(Rat)
+	for i, want := range testCases {
+		r.SetUint64(want)
+		if !r.IsInt() {
+			t.Errorf("#%d: Rat.SetUint64(%d) is not an integer", i, want)
+		}
+		num := r.Num()
+		if !num.IsUint64() {
+			t.Errorf("#%d: Rat.SetUint64(%d) numerator is not a uint64", i, want)
+		}
+		got := num.Uint64()
+		if got != want {
+			t.Errorf("#%d: Rat.SetUint64(%d) = %d, but expected %d", i, want, got, want)
+		}
+	}
+}
+
+// BenchmarkRatCmp measures Cmp on two small Rats (4/1 and 7/2).
+func BenchmarkRatCmp(b *testing.B) {
+	lhs := NewRat(4, 1)
+	rhs := NewRat(7, 2)
+	for n := 0; n < b.N; n++ {
+		lhs.Cmp(rhs)
+	}
+}
+
+// TestIssue34919 verifies that a Rat's denominator is not modified
+// when simply accessing the Rat value.
+// Each accessor in the table is run against a fresh Rat whose denominator
+// slice is empty but has spare capacity, so a stray write is detectable.
+func TestIssue34919(t *testing.T) {
+	for _, acc := range []struct {
+		name string
+		f    func(*Rat)
+	}{
+		{"Float32", func(x *Rat) { x.Float32() }},
+		{"Float64", func(x *Rat) { x.Float64() }},
+		{"Inv", func(x *Rat) { new(Rat).Inv(x) }},
+		{"Sign", func(x *Rat) { x.Sign() }},
+		{"IsInt", func(x *Rat) { x.IsInt() }},
+		{"Num", func(x *Rat) { x.Num() }},
+		// {"Denom", func(x *Rat) { x.Denom() }}, TODO(gri) should we change the API? See issue #33792.
+	} {
+		// A denominator of length 0 is interpreted as 1. Make sure that
+		// "materialization" of the denominator doesn't lead to setting
+		// the underlying array element 0 to 1.
+		r := &Rat{Int{abs: nat{991}}, Int{abs: make(nat, 0, 1)}}
+		acc.f(r)
+		// Reslice up to the capacity to inspect the backing array element
+		// that lies beyond the (zero) length of the denominator.
+		if d := r.b.abs[:1][0]; d != 0 {
+			t.Errorf("%s modified denominator: got %d, want 0", acc.name, d)
+		}
+	}
+}
+
+// TestDenomRace calls Denom and Float.SetRat on one shared Rat from several
+// goroutines at once, so the race detector can flag any mutation of it.
+func TestDenomRace(t *testing.T) {
+	const workers = 3
+	x := NewRat(1, 2)
+	done := make(chan bool, workers)
+	for w := 0; w < workers; w++ {
+		go func() {
+			// Denom (also used by Float.SetRat) used to mutate x unnecessarily,
+			// provoking race reports when run in the race detector.
+			x.Denom()
+			new(Float).SetRat(x)
+			done <- true
+		}()
+	}
+	// Wait for every goroutine to finish.
+	for w := 0; w < workers; w++ {
+		<-done
+	}
+}
diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go
new file mode 100644
index 0000000..8537a67
--- /dev/null
+++ b/src/math/big/ratconv.go
@@ -0,0 +1,380 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements rat-to-string and string-to-rat conversion functions.
+
+package big
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+func ratTok(ch rune) bool {
+	return strings.ContainsRune("+-/0123456789.eE", ch)
+}
+
+// ratZero is a zero-valued Rat; its address is used below for a
+// compile-time interface check.
+var ratZero Rat
+var _ fmt.Scanner = &ratZero // *Rat must implement fmt.Scanner
+
+// Scan is a support routine for fmt.Scanner. It accepts the formats
+// 'e', 'E', 'f', 'F', 'g', 'G', and 'v'. All formats are equivalent.
+func (z *Rat) Scan(s fmt.ScanState, ch rune) error {
+	// The token is read before the verb is validated.
+	tok, err := s.Token(true, ratTok)
+	if err != nil {
+		return err
+	}
+	if strings.IndexRune("efgEFGv", ch) < 0 {
+		return errors.New("Rat.Scan: invalid verb")
+	}
+	_, ok := z.SetString(string(tok))
+	if !ok {
+		return errors.New("Rat.Scan: invalid syntax")
+	}
+	return nil
+}
+
+// SetString sets z to the value of s and returns z and a boolean indicating
+// success. s can be given as a (possibly signed) fraction "a/b", or as a
+// floating-point number optionally followed by an exponent.
+// If a fraction is provided, both the dividend and the divisor may be a
+// decimal integer or independently use a prefix of “0b”, “0” or “0o”,
+// or “0x” (or their upper-case variants) to denote a binary, octal, or
+// hexadecimal integer, respectively. The divisor may not be signed.
+// If a floating-point number is provided, it may be in decimal form or
+// use any of the same prefixes as above but for “0” to denote a non-decimal
+// mantissa. A leading “0” is considered a decimal leading 0; it does not
+// indicate octal representation in this case.
+// An optional base-10 “e” or base-2 “p” (or their upper-case variants)
+// exponent may be provided as well, except for hexadecimal floats which
+// only accept an (optional) “p” exponent (because an “e” or “E” cannot
+// be distinguished from a mantissa digit). If the exponent's absolute value
+// is too large, the operation may fail.
+// The entire string, not just a prefix, must be valid for success. If the
+// operation failed, the value of z is undefined but the returned value is nil.
+// An empty string and a fraction with a zero divisor are both reported
+// as failures.
+func (z *Rat) SetString(s string) (*Rat, bool) {
+	if len(s) == 0 {
+		return nil, false
+	}
+	// len(s) > 0
+
+	// parse fraction a/b, if any
+	if sep := strings.Index(s, "/"); sep >= 0 {
+		// numerator: everything before the first '/'
+		if _, ok := z.a.SetString(s[:sep], 0); !ok {
+			return nil, false
+		}
+		// denominator: everything after the first '/', scanned as an
+		// unsigned value directly into z.b.abs
+		r := strings.NewReader(s[sep+1:])
+		var err error
+		if z.b.abs, _, _, err = z.b.abs.scan(r, 0, false); err != nil {
+			return nil, false
+		}
+		// entire string must have been consumed
+		if _, err = r.ReadByte(); err != io.EOF {
+			return nil, false
+		}
+		// a zero divisor is not a valid fraction
+		if len(z.b.abs) == 0 {
+			return nil, false
+		}
+		return z.norm(), true
+	}
+
+	// parse floating-point number
+	r := strings.NewReader(s)
+
+	// sign
+	neg, err := scanSign(r)
+	if err != nil {
+		return nil, false
+	}
+
+	// mantissa
+	var base int
+	var fcount int // fractional digit count; valid if <= 0
+	z.a.abs, base, fcount, err = z.a.abs.scan(r, 0, true)
+	if err != nil {
+		return nil, false
+	}
+
+	// exponent
+	var exp int64
+	var ebase int
+	exp, ebase, err = scanExponent(r, true, true)
+	if err != nil {
+		return nil, false
+	}
+
+	// there should be no unread characters left
+	if _, err = r.ReadByte(); err != io.EOF {
+		return nil, false
+	}
+
+	// special-case 0 (see also issue #16176)
+	// (returning here means the exponent range checks below are never
+	// applied to a zero mantissa)
+	if len(z.a.abs) == 0 {
+		return z.norm(), true
+	}
+	// len(z.a.abs) > 0
+
+	// The mantissa may have a radix point (fcount <= 0) and there
+	// may be a nonzero exponent exp. The radix point amounts to a
+	// division by base**(-fcount), which equals a multiplication by
+	// base**fcount. An exponent means multiplication by ebase**exp.
+	// Multiplications are commutative, so we can apply them in any
+	// order. We only have powers of 2 and 10, and we split powers
+	// of 10 into the product of the same powers of 2 and 5. This
+	// may reduce the size of shift/multiplication factors or
+	// divisors required to create the final fraction, depending
+	// on the actual floating-point value.
+
+	// determine binary or decimal exponent contribution of radix point
+	var exp2, exp5 int64
+	if fcount < 0 {
+		// The mantissa has a radix point ddd.dddd; and
+		// -fcount is the number of digits to the right
+		// of '.'. Adjust relevant exponent accordingly.
+		d := int64(fcount)
+		switch base {
+		case 10:
+			exp5 = d
+			fallthrough // 10**e == 5**e * 2**e
+		case 2:
+			exp2 = d
+		case 8:
+			exp2 = d * 3 // octal digits are 3 bits each
+		case 16:
+			exp2 = d * 4 // hexadecimal digits are 4 bits each
+		default:
+			panic("unexpected mantissa base")
+		}
+		// fcount consumed - not needed anymore
+	}
+
+	// take actual exponent into account
+	switch ebase {
+	case 10:
+		exp5 += exp
+		fallthrough // see fallthrough above
+	case 2:
+		exp2 += exp
+	default:
+		panic("unexpected exponent base")
+	}
+	// exp consumed - not needed anymore
+
+	// apply exp5 contributions
+	// (start with exp5 so the numbers to multiply are smaller)
+	if exp5 != 0 {
+		// n is the magnitude of exp5
+		n := exp5
+		if n < 0 {
+			n = -n
+			if n < 0 {
+				// This can occur if -n overflows. -(-1 << 63) would become
+				// -1 << 63, which is still negative.
+				return nil, false
+			}
+		}
+		if n > 1e6 {
+			return nil, false // avoid excessively large exponents
+		}
+		pow5 := z.b.abs.expNN(natFive, nat(nil).setWord(Word(n)), nil, false) // use underlying array of z.b.abs
+		if exp5 > 0 {
+			// positive power of 5: scale the numerator, denominator is 1
+			z.a.abs = z.a.abs.mul(z.a.abs, pow5)
+			z.b.abs = z.b.abs.setWord(1)
+		} else {
+			// negative power of 5: it becomes the denominator
+			z.b.abs = pow5
+		}
+	} else {
+		z.b.abs = z.b.abs.setWord(1)
+	}
+
+	// apply exp2 contributions
+	if exp2 < -1e7 || exp2 > 1e7 {
+		return nil, false // avoid excessively large exponents
+	}
+	if exp2 > 0 {
+		// positive power of 2: shift the numerator left
+		z.a.abs = z.a.abs.shl(z.a.abs, uint(exp2))
+	} else if exp2 < 0 {
+		// negative power of 2: shift the denominator left instead
+		z.b.abs = z.b.abs.shl(z.b.abs, uint(-exp2))
+	}
+
+	z.a.neg = neg && len(z.a.abs) > 0 // 0 has no sign
+
+	return z.norm(), true
+}
+
+// scanExponent scans the longest possible prefix of r representing a base 10
+// (“e”, “E”) or a base 2 (“p”, “P”) exponent, if any. It returns the
+// exponent, the exponent base (10 or 2), or a read or syntax error, if any.
+// If r is at EOF or does not start with an exponent character, the result
+// is exponent 0 with base 10 and no error.
+//
+// If sepOk is set, an underscore character “_” may appear between successive
+// exponent digits; such underscores do not change the value of the exponent.
+// Incorrect placement of underscores is reported as an error if there are no
+// other errors. If sepOk is not set, underscores are not recognized and thus
+// terminate scanning like any other character that is not a valid digit.
+//
+//	exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
+//	sign     = "+" | "-" .
+//	digits   = digit { [ '_' ] digit } .
+//	digit    = "0" ... "9" .
+//
+// A base 2 exponent is only permitted if base2ok is set.
+func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) {
+	// one char look-ahead
+	ch, err := r.ReadByte()
+	if err != nil {
+		// EOF simply means there is no exponent; other read errors are returned
+		if err == io.EOF {
+			err = nil
+		}
+		return 0, 10, err
+	}
+
+	// exponent char
+	switch ch {
+	case 'e', 'E':
+		base = 10
+	case 'p', 'P':
+		if base2ok {
+			base = 2
+			break // ok
+		}
+		fallthrough // binary exponent not permitted
+	default:
+		r.UnreadByte() // ch does not belong to exponent anymore
+		return 0, 10, nil
+	}
+
+	// sign
+	// digits collects the text handed to strconv.ParseInt below: an
+	// optional leading '-' followed by the digits ('+' and '_' are dropped).
+	var digits []byte
+	ch, err = r.ReadByte()
+	if err == nil && (ch == '+' || ch == '-') {
+		if ch == '-' {
+			digits = append(digits, '-')
+		}
+		ch, err = r.ReadByte()
+	}
+
+	// prev encodes the previously seen char: it is one
+	// of '_', '0' (a digit), or '.' (anything else). A
+	// valid separator '_' may only occur after a digit.
+	prev := '.'
+	invalSep := false
+
+	// exponent value
+	hasDigits := false
+	for err == nil {
+		if '0' <= ch && ch <= '9' {
+			digits = append(digits, ch)
+			prev = '0'
+			hasDigits = true
+		} else if ch == '_' && sepOk {
+			// remember a misplaced separator, but keep scanning
+			if prev != '0' {
+				invalSep = true
+			}
+			prev = '_'
+		} else {
+			r.UnreadByte() // ch does not belong to number anymore
+			break
+		}
+		ch, err = r.ReadByte()
+	}
+
+	// reaching the end of input while scanning is not an error
+	if err == io.EOF {
+		err = nil
+	}
+	if err == nil && !hasDigits {
+		err = errNoDigits
+	}
+	if err == nil {
+		exp, err = strconv.ParseInt(string(digits), 10, 64)
+	}
+	// other errors take precedence over invalid separators
+	// (a trailing '_' leaves prev == '_' and counts as invalid, too)
+	if err == nil && (invalSep || prev == '_') {
+		err = errInvalSep
+	}
+
+	// the named results exp, base, and err carry the return values
+	return
+}
+
+// String formats x as "a/b"; the "/b" part is present even when b == 1.
+func (x *Rat) String() string {
+	text := x.marshal()
+	return string(text)
+}
+
+// marshal implements String returning a slice of bytes
+func (x *Rat) marshal() []byte {
+	var buf []byte
+	buf = x.a.Append(buf, 10)
+	buf = append(buf, '/')
+	if len(x.b.abs) != 0 {
+		buf = x.b.Append(buf, 10)
+	} else {
+		buf = append(buf, '1')
+	}
+	return buf
+}
+
+// RatString formats x as "a" when x is an integer (b == 1) and as "a/b"
+// otherwise.
+func (x *Rat) RatString() string {
+	if !x.IsInt() {
+		return x.String()
+	}
+	return x.a.String()
+}
+
+// FloatString returns a string representation of x in decimal form with prec
+// digits of precision after the radix point. The last digit is rounded to
+// nearest, with halves rounded away from zero.
+// If prec <= 0, neither a radix point nor fractional digits are written.
+func (x *Rat) FloatString(prec int) string {
+	var buf []byte
+
+	// Integers need no division: print the numerator and pad with zeros.
+	if x.IsInt() {
+		buf = x.a.Append(buf, 10)
+		if prec > 0 {
+			buf = append(buf, '.')
+			for i := prec; i > 0; i-- {
+				buf = append(buf, '0')
+			}
+		}
+		return string(buf)
+	}
+	// x.b.abs != 0
+
+	// q and r are presumably the quotient and remainder of |numerator|
+	// divided by the denominator, as their names suggest.
+	q, r := nat(nil).div(nat(nil), x.a.abs, x.b.abs)
+
+	// p is the scale factor for the fractional part: natOne when
+	// prec <= 0, otherwise the result of raising natTen to prec.
+	p := natOne
+	if prec > 0 {
+		p = nat(nil).expNN(natTen, nat(nil).setUint64(uint64(prec)), nil, false)
+	}
+
+	// Scale the remainder by p and divide again: r becomes the fractional
+	// digits as an integer, r2 the remainder left over for rounding.
+	r = r.mul(r, p)
+	r, r2 := r.div(nat(nil), r, x.b.abs)
+
+	// see if we need to round up
+	// (round up when 2*r2 >= denominator, i.e. the leftover is at least half)
+	r2 = r2.add(r2, r2)
+	if x.b.abs.cmp(r2) <= 0 {
+		r = r.add(r, natOne)
+		// the fractional part overflowed: carry into the integer part
+		if r.cmp(p) >= 0 {
+			q = nat(nil).add(q, natOne)
+			r = nat(nil).sub(r, p)
+		}
+	}
+
+	if x.a.neg {
+		buf = append(buf, '-')
+	}
+	buf = append(buf, q.utoa(10)...) // q is an unsigned nat; any sign was written above
+
+	if prec > 0 {
+		buf = append(buf, '.')
+		rs := r.utoa(10)
+		// left-pad the fractional digits with zeros up to prec digits
+		for i := prec - len(rs); i > 0; i-- {
+			buf = append(buf, '0')
+		}
+		buf = append(buf, rs...)
+	}
+
+	return string(buf)
+}
diff --git a/src/math/big/ratconv_test.go b/src/math/big/ratconv_test.go
new file mode 100644
index 0000000..45a3560
--- /dev/null
+++ b/src/math/big/ratconv_test.go
@@ -0,0 +1,626 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"math"
+	"reflect"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+var exponentTests = []struct {
+	s       string // string to be scanned
+	base2ok bool   // true if 'p'/'P' exponents are accepted
+	sepOk   bool   // true if '_' separators are accepted
+	x       int64  // expected exponent
+	b       int    // expected exponent base
+	err     error  // expected error
+	next    rune   // next character (or 0, if at EOF)
+}{
+	// valid, without separators
+	{"", false, false, 0, 10, nil, 0},
+	{"1", false, false, 0, 10, nil, '1'},
+	{"e0", false, false, 0, 10, nil, 0},
+	{"E1", false, false, 1, 10, nil, 0},
+	{"e+10", false, false, 10, 10, nil, 0},
+	{"e-10", false, false, -10, 10, nil, 0},
+	{"e123456789a", false, false, 123456789, 10, nil, 'a'},
+	{"p", false, false, 0, 10, nil, 'p'},
+	{"P+100", false, false, 0, 10, nil, 'P'},
+	{"p0", true, false, 0, 2, nil, 0},
+	{"P-123", true, false, -123, 2, nil, 0},
+	{"p+0a", true, false, 0, 2, nil, 'a'},
+	{"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore
+
+	// valid, with separators
+	{"e+1_0", false, true, 10, 10, nil, 0},
+	{"e-1_0", false, true, -10, 10, nil, 0},
+	{"e123_456_789a", false, true, 123456789, 10, nil, 'a'},
+	{"P+1_00", false, true, 0, 10, nil, 'P'},
+	{"p-1_2_3", true, true, -123, 2, nil, 0},
+
+	// invalid: no digits
+	{"e", false, false, 0, 10, errNoDigits, 0},
+	{"ef", false, false, 0, 10, errNoDigits, 'f'},
+	{"e+", false, false, 0, 10, errNoDigits, 0},
+	{"E-x", false, false, 0, 10, errNoDigits, 'x'},
+	{"p", true, false, 0, 2, errNoDigits, 0},
+	{"P-", true, false, 0, 2, errNoDigits, 0},
+	{"p+e", true, false, 0, 2, errNoDigits, 'e'},
+	{"e+_x", false, true, 0, 10, errNoDigits, 'x'},
+
+	// invalid: incorrect use of separator
+	{"e0_", false, true, 0, 10, errInvalSep, 0},
+	{"e_0", false, true, 0, 10, errInvalSep, 0},
+	{"e-1_2__3", false, true, -123, 10, errInvalSep, 0},
+}
+
+// TestScanExponent runs scanExponent over exponentTests and checks value, base, error and next rune.
+func TestScanExponent(t *testing.T) {
+	for _, tc := range exponentTests {
+		rd := strings.NewReader(tc.s)
+		exp, base, err := scanExponent(rd, tc.base2ok, tc.sepOk)
+		if err != tc.err {
+			t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", tc, err, tc.err)
+		}
+		if exp != tc.x {
+			t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", tc, exp, tc.x)
+		}
+		if base != tc.b {
+			t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", tc, base, tc.b)
+		}
+		next, _, rerr := rd.ReadRune()
+		if rerr == io.EOF {
+			next, rerr = 0, nil // EOF counts as rune 0
+		}
+		if rerr == nil && next != tc.next {
+			t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", tc, next, tc.next)
+		}
+	}
+}
+
+type StringTest struct {
+	in, out string // input to parse, and the RatString text expected on success
+	ok      bool   // whether parsing in is expected to succeed
+}
+
+var setStringTests = []StringTest{
+	// invalid
+	{in: "1e"},
+	{in: "1.e"},
+	{in: "1e+14e-5"},
+	{in: "1e4.5"},
+	{in: "r"},
+	{in: "a/b"},
+	{in: "a.b"},
+	{in: "1/0"},
+	{in: "4/3/2"}, // issue 17001
+	{in: "4/3/"},
+	{in: "4/3."},
+	{in: "4/"},
+	{in: "13e-9223372036854775808"}, // CVE-2022-23772
+
+	// valid
+	{"0", "0", true},
+	{"-0", "0", true},
+	{"1", "1", true},
+	{"-1", "-1", true},
+	{"1.", "1", true},
+	{"1e0", "1", true},
+	{"1.e1", "10", true},
+	{"-0.1", "-1/10", true},
+	{"-.1", "-1/10", true},
+	{"2/4", "1/2", true},
+	{".25", "1/4", true},
+	{"-1/5", "-1/5", true},
+	{"8129567.7690E14", "812956776900000000000", true},
+	{"78189e+4", "781890000", true},
+	{"553019.8935e+8", "55301989350000", true},
+	{"98765432109876543210987654321e-10", "98765432109876543210987654321/10000000000", true},
+	{"9877861857500000E-7", "3951144743/4", true},
+	{"2169378.417e-3", "2169378417/1000000", true},
+	{"884243222337379604041632732738665534", "884243222337379604041632732738665534", true},
+	{"53/70893980658822810696", "53/70893980658822810696", true},
+	{"106/141787961317645621392", "53/70893980658822810696", true},
+	{"204211327800791583.81095", "4084226556015831676219/20000", true},
+	{"0e9999999999", "0", true}, // issue #16176
+}
+
+// These are not supported by fmt.Fscanf.
+var setStringTests2 = []StringTest{
+	// invalid
+	{in: "4/3x"},
+	{in: "0/-1"},
+	{in: "-1/-1"},
+
+	// invalid with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{in: "10_/1"},
+	{in: "_10/1"},
+	{in: "1/1__0"},
+
+	// valid
+	{"0b1000/3", "8/3", true},
+	{"0B1000/0x8", "1", true},
+	{"-010/1", "-8", true}, // 0-prefix indicates octal in this case
+	{"-010.0", "-10", true},
+	{"-0o10/1", "-8", true},
+	{"0x10/1", "16", true},
+	{"0x10/0x20", "1/2", true},
+
+	{"0010", "10", true}, // 0-prefix is ignored in this case (not a fraction)
+	{"0x10.0", "16", true},
+	{"0x1.8", "3/2", true},
+	{"0X1.8p4", "24", true},
+	{"0x1.1E2", "2289/2048", true}, // E is part of hex mantissa, not exponent
+	{"0b1.1E2", "150", true},
+	{"0B1.1P3", "12", true},
+	{"0o10e-2", "2/25", true},
+	{"0O10p-3", "1", true},
+
+	// valid with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{"0b_1000/3", "8/3", true},
+	{"0B_10_00/0x8", "1", true},
+	{"0xdead/0B1101_1110_1010_1101", "1", true},
+	{"0B1101_1110_1010_1101/0XD_E_A_D", "1", true},
+	{"1_000.0", "1000", true},
+
+	{"0x_10.0", "16", true},
+	{"0x1_0.0", "16", true},
+	{"0x1.8_0", "3/2", true},
+	{"0X1.8p0_4", "24", true},
+	{"0b1.1_0E2", "150", true},
+	{"0o1_0e-2", "2/25", true},
+	{"0O_10p-3", "1", true},
+}
+
+// TestRatSetString runs Rat.SetString over both string tables and checks
+// the ok result, the RatString output, and that failures return nil.
+func TestRatSetString(t *testing.T) {
+	tests := append([]StringTest(nil), setStringTests...)
+	tests = append(tests, setStringTests2...)
+
+	for i, test := range tests {
+		x, ok := new(Rat).SetString(test.in)
+
+		// Each input reports at most one of the failure modes below.
+		switch {
+		case ok && !test.ok:
+			t.Errorf("#%d SetString(%q) expected failure", i, test.in)
+		case ok && x.RatString() != test.out:
+			t.Errorf("#%d SetString(%q) got %s want %s", i, test.in, x.RatString(), test.out)
+		case !ok && test.ok:
+			t.Errorf("#%d SetString(%q) expected success", i, test.in)
+		case !ok && x != nil:
+			// a failed SetString is expected to return a nil *Rat
+			t.Errorf("#%d SetString(%q) got %p want nil", i, test.in, x)
+		}
+	}
+}
+
+func TestRatSetStringZero(t *testing.T) {
+	// "0" must parse to exactly the representation SetInt64(0) produces.
+	want := new(Rat).SetInt64(0)
+	if got, _ := new(Rat).SetString("0"); !reflect.DeepEqual(got, want) {
+		t.Errorf("got %#+v, want %#+v", got, want)
+	}
+}
+
+// TestRatScan checks that fmt.Fscanf with %v parses each setStringTests
+// input into a Rat with the outcome the table records.
+func TestRatScan(t *testing.T) {
+	var src bytes.Buffer
+	for i, test := range setStringTests {
+		src.Reset()
+		src.WriteString(test.in)
+
+		x := new(Rat)
+		_, err := fmt.Fscanf(&src, "%v", x)
+		scanned := err == nil
+		switch {
+		case scanned && !test.ok:
+			t.Errorf("#%d (%s) expected error", i, test.in)
+		case !scanned && test.ok:
+			t.Errorf("#%d (%s) error: %s", i, test.in, err)
+		case scanned && x.RatString() != test.out:
+			t.Errorf("#%d got %s want %s", i, x.RatString(), test.out)
+		}
+	}
+}
+
+var floatStringTests = []struct {
+	in   string
+	prec int
+	out  string
+}{
+	{"0", 0, "0"},
+	{"0", 4, "0.0000"},
+	{"1", 0, "1"},
+	{"1", 2, "1.00"},
+	{"-1", 0, "-1"},
+	{"0.05", 1, "0.1"},
+	{"-0.05", 1, "-0.1"},
+	{".25", 2, "0.25"},
+	{".25", 1, "0.3"},
+	{".25", 3, "0.250"},
+	{"-1/3", 3, "-0.333"},
+	{"-2/3", 4, "-0.6667"},
+	{"0.96", 1, "1.0"},
+	{"0.999", 2, "1.00"},
+	{"0.9", 0, "1"},
+	{".25", -1, "0"},
+	{".55", -1, "1"},
+}
+
+// TestFloatString checks Rat.FloatString against the floatStringTests table.
+func TestFloatString(t *testing.T) {
+	for i, test := range floatStringTests {
+		x, _ := new(Rat).SetString(test.in)
+		if got := x.FloatString(test.prec); got != test.out {
+			t.Errorf("#%d got %s want %s", i, got, test.out)
+		}
+	}
+}
+
+// Test inputs to Rat.SetString. The prefix "long:" causes the test
+// to be skipped except in -long mode.  (The threshold is about 500us.)
+var float64inputs = []string{
+	// Constants plundered from strconv/testfp.txt.
+
+	// Table 1: Stress Inputs for Conversion to 53-bit Binary, < 1/2 ULP
+	"5e+125",
+	"69e+267",
+	"999e-026",
+	"7861e-034",
+	"75569e-254",
+	"928609e-261",
+	"9210917e+080",
+	"84863171e+114",
+	"653777767e+273",
+	"5232604057e-298",
+	"27235667517e-109",
+	"653532977297e-123",
+	"3142213164987e-294",
+	"46202199371337e-072",
+	"231010996856685e-073",
+	"9324754620109615e+212",
+	"78459735791271921e+049",
+	"272104041512242479e+200",
+	"6802601037806061975e+198",
+	"20505426358836677347e-221",
+	"836168422905420598437e-234",
+	"4891559871276714924261e+222",
+
+	// Table 2: Stress Inputs for Conversion to 53-bit Binary, > 1/2 ULP
+	"9e-265",
+	"85e-037",
+	"623e+100",
+	"3571e+263",
+	"81661e+153",
+	"920657e-023",
+	"4603285e-024",
+	"87575437e-309",
+	"245540327e+122",
+	"6138508175e+120",
+	"83356057653e+193",
+	"619534293513e+124",
+	"2335141086879e+218",
+	"36167929443327e-159",
+	"609610927149051e-255",
+	"3743626360493413e-165",
+	"94080055902682397e-242",
+	"899810892172646163e+283",
+	"7120190517612959703e+120",
+	"25188282901709339043e-252",
+	"308984926168550152811e-052",
+	"6372891218502368041059e+064",
+
+	// Table 14: Stress Inputs for Conversion to 24-bit Binary, <1/2 ULP
+	"5e-20",
+	"67e+14",
+	"985e+15",
+	"7693e-42",
+	"55895e-16",
+	"996622e-44",
+	"7038531e-32",
+	"60419369e-46",
+	"702990899e-20",
+	"6930161142e-48",
+	"25933168707e+13",
+	"596428896559e+20",
+
+	// Table 15: Stress Inputs for Conversion to 24-bit Binary, >1/2 ULP
+	"3e-23",
+	"57e+18",
+	"789e-35",
+	"2539e-18",
+	"76173e+28",
+	"887745e-11",
+	"5382571e-37",
+	"82381273e-35",
+	"750486563e-38",
+	"3752432815e-39",
+	"75224575729e-45",
+	"459926601011e+15",
+
+	// Constants plundered from strconv/atof_test.go.
+
+	"0",
+	"1",
+	"+1",
+	"1e23",
+	"1E23",
+	"100000000000000000000000",
+	"1e-100",
+	"123456700",
+	"99999999999999974834176",
+	"100000000000000000000001",
+	"100000000000000008388608",
+	"100000000000000016777215",
+	"100000000000000016777216",
+	"-1",
+	"-0.1",
+	"-0", // NB: exception made for this input
+	"1e-20",
+	"625e-3",
+
+	// largest float64
+	"1.7976931348623157e308",
+	"-1.7976931348623157e308",
+	// next float64 - too large
+	"1.7976931348623159e308",
+	"-1.7976931348623159e308",
+	// the border is ...158079
+	// borderline - okay
+	"1.7976931348623158e308",
+	"-1.7976931348623158e308",
+	// borderline - too large
+	"1.797693134862315808e308",
+	"-1.797693134862315808e308",
+
+	// a little too large
+	"1e308",
+	"2e308",
+	"1e309",
+
+	// way too large
+	"1e310",
+	"-1e310",
+	"1e400",
+	"-1e400",
+	"long:1e400000",
+	"long:-1e400000",
+
+	// denormalized
+	"1e-305",
+	"1e-306",
+	"1e-307",
+	"1e-308",
+	"1e-309",
+	"1e-310",
+	"1e-322",
+	// smallest denormal
+	"5e-324",
+	"4e-324",
+	"3e-324",
+	// too small
+	"2e-324",
+	// way too small
+	"1e-350",
+	"long:1e-400000",
+	// way too small, negative
+	"-1e-350",
+	"long:-1e-400000",
+
+	// try to overflow exponent
+	// [Disabled: too slow and memory-hungry with rationals.]
+	// "1e-4294967296",
+	// "1e+4294967296",
+	// "1e-18446744073709551616",
+	// "1e+18446744073709551616",
+
+	// https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
+	"2.2250738585072012e-308",
+	// https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
+	"2.2250738585072011e-308",
+
+	// A very large number (initially wrongly parsed by the fast algorithm).
+	"4.630813248087435e+307",
+
+	// A different kind of very large number.
+	"22.222222222222222",
+	"long:2." + strings.Repeat("2", 4000) + "e+1",
+
+	// Exactly halfway between 1 and math.Nextafter(1, 2).
+	// Round to even (down).
+	"1.00000000000000011102230246251565404236316680908203125",
+	// Slightly lower; still round down.
+	"1.00000000000000011102230246251565404236316680908203124",
+	// Slightly higher; round up.
+	"1.00000000000000011102230246251565404236316680908203126",
+	// Slightly higher, but you have to read all the way to the end.
+	"long:1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1",
+
+	// Smallest denormal, 2^(-1022-52)
+	"4.940656458412465441765687928682213723651e-324",
+	// Half of smallest denormal, 2^(-1022-53)
+	"2.470328229206232720882843964341106861825e-324",
+	// A little more than the exact half of smallest denormal
+	// 2^-1075 + 2^-1100.  (Rounds to 1p-1074.)
+	"2.470328302827751011111470718709768633275e-324",
+	// The exact halfway between smallest normal and largest denormal:
+	// 2^-1022 - 2^-1075.  (Rounds to 2^-1022.)
+	"2.225073858507201136057409796709131975935e-308",
+
+	"1152921504606846975",  //   1<<60 - 1
+	"-1152921504606846975", // -(1<<60 - 1)
+	"1152921504606846977",  //   1<<60 + 1
+	"-1152921504606846977", // -(1<<60 + 1)
+
+	"1/3",
+}
+
+// isFinite reports whether f is a finite value, i.e. neither a NaN nor an
+// infinity. It is equivalent to !math.IsNaN(f) && !math.IsInf(f, 0).
+func isFinite(f float64) bool {
+	return -math.MaxFloat64 <= f && f <= math.MaxFloat64
+}
+
+func TestFloat32SpecialCases(t *testing.T) {
+	for _, input := range float64inputs {
+		if strings.HasPrefix(input, "long:") {
+			if !*long { // "long:" inputs run only in -long mode
+				continue
+			}
+			input = input[len("long:"):]
+		}
+
+		r, ok := new(Rat).SetString(input)
+		if !ok {
+			t.Errorf("Rat.SetString(%q) failed", input)
+			continue
+		}
+		f, exact := r.Float32()
+
+		// 1. Check string -> Rat -> float32 conversions are
+		// consistent with strconv.ParseFloat.
+		// Skip this check if the input uses "a/b" rational syntax.
+		if !strings.Contains(input, "/") {
+			e64, _ := strconv.ParseFloat(input, 32)
+			e := float32(e64)
+
+			// Careful: negative Rats too small for
+			// float32 become -0, but Rat obviously cannot
+			// preserve the sign from SetString("-0").
+			switch {
+			case math.Float32bits(e) == math.Float32bits(f):
+				// Ok: bitwise equal.
+			case f == 0 && r.Num().BitLen() == 0:
+				// Ok: Rat(0) is equivalent to both +/- float32(0).
+			default:
+				t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e)
+			}
+		}
+
+		if !isFinite(float64(f)) {
+			continue
+		}
+
+		// 2. Check f is best approximation to r.
+		if !checkIsBestApprox32(t, f, r) {
+			// Append context information.
+			t.Errorf("(input was %q)", input)
+		}
+
+		// 3. Check f->R->f roundtrip is non-lossy.
+		checkNonLossyRoundtrip32(t, f)
+
+		// 4. Check exactness using slow algorithm.
+		if wasExact := new(Rat).SetFloat64(float64(f)).Cmp(r) == 0; wasExact != exact {
+			t.Errorf("Rat.SetString(%q).Float32().exact = %t, want %t", input, exact, wasExact)
+		}
+	}
+}
+
+// TestFloat64SpecialCases converts each float64inputs entry through Rat
+// and cross-checks Rat.Float64 against strconv and the slow-path helpers.
+func TestFloat64SpecialCases(t *testing.T) {
+	for _, input := range float64inputs {
+		// Inputs tagged "long:" only run in -long mode.
+		if rest := strings.TrimPrefix(input, "long:"); rest != input {
+			if !*long {
+				continue
+			}
+			input = rest
+		}
+
+		r, ok := new(Rat).SetString(input)
+		if !ok {
+			t.Errorf("Rat.SetString(%q) failed", input)
+			continue
+		}
+		f, exact := r.Float64()
+
+		// 1. Check string -> Rat -> float64 conversions are
+		// consistent with strconv.ParseFloat.
+		// Skip this check if the input uses "a/b" rational syntax.
+		if !strings.Contains(input, "/") {
+			e, _ := strconv.ParseFloat(input, 64)
+
+			// Careful: negative Rats too small for float64 become -0,
+			// but Rat cannot preserve the sign from SetString("-0"),
+			// so a zero Rat is accepted for either signed zero.
+			sameBits := math.Float64bits(e) == math.Float64bits(f)
+			if !sameBits && !(f == 0 && r.Num().BitLen() == 0) {
+				t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e)
+			}
+		}
+
+		if !isFinite(f) {
+			continue
+		}
+
+		// 2. Check f is best approximation to r.
+		if !checkIsBestApprox64(t, f, r) {
+			// Append context information.
+			t.Errorf("(input was %q)", input)
+		}
+
+		// 3. Check f->R->f roundtrip is non-lossy.
+		checkNonLossyRoundtrip64(t, f)
+
+		// 4. Check exactness using slow algorithm.
+		wasExact := new(Rat).SetFloat64(f).Cmp(r) == 0
+		if wasExact != exact {
+			t.Errorf("Rat.SetString(%q).Float64().exact = %t, want %t", input, exact, wasExact)
+		}
+	}
+}
+
+// TestIssue31184 checks that one Rat, reused across SetString calls,
+// formats each value back to the same three-decimal text it was set from.
+func TestIssue31184(t *testing.T) {
+	inputs := []string{"-213.090", "8.192", "16.000"}
+
+	// x is declared once and overwritten by every SetString call.
+	var x Rat
+	for _, want := range inputs {
+		x.SetString(want)
+		if got := x.FloatString(3); got != want {
+			t.Errorf("got %s, want %s", got, want)
+		}
+	}
+}
+
+// TestIssue45910 checks which exponents Rat.SetString accepts and which it rejects.
+func TestIssue45910(t *testing.T) {
+	type testCase struct {
+		input string
+		ok    bool
+	}
+	var x Rat
+	for _, tc := range []testCase{
+		{"1e-1000001", false},
+		{"1e-1000000", true},
+		{"1e+1000000", true},
+		{"1e+1000001", false},
+		{"0p1000000000000", true},
+		{"1p-10000001", false},
+		{"1p-10000000", true},
+		{"1p+10000000", true},
+		{"1p+10000001", false},
+		{"1.770p02041010010011001001", false}, // test case from issue
+	} {
+		if _, got := x.SetString(tc.input); got != tc.ok {
+			t.Errorf("SetString(%s) got ok = %v; want %v", tc.input, got, tc.ok)
+		}
+	}
+}
diff --git a/src/math/big/ratmarsh.go b/src/math/big/ratmarsh.go
new file mode 100644
index 0000000..b69c59d
--- /dev/null
+++ b/src/math/big/ratmarsh.go
@@ -0,0 +1,86 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements encoding/decoding of Rats.
+
+package big
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"math"
+)
+
+// ratGobVersion is the Gob codec version, stored above the sign bit in the first encoded byte. Permits backward-compatible changes to the encoding.
+const ratGobVersion byte = 1
+
+// GobEncode implements the gob.GobEncoder interface. A nil x yields (nil, nil); otherwise the result is a version/sign byte, a 4-byte numerator length, the numerator bytes, then the denominator bytes.
+func (x *Rat) GobEncode() ([]byte, error) {
+	if x == nil {
+		return nil, nil
+	}
+	buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4)
+	i := x.b.abs.bytes(buf)
+	j := x.a.abs.bytes(buf[:i])
+	n := i - j // byte length of the numerator, which occupies buf[j:i]
+	if int(uint32(n)) != n {
+		// this should never happen
+		return nil, errors.New("Rat.GobEncode: numerator too large")
+	}
+	binary.BigEndian.PutUint32(buf[j-4:j], uint32(n)) // length goes in the 4 bytes just before the numerator
+	j -= 1 + 4
+	b := ratGobVersion << 1 // make space for sign bit
+	if x.a.neg {
+		b |= 1
+	}
+	buf[j] = b
+	return buf[j:], nil // drop any unused bytes at the front of buf
+}
+
+// GobDecode implements the gob.GobDecoder interface. An empty buf resets z to the zero Rat; malformed input returns an error before z is modified.
+func (z *Rat) GobDecode(buf []byte) error {
+	if len(buf) == 0 {
+		// Other side sent a nil or default value.
+		*z = Rat{}
+		return nil
+	}
+	if len(buf) < 5 {
+		return errors.New("Rat.GobDecode: buffer too small")
+	}
+	b := buf[0] // header byte: version above bit 0, sign in bit 0
+	if b>>1 != ratGobVersion {
+		return fmt.Errorf("Rat.GobDecode: encoding version %d not supported", b>>1)
+	}
+	const j = 1 + 4 // offset of the numerator: 1 header byte + 4 length bytes
+	ln := binary.BigEndian.Uint32(buf[j-4 : j])
+	if uint64(ln) > math.MaxInt-j { // keeps j + int(ln) below from overflowing int
+		return errors.New("Rat.GobDecode: invalid length")
+	}
+	i := j + int(ln) // offset where the denominator bytes start
+	if len(buf) < i {
+		return errors.New("Rat.GobDecode: buffer too small")
+	}
+	z.a.neg = b&1 != 0
+	z.a.abs = z.a.abs.setBytes(buf[j:i])
+	z.b.abs = z.b.abs.setBytes(buf[i:])
+	return nil
+}
+
+// MarshalText implements the encoding.TextMarshaler interface: "a" for integers, "a/b" otherwise.
+func (x *Rat) MarshalText() (text []byte, err error) {
+	if !x.IsInt() {
+		return x.marshal(), nil
+	}
+	return x.a.MarshalText()
+}
+
+// UnmarshalText implements the encoding.TextUnmarshaler interface by parsing text with SetString.
+func (z *Rat) UnmarshalText(text []byte) error {
+	// TODO(gri): get rid of the []byte/string conversion
+	if _, ok := z.SetString(string(text)); ok {
+		return nil
+	}
+	return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Rat", text)
+}
diff --git a/src/math/big/ratmarsh_test.go b/src/math/big/ratmarsh_test.go
new file mode 100644
index 0000000..15c933e
--- /dev/null
+++ b/src/math/big/ratmarsh_test.go
@@ -0,0 +1,138 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"encoding/gob"
+	"encoding/json"
+	"encoding/xml"
+	"testing"
+)
+
+// TestRatGobEncoding round-trips a Rat built from each encodingTests entry through gob.
+func TestRatGobEncoding(t *testing.T) {
+	var pipe bytes.Buffer
+	enc := gob.NewEncoder(&pipe)
+	dec := gob.NewDecoder(&pipe)
+	for _, test := range encodingTests {
+		pipe.Reset() // empty buffer for each test case (in case of failures)
+		var sent, recv Rat
+		sent.SetString(test + ".14159265")
+		if err := enc.Encode(&sent); err != nil {
+			t.Errorf("encoding of %s failed: %s", &sent, err)
+			continue
+		}
+		if err := dec.Decode(&recv); err != nil {
+			t.Errorf("decoding of %s failed: %s", &sent, err)
+			continue
+		}
+		if recv.Cmp(&sent) != 0 {
+			t.Errorf("transmission of %s failed: got %s want %s", &sent, &recv, &sent)
+		}
+	}
+}
+
+// Sending a nil Rat pointer (inside a slice) on a round trip through gob should yield a zero.
+// TODO: top-level nils.
+func TestGobEncodingNilRatInSlice(t *testing.T) {
+	buf := new(bytes.Buffer)
+	enc := gob.NewEncoder(buf)
+	dec := gob.NewDecoder(buf)
+
+	var in = make([]*Rat, 1) // one element, left as a nil *Rat
+	err := enc.Encode(&in)
+	if err != nil {
+		t.Errorf("gob encode failed: %q", err)
+	}
+	var out []*Rat
+	err = dec.Decode(&out)
+	if err != nil {
+		t.Fatalf("gob decode failed: %q", err)
+	}
+	if len(out) != 1 {
+		t.Fatalf("wrong len; want 1 got %d", len(out))
+	}
+	var zero Rat
+	if out[0].Cmp(&zero) != 0 {
+		t.Fatalf("transmission of (*Rat)(nil) failed: got %s want 0", out[0]) // report the element, not the slice
+	}
+}
+
+var ratNums = []string{
+	"-141592653589793238462643383279502884197169399375105820974944592307816406286",
+	"-1415926535897932384626433832795028841971",
+	"-141592653589793",
+	"-1",
+	"0",
+	"1",
+	"141592653589793",
+	"1415926535897932384626433832795028841971",
+	"141592653589793238462643383279502884197169399375105820974944592307816406286",
+}
+
+var ratDenoms = []string{
+	"1",
+	"718281828459045",
+	"7182818284590452353602874713526624977572",
+	"718281828459045235360287471352662497757247093699959574966967627724076630353",
+}
+
+// TestRatJSONEncoding round-trips every ratNums/ratDenoms fraction through encoding/json.
+func TestRatJSONEncoding(t *testing.T) {
+	for _, num := range ratNums {
+		for _, denom := range ratDenoms {
+			var sent, recv Rat
+			sent.SetString(num + "/" + denom)
+			data, err := json.Marshal(&sent)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &sent, err)
+				continue
+			}
+			if err := json.Unmarshal(data, &recv); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &sent, err)
+				continue
+			}
+			if recv.Cmp(&sent) != 0 {
+				t.Errorf("JSON encoding of %s failed: got %s want %s", &sent, &recv, &sent)
+			}
+		}
+	}
+}
+
+// TestRatXMLEncoding round-trips every ratNums/ratDenoms fraction through encoding/xml.
+func TestRatXMLEncoding(t *testing.T) {
+	for _, num := range ratNums {
+		for _, denom := range ratDenoms {
+			var sent, recv Rat
+			sent.SetString(num + "/" + denom)
+			data, err := xml.Marshal(&sent)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &sent, err)
+				continue
+			}
+			if err := xml.Unmarshal(data, &recv); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &sent, err)
+				continue
+			}
+			if recv.Cmp(&sent) != 0 {
+				t.Errorf("XML encoding of %s failed: got %s want %s", &sent, &recv, &sent)
+			}
+		}
+	}
+}
+
+// TestRatGobDecodeShortBuffer requires GobDecode to reject malformed encodings.
+func TestRatGobDecodeShortBuffer(t *testing.T) {
+	for _, tc := range [][]byte{
+		{0x2},
+		{0x2, 0x0, 0x0, 0x0, 0xff},
+		{0x2, 0xff, 0xff, 0xff, 0xff},
+	} {
+		if err := NewRat(1, 2).GobDecode(tc); err == nil {
+			t.Error("expected GobDecode to return error for malformed input")
+		}
+	}
+}
diff --git a/src/math/big/roundingmode_string.go b/src/math/big/roundingmode_string.go
new file mode 100644
index 0000000..e2f13a6
--- /dev/null
+++ b/src/math/big/roundingmode_string.go
@@ -0,0 +1,28 @@
+// Code generated by "stringer -type=RoundingMode"; DO NOT EDIT.
+
+package big
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{} // length 1, so only the constant index 0 compiles
+	_ = x[ToNearestEven-0]
+	_ = x[ToNearestAway-1]
+	_ = x[ToZero-2]
+	_ = x[AwayFromZero-3]
+	_ = x[ToNegativeInf-4]
+	_ = x[ToPositiveInf-5]
+}
+
+const _RoundingMode_name = "ToNearestEvenToNearestAwayToZeroAwayFromZeroToNegativeInfToPositiveInf"
+
+var _RoundingMode_index = [...]uint8{0, 13, 26, 32, 44, 57, 70}
+
+func (i RoundingMode) String() string {
+	if i >= RoundingMode(len(_RoundingMode_index)-1) { // no name recorded for i: print it numerically
+		return "RoundingMode(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _RoundingMode_name[_RoundingMode_index[i]:_RoundingMode_index[i+1]]
+}
diff --git a/src/math/big/sqrt.go b/src/math/big/sqrt.go
new file mode 100644
index 0000000..b4b0374
--- /dev/null
+++ b/src/math/big/sqrt.go
@@ -0,0 +1,130 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"math"
+	"sync"
+)
+
+var threeOnce struct { // lazily initialized holder for the Float constant 3
+	sync.Once
+	v *Float
+}
+
+func three() *Float {
+	threeOnce.Do(func() { // initialization happens on the first call only
+		threeOnce.v = NewFloat(3.0)
+	})
+	return threeOnce.v // every caller receives the same *Float
+}
+
+// Sqrt sets z to the rounded square root of x, and returns it.
+//
+// If z's precision is 0, it is changed to x's precision before the
+// operation. Rounding is performed according to z's precision and
+// rounding mode, but z's accuracy is not computed. Specifically, the
+// result of z.Acc() is undefined.
+//
+// The function panics if x < 0. The value of z is undefined in that
+// case.
+func (z *Float) Sqrt(x *Float) *Float {
+	if debugFloat {
+		x.validate()
+	}
+
+	if z.prec == 0 {
+		z.prec = x.prec
+	}
+
+	if x.Sign() == -1 {
+		// following IEEE754-2008 (section 7.2)
+		panic(ErrNaN{"square root of negative operand"})
+	}
+
+	// handle ±0 and +∞
+	if x.form != finite {
+		z.acc = Exact
+		z.form = x.form
+		z.neg = x.neg // IEEE754-2008 requires √±0 = ±0
+		return z
+	}
+
+	// MantExp sets the argument's precision to the receiver's, and
+	// when z.prec > x.prec this will lower z.prec. Restore it after
+	// the MantExp call.
+	prec := z.prec
+	b := x.MantExp(z)
+	z.prec = prec
+
+	// Compute √(z·2**b) as
+	//   √( z)·2**(½b)     if b is even
+	//   √(2z)·2**(⌊½b⌋)   if b > 0 is odd
+	//   √(½z)·2**(⌈½b⌉)   if b < 0 is odd
+	switch b % 2 {
+	case 0:
+		// nothing to do
+	case 1:
+		z.exp++
+	case -1:
+		z.exp--
+	}
+	// 0.25 <= z < 2.0
+
+	// Solving 1/x² - z = 0 avoids Quo calls and is faster, especially
+	// for high precisions.
+	z.sqrtInverse(z)
+
+	// re-attach halved exponent
+	return z.SetMantExp(z, b/2) // b/2 truncates toward zero, matching the ⌊½b⌋ / ⌈½b⌉ cases above
+}
+
+// sqrtInverse sets z to √x (to z.prec precision) by solving
+//
+//	1/t² - x = 0
+//
+// for t (using Newton's method), and then multiplying t by x. Sqrt calls it with x == z.
+func (z *Float) sqrtInverse(x *Float) {
+	// let
+	//   f(t) = 1/t² - x
+	// then
+	//   g(t) = f(t)/f'(t) = -½t(1 - xt²)
+	// and the next guess is given by
+	//   t2 = t - g(t) = ½t(3 - xt²)
+	u := newFloat(z.prec)
+	v := newFloat(z.prec)
+	three := three()
+	ng := func(t *Float) *Float { // ng overwrites t with the next guess and returns it
+		u.prec = t.prec
+		v.prec = t.prec
+		u.Mul(t, t)     // u = t²
+		u.Mul(x, u)     //   = xt²
+		v.Sub(three, u) // v = 3 - xt²
+		u.Mul(t, v)     // u = t(3 - xt²)
+		u.exp--         //   = ½t(3 - xt²)
+		return t.Set(u)
+	}
+
+	xf, _ := x.Float64() // float64 approximation of x, used only for the initial guess
+	sqi := newFloat(z.prec)
+	sqi.SetFloat64(1 / math.Sqrt(xf))
+	for prec := z.prec + 32; sqi.prec < prec; { // double the working precision until it reaches z.prec+32
+		sqi.prec *= 2
+		sqi = ng(sqi)
+	}
+	// sqi = 1/√x
+
+	// x/√x = √x
+	z.Mul(x, sqi)
+}
+
+// newFloat returns a new *Float whose mantissa has space for twice the given
+// precision (per the original note, nat.make ensures the slice length is > 0).
+func newFloat(prec2 uint32) *Float {
+	words := int(prec2/_W) * 2
+	f := new(Float)
+	f.mant = f.mant.make(words)
+	return f
+}
diff --git a/src/math/big/sqrt_test.go b/src/math/big/sqrt_test.go
new file mode 100644
index 0000000..d314711
--- /dev/null
+++ b/src/math/big/sqrt_test.go
@@ -0,0 +1,126 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"fmt"
+	"math"
+	"math/rand"
+	"testing"
+)
+
+// TestFloatSqrt64 checks that Float.Sqrt at 53 bits of precision agrees
+// with math.Sqrt on random float64 inputs (fewer of them in short mode).
+func TestFloatSqrt64(t *testing.T) {
+	n := 100000
+	if testing.Short() {
+		n = 100
+	}
+	for i := 0; i < n; i++ {
+		r := rand.Float64()
+		got := new(Float).SetPrec(53)
+		got.Sqrt(NewFloat(r))
+		want := NewFloat(math.Sqrt(r))
+		if got.Cmp(want) != 0 {
+			t.Fatalf("Sqrt(%g) =\n got %g;\nwant %g", r, got, want)
+		}
+	}
+}
+
+func TestFloatSqrt(t *testing.T) {
+	for _, test := range []struct {
+		x    string
+		want string
+	}{
+		// Test values were generated on Wolfram Alpha using query
+		//   'sqrt(N) to 350 digits'
+		// 350 decimal digits give up to 1000 binary digits.
+		{"0.03125", "0.17677669529663688110021109052621225982120898442211850914708496724884155980776337985629844179095519659187673077886403712811560450698134215158051518713749197892665283324093819909447499381264409775757143376369499645074628431682460775184106467733011114982619404115381053858929018135497032545349940642599871090667456829147610370507757690729404938184321879"},
+		{"0.125", "0.35355339059327376220042218105242451964241796884423701829416993449768311961552675971259688358191039318375346155772807425623120901396268430316103037427498395785330566648187639818894998762528819551514286752738999290149256863364921550368212935466022229965238808230762107717858036270994065090699881285199742181334913658295220741015515381458809876368643757"},
+		{"0.5", "0.70710678118654752440084436210484903928483593768847403658833986899536623923105351942519376716382078636750692311545614851246241802792536860632206074854996791570661133296375279637789997525057639103028573505477998580298513726729843100736425870932044459930477616461524215435716072541988130181399762570399484362669827316590441482031030762917619752737287514"},
+		{"2.0", "1.4142135623730950488016887242096980785696718753769480731766797379907324784621070388503875343276415727350138462309122970249248360558507372126441214970999358314132226659275055927557999505011527820605714701095599716059702745345968620147285174186408891986095523292304843087143214508397626036279952514079896872533965463318088296406206152583523950547457503"},
+		{"3.0", "1.7320508075688772935274463415058723669428052538103806280558069794519330169088000370811461867572485756756261414154067030299699450949989524788116555120943736485280932319023055820679748201010846749232650153123432669033228866506722546689218379712270471316603678615880190499865373798593894676503475065760507566183481296061009476021871903250831458295239598"},
+		{"4.0", "2.0"},
+
+		{"1p512", "1p256"},
+		{"4p1024", "2p512"},
+		{"9p2048", "3p1024"},
+
+		{"1p-1024", "1p-512"},
+		{"4p-2048", "2p-1024"},
+		{"9p-4096", "3p-2048"},
+	} {
+		for _, prec := range []uint{24, 53, 64, 65, 100, 128, 129, 200, 256, 400, 600, 800, 1000} {
+			x := new(Float).SetPrec(prec)
+			x.Parse(test.x, 10)
+
+			got := new(Float).SetPrec(prec).Sqrt(x)
+			want := new(Float).SetPrec(prec)
+			want.Parse(test.want, 10)
+			if got.Cmp(want) != 0 {
+				t.Errorf("prec = %d, Sqrt(%v) =\ngot  %g;\nwant %g",
+					prec, test.x, got, want)
+			}
+
+			// Square test.
+			// If got holds the square root of x to precision p, then
+			//   got = √x + k
+			// for some k such that |k| < 2**(-p). Thus,
+			//   got² = (√x + k)² = x + 2k√x + k²
+			// and the error must satisfy
+			//   err = |got² - x| ≈ | 2k√x | < 2**(-p+1)*√x
+			// Ignoring the k² term for simplicity.
+
+			// err = |got² - x|
+			// (but do intermediate steps with 32 guard digits to
+			// avoid introducing spurious rounding-related errors)
+			sq := new(Float).SetPrec(prec+32).Mul(got, got)
+			diff := new(Float).Sub(sq, x)
+			err := diff.Abs(diff).SetPrec(prec)
+
+			// maxErr = 2**(-p+1)*√x
+			one := new(Float).SetPrec(prec).SetInt64(1)
+			maxErr := new(Float).Mul(new(Float).SetMantExp(one, -int(prec)+1), got)
+
+			if err.Cmp(maxErr) >= 0 {
+				t.Errorf("prec = %d, Sqrt(%v) =\ngot err  %g;\nwant maxErr %g",
+					prec, test.x, err, maxErr)
+			}
+		}
+	}
+}
+
+// TestFloatSqrtSpecial checks that Sqrt maps +0, -0 and +Inf to themselves, sign included.
+func TestFloatSqrtSpecial(t *testing.T) {
+	for _, test := range []struct {
+		x    *Float
+		want *Float
+	}{
+		{NewFloat(+0), NewFloat(+0)},
+		{NewFloat(math.Copysign(0, -1)), NewFloat(math.Copysign(0, -1))}, // the constant -0 is +0 in Go
+		{NewFloat(math.Inf(+1)), NewFloat(math.Inf(+1))},
+	} {
+		got := new(Float).Sqrt(test.x)
+		if got.neg != test.want.neg || got.form != test.want.form {
+			t.Errorf("Sqrt(%v) = %v (neg: %v); want %v (neg: %v)",
+				test.x, got, got.neg, test.want, test.want.neg)
+		}
+	}
+}
+
+// Benchmarks
+
+// BenchmarkFloatSqrt times Sqrt(2) at a range of result precisions, one sub-benchmark per precision.
+func BenchmarkFloatSqrt(b *testing.B) {
+	for _, prec := range []uint{64, 128, 256, 1e3, 1e4, 1e5, 1e6} {
+		arg, dst := NewFloat(2), new(Float).SetPrec(prec)
+		b.Run(fmt.Sprint(prec), func(b *testing.B) {
+			b.ReportAllocs()
+			for n := 0; n < b.N; n++ {
+				dst.Sqrt(arg)
+			}
+		})
+	}
+}
diff --git a/src/math/bits.go b/src/math/bits.go
new file mode 100644
index 0000000..c5cb93b
--- /dev/null
+++ b/src/math/bits.go
@@ -0,0 +1,62 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+const (
+	uvnan    = 0x7FF8000000000001 // bit pattern returned by NaN
+	uvinf    = 0x7FF0000000000000 // bit pattern returned by Inf for sign >= 0
+	uvneginf = 0xFFF0000000000000 // bit pattern returned by Inf for sign < 0
+	uvone    = 0x3FF0000000000000
+	mask     = 0x7FF
+	shift    = 64 - 11 - 1 // 52
+	bias     = 1023
+	signMask = 1 << 63      // only the top bit of a 64-bit word set
+	fracMask = 1<<shift - 1 // the low shift (52) bits set
+)
+
+// Inf returns positive infinity if sign >= 0, negative infinity if sign < 0.
+func Inf(sign int) float64 {
+	var v uint64
+	if sign >= 0 {
+		v = uvinf
+	} else {
+		v = uvneginf
+	}
+	return Float64frombits(v)
+}
+
+// NaN returns an IEEE 754 “not-a-number” value; every call yields the same bit pattern, uvnan.
+func NaN() float64 { return Float64frombits(uvnan) }
+
+// IsNaN reports whether f is an IEEE 754 “not-a-number” value.
+func IsNaN(f float64) (is bool) {
+	// Only a NaN compares unequal to itself (IEEE 754). A bit-level test
+	// that avoids the floating-point hardware would be:
+	//	x := Float64bits(f)
+	//	return uint32(x>>shift)&mask == mask && x != uvinf && x != uvneginf
+	return !(f == f)
+}
+
+// IsInf reports whether f is an infinity, with sign selecting which one:
+// positive only (sign > 0), negative only (sign < 0), or either (sign == 0).
+func IsInf(f float64, sign int) bool {
+	// Only an infinity lies beyond ±MaxFloat64. A bit-level alternative:
+	//	x := Float64bits(f)
+	//	return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf
+	if sign >= 0 && f > MaxFloat64 {
+		return true
+	}
+	return sign <= 0 && f < -MaxFloat64
+}
+
+// normalize scales a subnormal x up by 2**52 so that the result is normal,
+// returning y and exp with x == y × 2**exp. x must be finite and non-zero.
+func normalize(x float64) (y float64, exp int) {
+	const smallestNormal = 2.2250738585072014e-308 // 2**-1022
+	if !(Abs(x) < smallestNormal) { // negated form: a NaN is returned unscaled
+		return x, 0
+	}
+	return x * (1 << 52), -52
+}
diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go
new file mode 100644
index 0000000..c1c7b79
--- /dev/null
+++ b/src/math/bits/bits.go
@@ -0,0 +1,599 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run make_tables.go
+
+// Package bits implements bit counting and manipulation
+// functions for the predeclared unsigned integer types.
+//
+// Functions in this package may be implemented directly by
+// the compiler, for better performance. For those functions
+// the code in this package will not be used. Which
+// functions are implemented by the compiler depends on the
+// architecture and the Go release.
+package bits
+
+const uintSize = 32 << (^uint(0) >> 63) // 32 or 64: the shift count is 1 only when uint has a bit 63
+
+// UintSize is the size of a uint in bits.
+const UintSize = uintSize
+
+// --- LeadingZeros ---
+
+// LeadingZeros returns the number of leading zero bits in x; the result is UintSize for x == 0.
+func LeadingZeros(x uint) int { return UintSize - Len(x) } // Len(0) is 0, hence UintSize for zero
+
+// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
+func LeadingZeros8(x uint8) int { return 8 - Len8(x) } // Len8(0) is 0, hence 8 for zero
+
+// LeadingZeros16 returns the number of leading zero bits in x; the result is 16 for x == 0.
+func LeadingZeros16(x uint16) int { return 16 - Len16(x) } // Len16(0) is 0, hence 16 for zero
+
+// LeadingZeros32 returns the number of leading zero bits in x; the result is 32 for x == 0.
+func LeadingZeros32(x uint32) int { return 32 - Len32(x) } // Len32(0) is 0, hence 32 for zero
+
+// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
+func LeadingZeros64(x uint64) int { return 64 - Len64(x) } // Len64(0) is 0, hence 64 for zero
+
+// --- TrailingZeros ---
+
+// See http://supertech.csail.mit.edu/papers/debruijn.pdf
+const deBruijn32 = 0x077CB531 // multiplier used by TrailingZeros16 and TrailingZeros32
+
+var deBruijn32tab = [32]byte{ // indexed by the top 5 bits of (x&-x)*deBruijn32
+	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
+}
+
+const deBruijn64 = 0x03f79d71b4ca8b09 // multiplier used by TrailingZeros64
+
+var deBruijn64tab = [64]byte{ // indexed by the top 6 bits of (x&-x)*deBruijn64
+	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+// TrailingZeros counts the trailing zero bits of x; for x == 0 the result is UintSize.
+func TrailingZeros(x uint) int {
+	if UintSize != 32 {
+		return TrailingZeros64(uint64(x))
+	}
+	return TrailingZeros32(uint32(x))
+}
+
+// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func TrailingZeros8(x uint8) int {
+	return int(ntz8tab[x]) // table lookup; the entry for x == 0 is 8
+}
+
+// TrailingZeros16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
+func TrailingZeros16(x uint16) int {
+	if x == 0 { // zero has no set bit to isolate, so it cannot go through the table
+		return 16
+	}
+	// see comment in TrailingZeros64; x&-x is widened to uint32 so the 32-entry table can be used
+	return int(deBruijn32tab[uint32(x&-x)*deBruijn32>>(32-5)])
+}
+
+// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
+func TrailingZeros32(x uint32) int {
+	if x == 0 { // zero has no set bit to isolate, so it cannot go through the table
+		return 32
+	}
+	// see comment in TrailingZeros64; here the top 5 bits of the product select the table entry
+	return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
+}
+
+// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
+func TrailingZeros64(x uint64) int {
+	if x == 0 { // zero has no set bit to isolate, so it cannot go through the table
+		return 64
+	}
+	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
+	//
+	// x & -x leaves only the right-most set bit of the word. Let k be the
+	// index of that bit. Since only a single bit is set, the value is two
+	// to the power of k. Multiplying by a power of two is equivalent to
+	// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
+	// is such that all six-bit consecutive substrings are distinct.
+	// Therefore, if we have a left-shifted version of this constant we can
+	// find by how many bits it was shifted by looking at which six-bit
+	// substring ended up at the top of the word.
+	// (Knuth, volume 4, section 7.3.1)
+	return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
+}
+
+// --- OnesCount ---
+
+const m0 = 0x5555555555555555 // 01010101 ...
+const m1 = 0x3333333333333333 // 00110011 ...
+const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
+const m3 = 0x00ff00ff00ff00ff // 8 zeros, 8 ones ...
+const m4 = 0x0000ffff0000ffff // 16 zeros, 16 ones ...
+
+// OnesCount reports how many bits of x are set (the "population count").
+func OnesCount(x uint) int {
+	if UintSize != 32 {
+		return OnesCount64(uint64(x))
+	}
+	return OnesCount32(uint32(x))
+}
+
+// OnesCount8 returns the number of one bits ("population count") in x.
+func OnesCount8(x uint8) int {
+	return int(pop8tab[x]) // one table lookup per byte value
+}
+
+// OnesCount16 returns the number of one bits ("population count") in x.
+func OnesCount16(x uint16) int {
+	return int(pop8tab[x>>8] + pop8tab[x&0xff]) // byte-wide sum is at most 16, so it cannot wrap
+}
+
+// OnesCount32 returns the number of one bits ("population count") in x.
+func OnesCount32(x uint32) int {
+	return int(pop8tab[x>>24] + pop8tab[x>>16&0xff] + pop8tab[x>>8&0xff] + pop8tab[x&0xff]) // byte-wide sum is at most 32
+}
+
+// OnesCount64 returns the number of one bits ("population count") in x.
+func OnesCount64(x uint64) int {
+	// Implementation: Parallel summing of adjacent bits.
+	// See "Hacker's Delight", Chap. 5: Counting Bits.
+	// The following pattern shows the general approach:
+	//
+	//   x = x>>1&(m0&m) + x&(m0&m)
+	//   x = x>>2&(m1&m) + x&(m1&m)
+	//   x = x>>4&(m2&m) + x&(m2&m)
+	//   x = x>>8&(m3&m) + x&(m3&m)
+	//   x = x>>16&(m4&m) + x&(m4&m)
+	//   x = x>>32&(m5&m) + x&(m5&m)
+	//   return int(x)
+	//
+	// Masking (& operations) can be omitted when there's no
+	// danger that a field's sum will carry over into the next
+	// field: Since the result cannot be > 64, 8 bits is enough
+	// and we can ignore the masks for the shifts by 8 and up.
+	// Per "Hacker's Delight", the first line can be simplified
+	// more, but it saves at best one instruction, so we leave
+	// it alone for clarity.
+	const m = 1<<64 - 1
+	x = x>>1&(m0&m) + x&(m0&m)
+	x = x>>2&(m1&m) + x&(m1&m)
+	x = (x>>4 + x) & (m2 & m) // one mask after the add suffices here
+	x += x >> 8
+	x += x >> 16
+	x += x >> 32
+	return int(x) & (1<<7 - 1) // keep the low 7 bits, which hold the count (at most 64)
+}
+
+// --- RotateLeft ---
+
+// RotateLeft rotates x left by (k mod UintSize) bits and returns the result.
+// A negative k rotates right: RotateLeft(x, -k) rotates x right by k bits.
+//
+// This function's execution time does not depend on the inputs.
+func RotateLeft(x uint, k int) uint {
+	if UintSize != 32 {
+		return uint(RotateLeft64(uint64(x), k))
+	}
+	return uint(RotateLeft32(uint32(x), k))
+}
+
+// RotateLeft8 rotates x left by (k mod 8) bits and returns the result.
+// A negative k rotates right: RotateLeft8(x, -k) rotates x right by k bits.
+//
+// This function's execution time does not depend on the inputs.
+func RotateLeft8(x uint8, k int) uint8 {
+	const width = 8
+	left := uint(k) & (width - 1)
+	return x>>(width-left) | x<<left
+}
+
+// RotateLeft16 rotates x left by (k mod 16) bits and returns the result.
+// A negative k rotates right: RotateLeft16(x, -k) rotates x right by k bits.
+//
+// This function's execution time does not depend on the inputs.
+func RotateLeft16(x uint16, k int) uint16 {
+	const width = 16
+	left := uint(k) & (width - 1)
+	return x>>(width-left) | x<<left
+}
+
+// RotateLeft32 rotates x left by (k mod 32) bits and returns the result.
+// A negative k rotates right: RotateLeft32(x, -k) rotates x right by k bits.
+//
+// This function's execution time does not depend on the inputs.
+func RotateLeft32(x uint32, k int) uint32 {
+	const width = 32
+	left := uint(k) & (width - 1)
+	return x>>(width-left) | x<<left
+}
+
+// RotateLeft64 rotates x left by (k mod 64) bits and returns the result.
+// A negative k rotates right: RotateLeft64(x, -k) rotates x right by k bits.
+//
+// This function's execution time does not depend on the inputs.
+func RotateLeft64(x uint64, k int) uint64 {
+	const width = 64
+	left := uint(k) & (width - 1)
+	return x>>(width-left) | x<<left
+}
+
+// --- Reverse ---
+
+// Reverse returns x with the order of all of its bits reversed.
+func Reverse(x uint) uint {
+	if UintSize != 32 {
+		return uint(Reverse64(uint64(x)))
+	}
+	return uint(Reverse32(uint32(x)))
+}
+
+// Reverse8 returns the value of x with its bits in reversed order.
+func Reverse8(x uint8) uint8 {
+	return rev8tab[x] // one table lookup per byte value
+}
+
+// Reverse16 returns the value of x with its bits in reversed order.
+func Reverse16(x uint16) uint16 {
+	return uint16(rev8tab[x>>8]) | uint16(rev8tab[x&0xff])<<8 // reversed high byte becomes the low byte and vice versa
+}
+
+// Reverse32 returns the value of x with its bits in reversed order.
+func Reverse32(x uint32) uint32 {
+	const m = 1<<32 - 1 // truncates the 64-bit masks m0..m2 to 32 bits
+	x = x>>1&(m0&m) | x&(m0&m)<<1 // swap adjacent bits
+	x = x>>2&(m1&m) | x&(m1&m)<<2 // swap adjacent bit pairs
+	x = x>>4&(m2&m) | x&(m2&m)<<4 // swap the two nibbles of each byte
+	return ReverseBytes32(x)
+}
+
+// Reverse64 returns the value of x with its bits in reversed order.
+func Reverse64(x uint64) uint64 {
+	const m = 1<<64 - 1 // all 64 bits set, so m0&m etc. are the masks unchanged
+	x = x>>1&(m0&m) | x&(m0&m)<<1 // swap adjacent bits
+	x = x>>2&(m1&m) | x&(m1&m)<<2 // swap adjacent bit pairs
+	x = x>>4&(m2&m) | x&(m2&m)<<4 // swap the two nibbles of each byte
+	return ReverseBytes64(x)
+}
+
+// --- ReverseBytes ---
+
+// ReverseBytes returns x with the order of its bytes reversed.
+//
+// This function's execution time does not depend on the inputs.
+func ReverseBytes(x uint) uint {
+	if UintSize != 32 {
+		return uint(ReverseBytes64(uint64(x)))
+	}
+	return uint(ReverseBytes32(uint32(x)))
+}
+
+// ReverseBytes16 swaps the two bytes of x and returns the result.
+//
+// This function's execution time does not depend on the inputs.
+func ReverseBytes16(x uint16) uint16 {
+	return x<<8 | x>>8
+}
+
+// ReverseBytes32 returns the value of x with its bytes in reversed order.
+//
+// This function's execution time does not depend on the inputs.
+func ReverseBytes32(x uint32) uint32 {
+	const m = 1<<32 - 1 // truncates the 64-bit mask m3 to 32 bits
+	x = x>>8&(m3&m) | x&(m3&m)<<8 // swap the two bytes of each 16-bit half
+	return x>>16 | x<<16 // then swap the halves
+}
+
+// ReverseBytes64 returns the value of x with its bytes in reversed order.
+//
+// This function's execution time does not depend on the inputs.
+func ReverseBytes64(x uint64) uint64 {
+	const m = 1<<64 - 1 // all 64 bits set, so m3&m and m4&m are the masks unchanged
+	x = x>>8&(m3&m) | x&(m3&m)<<8 // swap adjacent bytes
+	x = x>>16&(m4&m) | x&(m4&m)<<16 // swap adjacent 16-bit groups
+	return x>>32 | x<<32 // swap the 32-bit halves
+}
+
+// --- Len ---
+
+// Len reports the minimum number of bits needed to represent x; Len(0) is 0.
+func Len(x uint) int {
+	if UintSize != 32 {
+		return Len64(uint64(x))
+	}
+	return Len32(uint32(x))
+}
+
+// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len8(x uint8) int {
+	return int(len8tab[x]) // one table lookup per byte value
+}
+
+// Len16 reports the minimum number of bits needed to represent x; Len16(0) is 0.
+func Len16(x uint16) (n int) {
+	if x > 0xff {
+		n, x = 8, x>>8
+	}
+	n += int(len8tab[x])
+	return n
+}
+
+// Len32 reports the minimum number of bits needed to represent x; Len32(0)
+// is 0. Upper halves are stripped first, then the last byte is looked up.
+func Len32(x uint32) (n int) {
+	if x > 0xffff {
+		n, x = 16, x>>16
+	}
+	if x > 0xff {
+		n, x = n+8, x>>8
+	}
+	n += int(len8tab[x])
+	return n
+}
+
+// Len64 reports the minimum number of bits needed to represent x;
+// Len64(0) is 0. Upper halves of width 32, 16 and 8 are stripped in
+// turn, then the remaining byte is looked up in len8tab.
+func Len64(x uint64) (n int) {
+	if x > 0xffffffff {
+		n, x = 32, x>>32
+	}
+	if x > 0xffff {
+		n, x = n+16, x>>16
+	}
+	if x > 0xff {
+		n, x = n+8, x>>8
+	}
+	n += int(len8tab[x])
+	return n
+}
+
+// --- Add with carry ---
+
+// Add computes x + y + carry and returns the wrapped sum together with
+// the carry out of the top bit. The carry input must be 0 or 1; otherwise
+// the behavior is undefined. The carryOut output is guaranteed to be 0 or 1.
+//
+// This function's execution time does not depend on the inputs.
+func Add(x, y, carry uint) (sum, carryOut uint) {
+	if UintSize != 32 {
+		wideSum, wideCarry := Add64(uint64(x), uint64(y), uint64(carry))
+		return uint(wideSum), uint(wideCarry)
+	}
+	narrowSum, narrowCarry := Add32(uint32(x), uint32(y), uint32(carry))
+	return uint(narrowSum), uint(narrowCarry)
+}
+
+// Add32 computes x + y + carry and returns the wrapped sum together with
+// the carry out of bit 31. The carry input must be 0 or 1; otherwise the
+// behavior is undefined. The carryOut output is guaranteed to be 0 or 1.
+//
+// This function's execution time does not depend on the inputs.
+func Add32(x, y, carry uint32) (sum, carryOut uint32) {
+	// Add in 64 bits so nothing is lost: the low word is the sum and
+	// everything above bit 31 is the carry.
+	wide := uint64(x) + uint64(y) + uint64(carry)
+	return uint32(wide), uint32(wide >> 32)
+}
+
+// Add64 returns the sum with carry of x, y and carry: sum = x + y + carry.
+// The carry input must be 0 or 1; otherwise the behavior is undefined.
+// The carryOut output is guaranteed to be 0 or 1.
+//
+// This function's execution time does not depend on the inputs.
+func Add64(x, y, carry uint64) (sum, carryOut uint64) {
+	sum = x + y + carry // wraps modulo 2**64
+	// The sum will overflow if both top bits are set (x & y) or if one of them
+	// is (x | y), and a carry from the lower place happened. If such a carry
+	// happens, the top bit will be 1 + 0 + 1 = 0 (&^ sum).
+	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63 // only bit 63 of the expression survives the shift
+	return
+}
+
+// --- Subtract with borrow ---
+
+// Sub computes x - y - borrow and returns the wrapped difference together
+// with the borrow out of the top bit. The borrow input must be 0 or 1;
+// otherwise the behavior is undefined. The borrowOut output is 0 or 1.
+//
+// This function's execution time does not depend on the inputs.
+func Sub(x, y, borrow uint) (diff, borrowOut uint) {
+	if UintSize != 32 {
+		wideDiff, wideBorrow := Sub64(uint64(x), uint64(y), uint64(borrow))
+		return uint(wideDiff), uint(wideBorrow)
+	}
+	narrowDiff, narrowBorrow := Sub32(uint32(x), uint32(y), uint32(borrow))
+	return uint(narrowDiff), uint(narrowBorrow)
+}
+
+// Sub32 returns the difference of x, y and borrow: diff = x - y - borrow.
+// The borrow input must be 0 or 1; otherwise the behavior is undefined.
+// The borrowOut output is guaranteed to be 0 or 1.
+//
+// This function's execution time does not depend on the inputs.
+func Sub32(x, y, borrow uint32) (diff, borrowOut uint32) {
+	diff = x - y - borrow // wraps modulo 2**32
+	// The difference will underflow if the top bit of x is not set and the top
+	// bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a borrow
+	// from the lower place happens. If that borrow happens, the result will be
+	// 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff).
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 31 // only bit 31 of the expression survives the shift
+	return
+}
+
+// Sub64 returns the difference of x, y and borrow: diff = x - y - borrow.
+// The borrow input must be 0 or 1; otherwise the behavior is undefined.
+// The borrowOut output is guaranteed to be 0 or 1.
+//
+// This function's execution time does not depend on the inputs.
+func Sub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	diff = x - y - borrow // wraps modulo 2**64
+	// See Sub32 for the bit logic; here the deciding bit is bit 63.
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
+	return
+}
+
+// --- Full-width multiply ---
+
+// Mul computes the full-width product x * y and returns it as two
+// words: hi holds the upper half of the product bits and lo the
+// lower half.
+//
+// This function's execution time does not depend on the inputs.
+func Mul(x, y uint) (hi, lo uint) {
+	if UintSize != 32 {
+		upper, lower := Mul64(uint64(x), uint64(y))
+		return uint(upper), uint(lower)
+	}
+	upper, lower := Mul32(uint32(x), uint32(y))
+	return uint(upper), uint(lower)
+}
+
+// Mul32 computes the 64-bit product x * y and returns it as two
+// words: hi holds the upper 32 bits of the product and lo the
+// lower 32 bits.
+//
+// This function's execution time does not depend on the inputs.
+func Mul32(x, y uint32) (hi, lo uint32) {
+	// A 64-bit multiply holds the whole product; split it into halves.
+	product := uint64(x) * uint64(y)
+	return uint32(product >> 32), uint32(product)
+}
+
+// Mul64 returns the 128-bit product of x and y: (hi, lo) = x * y
+// with the product bits' upper half returned in hi and the lower
+// half returned in lo.
+//
+// This function's execution time does not depend on the inputs.
+func Mul64(x, y uint64) (hi, lo uint64) {
+	const mask32 = 1<<32 - 1
+	x0 := x & mask32 // split each operand into 32-bit halves
+	x1 := x >> 32
+	y0 := y & mask32
+	y1 := y >> 32
+	w0 := x0 * y0
+	t := x1*y0 + w0>>32 // cannot overflow: (2**32-1)**2 + (2**32-1) < 2**64
+	w1 := t & mask32
+	w2 := t >> 32
+	w1 += x0 * y1 // likewise fits in 64 bits
+	hi = x1*y1 + w2 + w1>>32
+	lo = x * y // the wrapped product is exactly the low 64 bits
+	return
+}
+
+// --- Full-width divide ---
+
+// Div divides the double-word value (hi, lo) by y and returns the
+// quotient and remainder: quo = (hi, lo)/y, rem = (hi, lo)%y. The upper
+// half of the dividend is passed in hi and the lower half in lo.
+// Div panics for y == 0 (division by zero) or y <= hi (quotient overflow).
+func Div(hi, lo, y uint) (quo, rem uint) {
+	if UintSize != 32 {
+		q, r := Div64(uint64(hi), uint64(lo), uint64(y))
+		return uint(q), uint(r)
+	}
+	q, r := Div32(uint32(hi), uint32(lo), uint32(y))
+	return uint(q), uint(r)
+}
+
+// Div32 divides the 64-bit value (hi, lo) by y and returns the quotient
+// and remainder: quo = (hi, lo)/y, rem = (hi, lo)%y. The upper half of
+// the dividend is passed in hi and the lower half in lo.
+// Div32 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
+func Div32(hi, lo, y uint32) (quo, rem uint32) {
+	if y != 0 && y <= hi {
+		panic(overflowError)
+	}
+	// y == 0 is not checked here; the integer division below panics for it.
+	dividend, divisor := uint64(hi)<<32|uint64(lo), uint64(y)
+	return uint32(dividend / divisor), uint32(dividend % divisor)
+}
+
+// Div64 returns the quotient and remainder of (hi, lo) divided by y:
+// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
+// half in parameter hi and the lower half in parameter lo.
+// Div64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
+func Div64(hi, lo, y uint64) (quo, rem uint64) {
+	if y == 0 {
+		panic(divideError)
+	}
+	if y <= hi {
+		panic(overflowError)
+	}
+
+	// If high part is zero, we can directly return the results.
+	if hi == 0 {
+		return lo / y, lo % y
+	}
+
+	s := uint(LeadingZeros64(y)) // shift that moves y's highest set bit to bit 63
+	y <<= s
+
+	const (
+		two32  = 1 << 32
+		mask32 = two32 - 1
+	)
+	yn1 := y >> 32 // upper and lower 32-bit halves of the shifted divisor
+	yn0 := y & mask32
+	un32 := hi<<s | lo>>(64-s) // upper 64 bits of the dividend after the same shift
+	un10 := lo << s
+	un1 := un10 >> 32
+	un0 := un10 & mask32
+	q1 := un32 / yn1 // first estimate of the quotient's upper 32 bits
+	rhat := un32 - q1*yn1
+
+	for q1 >= two32 || q1*yn0 > two32*rhat+un1 { // lower the estimate while it is too large
+		q1--
+		rhat += yn1
+		if rhat >= two32 {
+			break
+		}
+	}
+
+	un21 := un32*two32 + un1 - q1*y
+	q0 := un21 / yn1 // first estimate of the quotient's lower 32 bits
+	rhat = un21 - q0*yn1
+
+	for q0 >= two32 || q0*yn0 > two32*rhat+un0 { // same correction for the low digit
+		q0--
+		rhat += yn1
+		if rhat >= two32 {
+			break
+		}
+	}
+
+	return q1*two32 + q0, (un21*two32 + un0 - q0*y) >> s // shift the remainder back down by s
+}
+
+// Rem computes the remainder of the double-word value (hi, lo) divided
+// by y. Rem panics for y == 0 (division by zero) but, unlike Div, it
+// doesn't panic on a quotient overflow.
+func Rem(hi, lo, y uint) uint {
+	if UintSize != 32 {
+		return uint(Rem64(uint64(hi), uint64(lo), uint64(y)))
+	}
+	return uint(Rem32(uint32(hi), uint32(lo), uint32(y)))
+}
+
+// Rem32 returns the remainder of (hi, lo) divided by y. It panics for y == 0
+// (division by zero) but, unlike Div32, never panics on a quotient overflow.
+func Rem32(hi, lo, y uint32) uint32 {
+	dividend := uint64(hi)<<32 | uint64(lo)
+	return uint32(dividend % uint64(y))
+}
+
+// Rem64 computes the remainder of the 128-bit value (hi, lo) divided
+// by y. Rem64 panics for y == 0 (division by zero) but, unlike Div64,
+// it doesn't panic on a quotient overflow.
+func Rem64(hi, lo, y uint64) uint64 {
+	// hi%y is below y, so Div64 cannot hit its quotient-overflow panic,
+	// and the remainder is unchanged because
+	//   hi ≡ hi%y    (mod y)
+	// implies
+	//   hi<<64 + lo ≡ (hi%y)<<64 + lo    (mod y)
+	reducedHi := hi % y
+	_, r := Div64(reducedHi, lo, y)
+	return r
+}
diff --git a/src/math/bits/bits_errors.go b/src/math/bits/bits_errors.go
new file mode 100644
index 0000000..61cb5c9
--- /dev/null
+++ b/src/math/bits/bits_errors.go
@@ -0,0 +1,16 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !compiler_bootstrap
+// +build !compiler_bootstrap
+
+package bits
+
+import _ "unsafe"
+
+//go:linkname overflowError runtime.overflowError
+var overflowError error // panic value used by Div32 and Div64 on quotient overflow
+
+//go:linkname divideError runtime.divideError
+var divideError error // panic value used by Div64 when y == 0
diff --git a/src/math/bits/bits_errors_bootstrap.go b/src/math/bits/bits_errors_bootstrap.go
new file mode 100644
index 0000000..4d610d3
--- /dev/null
+++ b/src/math/bits/bits_errors_bootstrap.go
@@ -0,0 +1,23 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build compiler_bootstrap
+// +build compiler_bootstrap
+
+// This version used only for bootstrap (on this path we want
+// to avoid use of go:linkname as applied to variables).
+
+package bits
+
+type errorString string // message text without the "runtime error: " prefix
+
+func (e errorString) RuntimeError() {} // no-op; presumably marks the value as a runtime error (confirm against runtime.Error)
+
+func (e errorString) Error() string {
+	return "runtime error: " + string(e) // prefix is added here, not stored in the value
+}
+
+var overflowError = error(errorString("integer overflow")) // bootstrap stand-in, defined locally instead of via go:linkname
+
+var divideError = error(errorString("integer divide by zero")) // bootstrap stand-in, defined locally instead of via go:linkname
diff --git a/src/math/bits/bits_tables.go b/src/math/bits/bits_tables.go
new file mode 100644
index 0000000..f869b8d
--- /dev/null
+++ b/src/math/bits/bits_tables.go
@@ -0,0 +1,79 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by go run make_tables.go. DO NOT EDIT.
+
+package bits
+
+const ntz8tab = "" + // byte x of the string is the trailing-zero count of x (8 for x == 0)
+	"\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
+	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
+
+const pop8tab = "" + // byte x of the string is the number of one bits in x
+	"\x00\x01\x01\x02\x01\x02\x02\x03\x01\x02\x02\x03\x02\x03\x03\x04" +
+	"\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
+	"\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
+	"\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
+	"\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
+	"\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
+	"\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
+	"\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
+	"\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
+	"\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
+	"\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
+	"\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
+	"\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
+	"\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
+	"\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
+	"\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08"
+
+const rev8tab = "" + // byte x of the string is x with its eight bits in reversed order
+	"\x00\x80\x40\xc0\x20\xa0\x60\xe0\x10\x90\x50\xd0\x30\xb0\x70\xf0" +
+	"\x08\x88\x48\xc8\x28\xa8\x68\xe8\x18\x98\x58\xd8\x38\xb8\x78\xf8" +
+	"\x04\x84\x44\xc4\x24\xa4\x64\xe4\x14\x94\x54\xd4\x34\xb4\x74\xf4" +
+	"\x0c\x8c\x4c\xcc\x2c\xac\x6c\xec\x1c\x9c\x5c\xdc\x3c\xbc\x7c\xfc" +
+	"\x02\x82\x42\xc2\x22\xa2\x62\xe2\x12\x92\x52\xd2\x32\xb2\x72\xf2" +
+	"\x0a\x8a\x4a\xca\x2a\xaa\x6a\xea\x1a\x9a\x5a\xda\x3a\xba\x7a\xfa" +
+	"\x06\x86\x46\xc6\x26\xa6\x66\xe6\x16\x96\x56\xd6\x36\xb6\x76\xf6" +
+	"\x0e\x8e\x4e\xce\x2e\xae\x6e\xee\x1e\x9e\x5e\xde\x3e\xbe\x7e\xfe" +
+	"\x01\x81\x41\xc1\x21\xa1\x61\xe1\x11\x91\x51\xd1\x31\xb1\x71\xf1" +
+	"\x09\x89\x49\xc9\x29\xa9\x69\xe9\x19\x99\x59\xd9\x39\xb9\x79\xf9" +
+	"\x05\x85\x45\xc5\x25\xa5\x65\xe5\x15\x95\x55\xd5\x35\xb5\x75\xf5" +
+	"\x0d\x8d\x4d\xcd\x2d\xad\x6d\xed\x1d\x9d\x5d\xdd\x3d\xbd\x7d\xfd" +
+	"\x03\x83\x43\xc3\x23\xa3\x63\xe3\x13\x93\x53\xd3\x33\xb3\x73\xf3" +
+	"\x0b\x8b\x4b\xcb\x2b\xab\x6b\xeb\x1b\x9b\x5b\xdb\x3b\xbb\x7b\xfb" +
+	"\x07\x87\x47\xc7\x27\xa7\x67\xe7\x17\x97\x57\xd7\x37\xb7\x77\xf7" +
+	"\x0f\x8f\x4f\xcf\x2f\xaf\x6f\xef\x1f\x9f\x5f\xdf\x3f\xbf\x7f\xff"
+
+const len8tab = "" + // byte x of the string is the bit length of x (0 for x == 0)
+	"\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
+	"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
+	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
+	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
+	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
diff --git a/src/math/bits/bits_test.go b/src/math/bits/bits_test.go
new file mode 100644
index 0000000..23b4539
--- /dev/null
+++ b/src/math/bits/bits_test.go
@@ -0,0 +1,1347 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bits_test
+
+import (
+	. "math/bits"
+	"runtime"
+	"testing"
+	"unsafe"
+)
+
+func TestUintSize(t *testing.T) {
+	var x uint
+	if want := unsafe.Sizeof(x) * 8; UintSize != want {
+		t.Fatalf("UintSize = %d; want %d", UintSize, want)
+	}
+}
+
+func TestLeadingZeros(t *testing.T) {
+	for i := 0; i < 256; i++ {
+		nlz := tab[i].nlz
+		for k := 0; k < 64-8; k++ {
+			x := uint64(i) << uint(k)
+			if x <= 1<<8-1 {
+				got := LeadingZeros8(uint8(x))
+				want := nlz - k + (8 - 8)
+				if x == 0 {
+					want = 8
+				}
+				if got != want {
+					t.Fatalf("LeadingZeros8(%#02x) == %d; want %d", x, got, want)
+				}
+			}
+
+			if x <= 1<<16-1 {
+				got := LeadingZeros16(uint16(x))
+				want := nlz - k + (16 - 8)
+				if x == 0 {
+					want = 16
+				}
+				if got != want {
+					t.Fatalf("LeadingZeros16(%#04x) == %d; want %d", x, got, want)
+				}
+			}
+
+			if x <= 1<<32-1 {
+				got := LeadingZeros32(uint32(x))
+				want := nlz - k + (32 - 8)
+				if x == 0 {
+					want = 32
+				}
+				if got != want {
+					t.Fatalf("LeadingZeros32(%#08x) == %d; want %d", x, got, want)
+				}
+				if UintSize == 32 {
+					got = LeadingZeros(uint(x))
+					if got != want {
+						t.Fatalf("LeadingZeros(%#08x) == %d; want %d", x, got, want)
+					}
+				}
+			}
+
+			if x <= 1<<64-1 {
+				got := LeadingZeros64(uint64(x))
+				want := nlz - k + (64 - 8)
+				if x == 0 {
+					want = 64
+				}
+				if got != want {
+					t.Fatalf("LeadingZeros64(%#016x) == %d; want %d", x, got, want)
+				}
+				if UintSize == 64 {
+					got = LeadingZeros(uint(x))
+					if got != want {
+						t.Fatalf("LeadingZeros(%#016x) == %d; want %d", x, got, want)
+					}
+				}
+			}
+		}
+	}
+}
+
+// Exported (global) variable serving as input for some
+// of the benchmarks to ensure side-effect free calls
+// are not optimized away.
+var Input uint64 = DeBruijn64
+
+// Exported (global) variable to store function results
+// during benchmarking to ensure side-effect free calls
+// are not optimized away.
+var Output int
+
+func BenchmarkLeadingZeros(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += LeadingZeros(uint(Input) >> (uint(i) % UintSize))
+	}
+	Output = s
+}
+
+func BenchmarkLeadingZeros8(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += LeadingZeros8(uint8(Input) >> (uint(i) % 8))
+	}
+	Output = s
+}
+
+func BenchmarkLeadingZeros16(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += LeadingZeros16(uint16(Input) >> (uint(i) % 16))
+	}
+	Output = s
+}
+
+func BenchmarkLeadingZeros32(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += LeadingZeros32(uint32(Input) >> (uint(i) % 32))
+	}
+	Output = s
+}
+
+func BenchmarkLeadingZeros64(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += LeadingZeros64(uint64(Input) >> (uint(i) % 64))
+	}
+	Output = s
+}
+
+func TestTrailingZeros(t *testing.T) {
+	for i := 0; i < 256; i++ {
+		ntz := tab[i].ntz
+		for k := 0; k < 64-8; k++ {
+			x := uint64(i) << uint(k)
+			want := ntz + k
+			if x <= 1<<8-1 {
+				got := TrailingZeros8(uint8(x))
+				if x == 0 {
+					want = 8
+				}
+				if got != want {
+					t.Fatalf("TrailingZeros8(%#02x) == %d; want %d", x, got, want)
+				}
+			}
+
+			if x <= 1<<16-1 {
+				got := TrailingZeros16(uint16(x))
+				if x == 0 {
+					want = 16
+				}
+				if got != want {
+					t.Fatalf("TrailingZeros16(%#04x) == %d; want %d", x, got, want)
+				}
+			}
+
+			if x <= 1<<32-1 {
+				got := TrailingZeros32(uint32(x))
+				if x == 0 {
+					want = 32
+				}
+				if got != want {
+					t.Fatalf("TrailingZeros32(%#08x) == %d; want %d", x, got, want)
+				}
+				if UintSize == 32 {
+					got = TrailingZeros(uint(x))
+					if got != want {
+						t.Fatalf("TrailingZeros(%#08x) == %d; want %d", x, got, want)
+					}
+				}
+			}
+
+			if x <= 1<<64-1 {
+				got := TrailingZeros64(uint64(x))
+				if x == 0 {
+					want = 64
+				}
+				if got != want {
+					t.Fatalf("TrailingZeros64(%#016x) == %d; want %d", x, got, want)
+				}
+				if UintSize == 64 {
+					got = TrailingZeros(uint(x))
+					if got != want {
+						t.Fatalf("TrailingZeros(%#016x) == %d; want %d", x, got, want)
+					}
+				}
+			}
+		}
+	}
+}
+
+func BenchmarkTrailingZeros(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += TrailingZeros(uint(Input) << (uint(i) % UintSize))
+	}
+	Output = s
+}
+
+func BenchmarkTrailingZeros8(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += TrailingZeros8(uint8(Input) << (uint(i) % 8))
+	}
+	Output = s
+}
+
+func BenchmarkTrailingZeros16(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += TrailingZeros16(uint16(Input) << (uint(i) % 16))
+	}
+	Output = s
+}
+
+func BenchmarkTrailingZeros32(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += TrailingZeros32(uint32(Input) << (uint(i) % 32))
+	}
+	Output = s
+}
+
+func BenchmarkTrailingZeros64(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += TrailingZeros64(uint64(Input) << (uint(i) % 64))
+	}
+	Output = s
+}
+
+func TestOnesCount(t *testing.T) {
+	var x uint64
+	for i := 0; i <= 64; i++ {
+		testOnesCount(t, x, i)
+		x = x<<1 | 1
+	}
+
+	for i := 64; i >= 0; i-- {
+		testOnesCount(t, x, i)
+		x = x << 1
+	}
+
+	for i := 0; i < 256; i++ {
+		for k := 0; k < 64-8; k++ {
+			testOnesCount(t, uint64(i)<<uint(k), tab[i].pop)
+		}
+	}
+}
+
+func testOnesCount(t *testing.T, x uint64, want int) {
+	if x <= 1<<8-1 {
+		got := OnesCount8(uint8(x))
+		if got != want {
+			t.Fatalf("OnesCount8(%#02x) == %d; want %d", uint8(x), got, want)
+		}
+	}
+
+	if x <= 1<<16-1 {
+		got := OnesCount16(uint16(x))
+		if got != want {
+			t.Fatalf("OnesCount16(%#04x) == %d; want %d", uint16(x), got, want)
+		}
+	}
+
+	if x <= 1<<32-1 {
+		got := OnesCount32(uint32(x))
+		if got != want {
+			t.Fatalf("OnesCount32(%#08x) == %d; want %d", uint32(x), got, want)
+		}
+		if UintSize == 32 {
+			got = OnesCount(uint(x))
+			if got != want {
+				t.Fatalf("OnesCount(%#08x) == %d; want %d", uint32(x), got, want)
+			}
+		}
+	}
+
+	if x <= 1<<64-1 {
+		got := OnesCount64(uint64(x))
+		if got != want {
+			t.Fatalf("OnesCount64(%#016x) == %d; want %d", x, got, want)
+		}
+		if UintSize == 64 {
+			got = OnesCount(uint(x))
+			if got != want {
+				t.Fatalf("OnesCount(%#016x) == %d; want %d", x, got, want)
+			}
+		}
+	}
+}
+
+func BenchmarkOnesCount(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += OnesCount(uint(Input))
+	}
+	Output = s
+}
+
+func BenchmarkOnesCount8(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += OnesCount8(uint8(Input))
+	}
+	Output = s
+}
+
+func BenchmarkOnesCount16(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += OnesCount16(uint16(Input))
+	}
+	Output = s
+}
+
+func BenchmarkOnesCount32(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += OnesCount32(uint32(Input))
+	}
+	Output = s
+}
+
+func BenchmarkOnesCount64(b *testing.B) {
+	var s int
+	for i := 0; i < b.N; i++ {
+		s += OnesCount64(uint64(Input))
+	}
+	Output = s
+}
+
+func TestRotateLeft(t *testing.T) {
+	var m uint64 = DeBruijn64
+	// Rotate by each k in [0, 128) and back by -k; the k&mask terms reduce k modulo the word size.
+	for k := uint(0); k < 128; k++ {
+		x8 := uint8(m)
+		got8 := RotateLeft8(x8, int(k))
+		want8 := x8<<(k&0x7) | x8>>(8-k&0x7)
+		if got8 != want8 {
+			t.Fatalf("RotateLeft8(%#02x, %d) == %#02x; want %#02x", x8, k, got8, want8)
+		}
+		got8 = RotateLeft8(want8, -int(k))
+		if got8 != x8 {
+			t.Fatalf("RotateLeft8(%#02x, -%d) == %#02x; want %#02x", want8, k, got8, x8)
+		}
+
+		x16 := uint16(m)
+		got16 := RotateLeft16(x16, int(k))
+		want16 := x16<<(k&0xf) | x16>>(16-k&0xf)
+		if got16 != want16 {
+			t.Fatalf("RotateLeft16(%#04x, %d) == %#04x; want %#04x", x16, k, got16, want16)
+		}
+		got16 = RotateLeft16(want16, -int(k))
+		if got16 != x16 {
+			t.Fatalf("RotateLeft16(%#04x, -%d) == %#04x; want %#04x", want16, k, got16, x16)
+		}
+
+		x32 := uint32(m)
+		got32 := RotateLeft32(x32, int(k))
+		want32 := x32<<(k&0x1f) | x32>>(32-k&0x1f)
+		if got32 != want32 {
+			t.Fatalf("RotateLeft32(%#08x, %d) == %#08x; want %#08x", x32, k, got32, want32)
+		}
+		got32 = RotateLeft32(want32, -int(k))
+		if got32 != x32 {
+			t.Fatalf("RotateLeft32(%#08x, -%d) == %#08x; want %#08x", want32, k, got32, x32)
+		}
+		if UintSize == 32 {
+			x := uint(m)
+			got := RotateLeft(x, int(k))
+			want := x<<(k&0x1f) | x>>(32-k&0x1f)
+			if got != want {
+				t.Fatalf("RotateLeft(%#08x, %d) == %#08x; want %#08x", x, k, got, want)
+			}
+			got = RotateLeft(want, -int(k))
+			if got != x {
+				t.Fatalf("RotateLeft(%#08x, -%d) == %#08x; want %#08x", want, k, got, x)
+			}
+		}
+
+		x64 := uint64(m)
+		got64 := RotateLeft64(x64, int(k))
+		want64 := x64<<(k&0x3f) | x64>>(64-k&0x3f)
+		if got64 != want64 {
+			t.Fatalf("RotateLeft64(%#016x, %d) == %#016x; want %#016x", x64, k, got64, want64)
+		}
+		got64 = RotateLeft64(want64, -int(k))
+		if got64 != x64 {
+			t.Fatalf("RotateLeft64(%#016x, -%d) == %#016x; want %#016x", want64, k, got64, x64)
+		}
+		if UintSize == 64 {
+			x := uint(m)
+			got := RotateLeft(x, int(k))
+			want := x<<(k&0x3f) | x>>(64-k&0x3f)
+			if got != want {
+				t.Fatalf("RotateLeft(%#016x, %d) == %#016x; want %#016x", x, k, got, want)
+			}
+			got = RotateLeft(want, -int(k))
+			if got != x {
+				t.Fatalf("RotateLeft(%#016x, -%d) == %#016x; want %#016x", want, k, got, x)
+			}
+		}
+	}
+}
+
+func BenchmarkRotateLeft(b *testing.B) {
+	var s uint
+	for i := 0; i < b.N; i++ {
+		s += RotateLeft(uint(Input), i)
+	}
+	Output = int(s)
+}
+
+func BenchmarkRotateLeft8(b *testing.B) {
+	var s uint8
+	for i := 0; i < b.N; i++ {
+		s += RotateLeft8(uint8(Input), i)
+	}
+	Output = int(s)
+}
+
+func BenchmarkRotateLeft16(b *testing.B) {
+	var s uint16
+	for i := 0; i < b.N; i++ {
+		s += RotateLeft16(uint16(Input), i)
+	}
+	Output = int(s)
+}
+
+func BenchmarkRotateLeft32(b *testing.B) {
+	var s uint32
+	for i := 0; i < b.N; i++ {
+		s += RotateLeft32(uint32(Input), i)
+	}
+	Output = int(s)
+}
+
+func BenchmarkRotateLeft64(b *testing.B) {
+	var s uint64
+	for i := 0; i < b.N; i++ {
+		s += RotateLeft64(uint64(Input), i)
+	}
+	Output = int(s)
+}
+
+func TestReverse(t *testing.T) {
+	// test each bit
+	for i := uint(0); i < 64; i++ {
+		testReverse(t, uint64(1)<<i, uint64(1)<<(63-i))
+	}
+
+	// test a few patterns
+	for _, test := range []struct {
+		x, r uint64
+	}{
+		{0, 0},
+		{0x1, 0x8 << 60},
+		{0x2, 0x4 << 60},
+		{0x3, 0xc << 60},
+		{0x4, 0x2 << 60},
+		{0x5, 0xa << 60},
+		{0x6, 0x6 << 60},
+		{0x7, 0xe << 60},
+		{0x8, 0x1 << 60},
+		{0x9, 0x9 << 60},
+		{0xa, 0x5 << 60},
+		{0xb, 0xd << 60},
+		{0xc, 0x3 << 60},
+		{0xd, 0xb << 60},
+		{0xe, 0x7 << 60},
+		{0xf, 0xf << 60},
+		{0x5686487, 0xe12616a000000000},
+		{0x0123456789abcdef, 0xf7b3d591e6a2c480},
+	} {
+		testReverse(t, test.x, test.r)
+		testReverse(t, test.r, test.x)
+	}
+}
+
+func testReverse(t *testing.T, x64, want64 uint64) {
+	x8 := uint8(x64)
+	got8 := Reverse8(x8)
+	want8 := uint8(want64 >> (64 - 8))
+	if got8 != want8 {
+		t.Fatalf("Reverse8(%#02x) == %#02x; want %#02x", x8, got8, want8)
+	}
+	// Each narrower case reverses the low bits of x64 and expects the top bits of want64.
+	x16 := uint16(x64)
+	got16 := Reverse16(x16)
+	want16 := uint16(want64 >> (64 - 16))
+	if got16 != want16 {
+		t.Fatalf("Reverse16(%#04x) == %#04x; want %#04x", x16, got16, want16)
+	}
+
+	x32 := uint32(x64)
+	got32 := Reverse32(x32)
+	want32 := uint32(want64 >> (64 - 32))
+	if got32 != want32 {
+		t.Fatalf("Reverse32(%#08x) == %#08x; want %#08x", x32, got32, want32)
+	}
+	if UintSize == 32 {
+		x := uint(x32)
+		got := Reverse(x)
+		want := uint(want32)
+		if got != want {
+			t.Fatalf("Reverse(%#08x) == %#08x; want %#08x", x, got, want)
+		}
+	}
+
+	got64 := Reverse64(x64)
+	if got64 != want64 {
+		t.Fatalf("Reverse64(%#016x) == %#016x; want %#016x", x64, got64, want64)
+	}
+	if UintSize == 64 {
+		x := uint(x64)
+		got := Reverse(x)
+		want := uint(want64)
+		if got != want {
+			t.Fatalf("Reverse(%#016x) == %#016x; want %#016x", x, got, want)
+		}
+	}
+}
+
+func BenchmarkReverse(b *testing.B) {
+	var s uint
+	for i := 0; i < b.N; i++ {
+		s += Reverse(uint(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverse8(b *testing.B) {
+	var s uint8
+	for i := 0; i < b.N; i++ {
+		s += Reverse8(uint8(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverse16(b *testing.B) {
+	var s uint16
+	for i := 0; i < b.N; i++ {
+		s += Reverse16(uint16(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverse32(b *testing.B) {
+	var s uint32
+	for i := 0; i < b.N; i++ {
+		s += Reverse32(uint32(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverse64(b *testing.B) {
+	var s uint64
+	for i := 0; i < b.N; i++ {
+		s += Reverse64(uint64(i))
+	}
+	Output = int(s)
+}
+
+func TestReverseBytes(t *testing.T) {
+	for _, test := range []struct {
+		x, r uint64
+	}{
+		{0, 0},
+		{0x01, 0x01 << 56},
+		{0x0123, 0x2301 << 48},
+		{0x012345, 0x452301 << 40},
+		{0x01234567, 0x67452301 << 32},
+		{0x0123456789, 0x8967452301 << 24},
+		{0x0123456789ab, 0xab8967452301 << 16},
+		{0x0123456789abcd, 0xcdab8967452301 << 8},
+		{0x0123456789abcdef, 0xefcdab8967452301 << 0},
+	} {
+		testReverseBytes(t, test.x, test.r)
+		testReverseBytes(t, test.r, test.x)
+	}
+}
+
+func testReverseBytes(t *testing.T, x64, want64 uint64) {
+	x16 := uint16(x64)
+	got16 := ReverseBytes16(x16)
+	want16 := uint16(want64 >> (64 - 16))
+	if got16 != want16 {
+		t.Fatalf("ReverseBytes16(%#04x) == %#04x; want %#04x", x16, got16, want16)
+	}
+
+	x32 := uint32(x64)
+	got32 := ReverseBytes32(x32)
+	want32 := uint32(want64 >> (64 - 32))
+	if got32 != want32 {
+		t.Fatalf("ReverseBytes32(%#08x) == %#08x; want %#08x", x32, got32, want32)
+	}
+	if UintSize == 32 {
+		x := uint(x32)
+		got := ReverseBytes(x)
+		want := uint(want32)
+		if got != want {
+			t.Fatalf("ReverseBytes(%#08x) == %#08x; want %#08x", x, got, want)
+		}
+	}
+
+	got64 := ReverseBytes64(x64)
+	if got64 != want64 {
+		t.Fatalf("ReverseBytes64(%#016x) == %#016x; want %#016x", x64, got64, want64)
+	}
+	if UintSize == 64 {
+		x := uint(x64)
+		got := ReverseBytes(x)
+		want := uint(want64)
+		if got != want {
+			t.Fatalf("ReverseBytes(%#016x) == %#016x; want %#016x", x, got, want)
+		}
+	}
+}
+
+func BenchmarkReverseBytes(b *testing.B) {
+	var s uint
+	for i := 0; i < b.N; i++ {
+		s += ReverseBytes(uint(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverseBytes16(b *testing.B) {
+	var s uint16
+	for i := 0; i < b.N; i++ {
+		s += ReverseBytes16(uint16(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverseBytes32(b *testing.B) {
+	var s uint32
+	for i := 0; i < b.N; i++ {
+		s += ReverseBytes32(uint32(i))
+	}
+	Output = int(s)
+}
+
+func BenchmarkReverseBytes64(b *testing.B) {
+	var s uint64
+	for i := 0; i < b.N; i++ {
+		s += ReverseBytes64(uint64(i))
+	}
+	Output = int(s)
+}
+
+func TestLen(t *testing.T) {
+	for i := 0; i < 256; i++ {
+		len := 8 - tab[i].nlz
+		for k := 0; k < 64-8; k++ {
+			x := uint64(i) << uint(k)
+			want := 0
+			if x != 0 {
+				want = len + k
+			}
+			if x <= 1<<8-1 {
+				got := Len8(uint8(x))
+				if got != want {
+					t.Fatalf("Len8(%#02x) == %d; want %d", x, got, want)
+				}
+			}
+
+			if x <= 1<<16-1 {
+				got := Len16(uint16(x))
+				if got != want {
+					t.Fatalf("Len16(%#04x) == %d; want %d", x, got, want)
+				}
+			}
+
+			if x <= 1<<32-1 {
+				got := Len32(uint32(x))
+				if got != want {
+					t.Fatalf("Len32(%#08x) == %d; want %d", x, got, want)
+				}
+				if UintSize == 32 {
+					got := Len(uint(x))
+					if got != want {
+						t.Fatalf("Len(%#08x) == %d; want %d", x, got, want)
+					}
+				}
+			}
+
+			if x <= 1<<64-1 {
+				got := Len64(uint64(x))
+				if got != want {
+					t.Fatalf("Len64(%#016x) == %d; want %d", x, got, want)
+				}
+				if UintSize == 64 {
+					got := Len(uint(x))
+					if got != want {
+						t.Fatalf("Len(%#016x) == %d; want %d", x, got, want)
+					}
+				}
+			}
+		}
+	}
+}
+
+const (
+	_M   = 1<<UintSize - 1
+	_M32 = 1<<32 - 1
+	_M64 = 1<<64 - 1
+)
+
+func TestAddSubUint(t *testing.T) {
+	test := func(msg string, f func(x, y, c uint) (z, cout uint), x, y, c, z, cout uint) {
+		z1, cout1 := f(x, y, c)
+		if z1 != z || cout1 != cout {
+			t.Errorf("%s: got z:cout = %#x:%#x; want %#x:%#x", msg, z1, cout1, z, cout)
+		}
+	}
+	for _, a := range []struct{ x, y, c, z, cout uint }{
+		{0, 0, 0, 0, 0},
+		{0, 1, 0, 1, 0},
+		{0, 0, 1, 1, 0},
+		{0, 1, 1, 2, 0},
+		{12345, 67890, 0, 80235, 0},
+		{12345, 67890, 1, 80236, 0},
+		{_M, 1, 0, 0, 1},
+		{_M, 0, 1, 0, 1},
+		{_M, 1, 1, 1, 1},
+		{_M, _M, 0, _M - 1, 1},
+		{_M, _M, 1, _M, 1},
+	} {
+		test("Add", Add, a.x, a.y, a.c, a.z, a.cout)
+		test("Add symmetric", Add, a.y, a.x, a.c, a.z, a.cout)
+		test("Sub", Sub, a.z, a.x, a.c, a.y, a.cout)
+		test("Sub symmetric", Sub, a.z, a.y, a.c, a.x, a.cout)
+		// The above code can't test intrinsic implementation, because the passed function is not called directly.
+		// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
+		test("Add intrinsic", func(x, y, c uint) (uint, uint) { return Add(x, y, c) }, a.x, a.y, a.c, a.z, a.cout)
+		test("Add intrinsic symmetric", func(x, y, c uint) (uint, uint) { return Add(x, y, c) }, a.y, a.x, a.c, a.z, a.cout)
+		test("Sub intrinsic", func(x, y, c uint) (uint, uint) { return Sub(x, y, c) }, a.z, a.x, a.c, a.y, a.cout)
+		test("Sub intrinsic symmetric", func(x, y, c uint) (uint, uint) { return Sub(x, y, c) }, a.z, a.y, a.c, a.x, a.cout)
+
+	}
+}
+
+func TestAddSubUint32(t *testing.T) {
+	test := func(msg string, f func(x, y, c uint32) (z, cout uint32), x, y, c, z, cout uint32) {
+		z1, cout1 := f(x, y, c)
+		if z1 != z || cout1 != cout {
+			t.Errorf("%s: got z:cout = %#x:%#x; want %#x:%#x", msg, z1, cout1, z, cout)
+		}
+	}
+	for _, a := range []struct{ x, y, c, z, cout uint32 }{
+		{0, 0, 0, 0, 0},
+		{0, 1, 0, 1, 0},
+		{0, 0, 1, 1, 0},
+		{0, 1, 1, 2, 0},
+		{12345, 67890, 0, 80235, 0},
+		{12345, 67890, 1, 80236, 0},
+		{_M32, 1, 0, 0, 1},
+		{_M32, 0, 1, 0, 1},
+		{_M32, 1, 1, 1, 1},
+		{_M32, _M32, 0, _M32 - 1, 1},
+		{_M32, _M32, 1, _M32, 1},
+	} {
+		test("Add32", Add32, a.x, a.y, a.c, a.z, a.cout)
+		test("Add32 symmetric", Add32, a.y, a.x, a.c, a.z, a.cout)
+		test("Sub32", Sub32, a.z, a.x, a.c, a.y, a.cout)
+		test("Sub32 symmetric", Sub32, a.z, a.y, a.c, a.x, a.cout)
+	}
+}
+
+func TestAddSubUint64(t *testing.T) {
+	test := func(msg string, f func(x, y, c uint64) (z, cout uint64), x, y, c, z, cout uint64) {
+		z1, cout1 := f(x, y, c)
+		if z1 != z || cout1 != cout {
+			t.Errorf("%s: got z:cout = %#x:%#x; want %#x:%#x", msg, z1, cout1, z, cout)
+		}
+	}
+	for _, a := range []struct{ x, y, c, z, cout uint64 }{
+		{0, 0, 0, 0, 0},
+		{0, 1, 0, 1, 0},
+		{0, 0, 1, 1, 0},
+		{0, 1, 1, 2, 0},
+		{12345, 67890, 0, 80235, 0},
+		{12345, 67890, 1, 80236, 0},
+		{_M64, 1, 0, 0, 1},
+		{_M64, 0, 1, 0, 1},
+		{_M64, 1, 1, 1, 1},
+		{_M64, _M64, 0, _M64 - 1, 1},
+		{_M64, _M64, 1, _M64, 1},
+	} {
+		test("Add64", Add64, a.x, a.y, a.c, a.z, a.cout)
+		test("Add64 symmetric", Add64, a.y, a.x, a.c, a.z, a.cout)
+		test("Sub64", Sub64, a.z, a.x, a.c, a.y, a.cout)
+		test("Sub64 symmetric", Sub64, a.z, a.y, a.c, a.x, a.cout)
+		// The above code can't test intrinsic implementation, because the passed function is not called directly.
+		// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
+		test("Add64 intrinsic", func(x, y, c uint64) (uint64, uint64) { return Add64(x, y, c) }, a.x, a.y, a.c, a.z, a.cout)
+		test("Add64 intrinsic symmetric", func(x, y, c uint64) (uint64, uint64) { return Add64(x, y, c) }, a.y, a.x, a.c, a.z, a.cout)
+		test("Sub64 intrinsic", func(x, y, c uint64) (uint64, uint64) { return Sub64(x, y, c) }, a.z, a.x, a.c, a.y, a.cout)
+		test("Sub64 intrinsic symmetric", func(x, y, c uint64) (uint64, uint64) { return Sub64(x, y, c) }, a.z, a.y, a.c, a.x, a.cout)
+	}
+}
+
+func TestAdd64OverflowPanic(t *testing.T) {
+	// Test that 64-bit overflow panics fire correctly.
+	// These are designed to improve coverage of compiler intrinsics.
+	tests := []func(uint64, uint64) uint64{
+		func(a, b uint64) uint64 {
+			x, c := Add64(a, b, 0)
+			if c > 0 {
+				panic("overflow")
+			}
+			return x
+		},
+		func(a, b uint64) uint64 {
+			x, c := Add64(a, b, 0)
+			if c != 0 {
+				panic("overflow")
+			}
+			return x
+		},
+		func(a, b uint64) uint64 {
+			x, c := Add64(a, b, 0)
+			if c == 1 {
+				panic("overflow")
+			}
+			return x
+		},
+		func(a, b uint64) uint64 {
+			x, c := Add64(a, b, 0)
+			if c != 1 {
+				return x
+			}
+			panic("overflow")
+		},
+		func(a, b uint64) uint64 {
+			x, c := Add64(a, b, 0)
+			if c == 0 {
+				return x
+			}
+			panic("overflow")
+		},
+	}
+	for _, test := range tests {
+		shouldPanic := func(f func()) {
+			defer func() {
+				if err := recover(); err == nil {
+					t.Fatalf("expected panic")
+				}
+			}()
+			f()
+		}
+
+		// overflow
+		shouldPanic(func() { test(_M64, 1) })
+		shouldPanic(func() { test(1, _M64) })
+		shouldPanic(func() { test(_M64, _M64) })
+
+		// no overflow
+		test(_M64, 0)
+		test(0, 0)
+		test(1, 1)
+	}
+}
+
+func TestSub64OverflowPanic(t *testing.T) {
+	// Test that 64-bit overflow panics fire correctly.
+	// These are designed to improve coverage of compiler intrinsics.
+	tests := []func(uint64, uint64) uint64{
+		func(a, b uint64) uint64 {
+			x, c := Sub64(a, b, 0)
+			if c > 0 {
+				panic("overflow")
+			}
+			return x
+		},
+		func(a, b uint64) uint64 {
+			x, c := Sub64(a, b, 0)
+			if c != 0 {
+				panic("overflow")
+			}
+			return x
+		},
+		func(a, b uint64) uint64 {
+			x, c := Sub64(a, b, 0)
+			if c == 1 {
+				panic("overflow")
+			}
+			return x
+		},
+		func(a, b uint64) uint64 {
+			x, c := Sub64(a, b, 0)
+			if c != 1 {
+				return x
+			}
+			panic("overflow")
+		},
+		func(a, b uint64) uint64 {
+			x, c := Sub64(a, b, 0)
+			if c == 0 {
+				return x
+			}
+			panic("overflow")
+		},
+	}
+	for _, test := range tests {
+		shouldPanic := func(f func()) {
+			defer func() {
+				if err := recover(); err == nil {
+					t.Fatalf("expected panic")
+				}
+			}()
+			f()
+		}
+
+		// overflow
+		shouldPanic(func() { test(0, 1) })
+		shouldPanic(func() { test(1, _M64) })
+		shouldPanic(func() { test(_M64-1, _M64) })
+
+		// no overflow
+		test(_M64, 0)
+		test(0, 0)
+		test(1, 1)
+	}
+}
+
+func TestMulDiv(t *testing.T) {
+	testMul := func(msg string, f func(x, y uint) (hi, lo uint), x, y, hi, lo uint) {
+		hi1, lo1 := f(x, y)
+		if hi1 != hi || lo1 != lo {
+			t.Errorf("%s: got hi:lo = %#x:%#x; want %#x:%#x", msg, hi1, lo1, hi, lo)
+		}
+	}
+	testDiv := func(msg string, f func(hi, lo, y uint) (q, r uint), hi, lo, y, q, r uint) {
+		q1, r1 := f(hi, lo, y)
+		if q1 != q || r1 != r {
+			t.Errorf("%s: got q:r = %#x:%#x; want %#x:%#x", msg, q1, r1, q, r)
+		}
+	}
+	for _, a := range []struct {
+		x, y      uint
+		hi, lo, r uint
+	}{
+		{1 << (UintSize - 1), 2, 1, 0, 1},
+		{_M, _M, _M - 1, 1, 42},
+	} {
+		testMul("Mul", Mul, a.x, a.y, a.hi, a.lo)
+		testMul("Mul symmetric", Mul, a.y, a.x, a.hi, a.lo)
+		testDiv("Div", Div, a.hi, a.lo+a.r, a.y, a.x, a.r)
+		testDiv("Div symmetric", Div, a.hi, a.lo+a.r, a.x, a.y, a.r)
+		// The above code can't test intrinsic implementation, because the passed function is not called directly.
+		// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
+		testMul("Mul intrinsic", func(x, y uint) (uint, uint) { return Mul(x, y) }, a.x, a.y, a.hi, a.lo)
+		testMul("Mul intrinsic symmetric", func(x, y uint) (uint, uint) { return Mul(x, y) }, a.y, a.x, a.hi, a.lo)
+		testDiv("Div intrinsic", func(hi, lo, y uint) (uint, uint) { return Div(hi, lo, y) }, a.hi, a.lo+a.r, a.y, a.x, a.r)
+		testDiv("Div intrinsic symmetric", func(hi, lo, y uint) (uint, uint) { return Div(hi, lo, y) }, a.hi, a.lo+a.r, a.x, a.y, a.r)
+	}
+}
+
+func TestMulDiv32(t *testing.T) {
+	testMul := func(msg string, f func(x, y uint32) (hi, lo uint32), x, y, hi, lo uint32) {
+		hi1, lo1 := f(x, y)
+		if hi1 != hi || lo1 != lo {
+			t.Errorf("%s: got hi:lo = %#x:%#x; want %#x:%#x", msg, hi1, lo1, hi, lo)
+		}
+	}
+	testDiv := func(msg string, f func(hi, lo, y uint32) (q, r uint32), hi, lo, y, q, r uint32) {
+		q1, r1 := f(hi, lo, y)
+		if q1 != q || r1 != r {
+			t.Errorf("%s: got q:r = %#x:%#x; want %#x:%#x", msg, q1, r1, q, r)
+		}
+	}
+	for _, a := range []struct {
+		x, y      uint32
+		hi, lo, r uint32
+	}{
+		{1 << 31, 2, 1, 0, 1},
+		{0xc47dfa8c, 50911, 0x98a4, 0x998587f4, 13},
+		{_M32, _M32, _M32 - 1, 1, 42},
+	} {
+		testMul("Mul32", Mul32, a.x, a.y, a.hi, a.lo)
+		testMul("Mul32 symmetric", Mul32, a.y, a.x, a.hi, a.lo)
+		testDiv("Div32", Div32, a.hi, a.lo+a.r, a.y, a.x, a.r)
+		testDiv("Div32 symmetric", Div32, a.hi, a.lo+a.r, a.x, a.y, a.r)
+	}
+}
+
+func TestMulDiv64(t *testing.T) {
+	testMul := func(msg string, f func(x, y uint64) (hi, lo uint64), x, y, hi, lo uint64) {
+		hi1, lo1 := f(x, y)
+		if hi1 != hi || lo1 != lo {
+			t.Errorf("%s: got hi:lo = %#x:%#x; want %#x:%#x", msg, hi1, lo1, hi, lo)
+		}
+	}
+	testDiv := func(msg string, f func(hi, lo, y uint64) (q, r uint64), hi, lo, y, q, r uint64) {
+		q1, r1 := f(hi, lo, y)
+		if q1 != q || r1 != r {
+			t.Errorf("%s: got q:r = %#x:%#x; want %#x:%#x", msg, q1, r1, q, r)
+		}
+	}
+	for _, a := range []struct {
+		x, y      uint64
+		hi, lo, r uint64
+	}{
+		{1 << 63, 2, 1, 0, 1},
+		{0x3626229738a3b9, 0xd8988a9f1cc4a61, 0x2dd0712657fe8, 0x9dd6a3364c358319, 13},
+		{_M64, _M64, _M64 - 1, 1, 42},
+	} {
+		testMul("Mul64", Mul64, a.x, a.y, a.hi, a.lo)
+		testMul("Mul64 symmetric", Mul64, a.y, a.x, a.hi, a.lo)
+		testDiv("Div64", Div64, a.hi, a.lo+a.r, a.y, a.x, a.r)
+		testDiv("Div64 symmetric", Div64, a.hi, a.lo+a.r, a.x, a.y, a.r)
+		// The above code can't test intrinsic implementation, because the passed function is not called directly.
+		// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
+		testMul("Mul64 intrinsic", func(x, y uint64) (uint64, uint64) { return Mul64(x, y) }, a.x, a.y, a.hi, a.lo)
+		testMul("Mul64 intrinsic symmetric", func(x, y uint64) (uint64, uint64) { return Mul64(x, y) }, a.y, a.x, a.hi, a.lo)
+		testDiv("Div64 intrinsic", func(hi, lo, y uint64) (uint64, uint64) { return Div64(hi, lo, y) }, a.hi, a.lo+a.r, a.y, a.x, a.r)
+		testDiv("Div64 intrinsic symmetric", func(hi, lo, y uint64) (uint64, uint64) { return Div64(hi, lo, y) }, a.hi, a.lo+a.r, a.x, a.y, a.r)
+	}
+}
+
+const (
+	divZeroError  = "runtime error: integer divide by zero"
+	overflowError = "runtime error: integer overflow"
+)
+
+func TestDivPanicOverflow(t *testing.T) {
+	// Expect an overflowError panic; err is printed directly because e is nil when err is not a runtime.Error.
+	defer func() {
+		if err := recover(); err == nil {
+			t.Error("Div should have panicked when y<=hi")
+		} else if e, ok := err.(runtime.Error); !ok || e.Error() != overflowError {
+			t.Errorf("Div expected panic: %q, got: %q ", overflowError, err)
+		}
+	}()
+	q, r := Div(1, 0, 1)
+	t.Errorf("undefined q, r = %v, %v calculated when Div should have panicked", q, r)
+}
+
+func TestDiv32PanicOverflow(t *testing.T) {
+	// Expect an overflowError panic; err is printed directly because e is nil when err is not a runtime.Error.
+	defer func() {
+		if err := recover(); err == nil {
+			t.Error("Div32 should have panicked when y<=hi")
+		} else if e, ok := err.(runtime.Error); !ok || e.Error() != overflowError {
+			t.Errorf("Div32 expected panic: %q, got: %q ", overflowError, err)
+		}
+	}()
+	q, r := Div32(1, 0, 1)
+	t.Errorf("undefined q, r = %v, %v calculated when Div32 should have panicked", q, r)
+}
+
+func TestDiv64PanicOverflow(t *testing.T) {
+	// Expect an overflowError panic; err is printed directly because e is nil when err is not a runtime.Error.
+	defer func() {
+		if err := recover(); err == nil {
+			t.Error("Div64 should have panicked when y<=hi")
+		} else if e, ok := err.(runtime.Error); !ok || e.Error() != overflowError {
+			t.Errorf("Div64 expected panic: %q, got: %q ", overflowError, err)
+		}
+	}()
+	q, r := Div64(1, 0, 1)
+	t.Errorf("undefined q, r = %v, %v calculated when Div64 should have panicked", q, r)
+}
+
+func TestDivPanicZero(t *testing.T) {
+	// Expect a divZeroError panic; err is printed directly because e is nil when err is not a runtime.Error.
+	defer func() {
+		if err := recover(); err == nil {
+			t.Error("Div should have panicked when y==0")
+		} else if e, ok := err.(runtime.Error); !ok || e.Error() != divZeroError {
+			t.Errorf("Div expected panic: %q, got: %q ", divZeroError, err)
+		}
+	}()
+	q, r := Div(1, 1, 0)
+	t.Errorf("undefined q, r = %v, %v calculated when Div should have panicked", q, r)
+}
+
+func TestDiv32PanicZero(t *testing.T) {
+	// Expect a divZeroError panic; err is printed directly because e is nil when err is not a runtime.Error.
+	defer func() {
+		if err := recover(); err == nil {
+			t.Error("Div32 should have panicked when y==0")
+		} else if e, ok := err.(runtime.Error); !ok || e.Error() != divZeroError {
+			t.Errorf("Div32 expected panic: %q, got: %q ", divZeroError, err)
+		}
+	}()
+	q, r := Div32(1, 1, 0)
+	t.Errorf("undefined q, r = %v, %v calculated when Div32 should have panicked", q, r)
+}
+
+func TestDiv64PanicZero(t *testing.T) {
+	// Expect a divZeroError panic; err is printed directly because e is nil when err is not a runtime.Error.
+	defer func() {
+		if err := recover(); err == nil {
+			t.Error("Div64 should have panicked when y==0")
+		} else if e, ok := err.(runtime.Error); !ok || e.Error() != divZeroError {
+			t.Errorf("Div64 expected panic: %q, got: %q ", divZeroError, err)
+		}
+	}()
+	q, r := Div64(1, 1, 0)
+	t.Errorf("undefined q, r = %v, %v calculated when Div64 should have panicked", q, r)
+}
+
+func TestRem32(t *testing.T) {
+	// Sanity check: for non-overflowing dividends (hi < y), Rem32 must
+	// return the same remainder as Div32 for each of the 1000 divisors tried.
+	hi, lo, y := uint32(510510), uint32(9699690), uint32(510510+1) // ensure hi < y
+	for i := 0; i < 1000; i++ {
+		r := Rem32(hi, lo, y)
+		_, r2 := Div32(hi, lo, y)
+		if r != r2 {
+			t.Errorf("Rem32(%v, %v, %v) returned %v, but Div32 returned rem %v", hi, lo, y, r, r2)
+		}
+		y += 13
+	}
+}
+
+func TestRem32Overflow(t *testing.T) {
+	// To trigger a quotient overflow, we need y <= hi
+	hi, lo, y := uint32(510510), uint32(9699690), uint32(7)
+	for i := 0; i < 1000; i++ {
+		r := Rem32(hi, lo, y)
+		_, r2 := Div64(0, uint64(hi)<<32|uint64(lo), uint64(y))
+		if r != uint32(r2) {
+			t.Errorf("Rem32(%v, %v, %v) returned %v, but Div64 returned rem %v", hi, lo, y, r, r2)
+		}
+		y += 13
+	}
+}
+
+func TestRem64(t *testing.T) {
+	// Sanity check: for non-overflowing dividends (hi < y), Rem64 must
+	// return the same remainder as Div64 for each of the 1000 divisors tried.
+	hi, lo, y := uint64(510510), uint64(9699690), uint64(510510+1) // ensure hi < y
+	for i := 0; i < 1000; i++ {
+		r := Rem64(hi, lo, y)
+		_, r2 := Div64(hi, lo, y)
+		if r != r2 {
+			t.Errorf("Rem64(%v, %v, %v) returned %v, but Div64 returned rem %v", hi, lo, y, r, r2)
+		}
+		y += 13
+	}
+}
+
+func TestRem64Overflow(t *testing.T) {
+	Rem64Tests := []struct {
+		hi, lo, y uint64
+		rem       uint64
+	}{
+		// Testcases computed using Python 3, as:
+		//   >>> hi = 42; lo = 1119; y = 42
+		//   >>> ((hi<<64)+lo) % y
+		{42, 1119, 42, 27},
+		{42, 1119, 38, 9},
+		{42, 1119, 26, 23},
+		{469, 0, 467, 271},
+		{469, 0, 113, 58},
+		{111111, 111111, 1171, 803},
+		{3968194946088682615, 3192705705065114702, 1000037, 56067},
+	}
+
+	for _, rt := range Rem64Tests {
+		if rt.hi < rt.y {
+			t.Fatalf("Rem64(%v, %v, %v) is not a test with quo overflow", rt.hi, rt.lo, rt.y)
+		}
+		rem := Rem64(rt.hi, rt.lo, rt.y)
+		if rem != rt.rem {
+			t.Errorf("Rem64(%v, %v, %v) returned %v, wanted %v",
+				rt.hi, rt.lo, rt.y, rem, rt.rem)
+		}
+	}
+}
+
+func BenchmarkAdd(b *testing.B) {
+	var z, c uint
+	for i := 0; i < b.N; i++ {
+		z, c = Add(uint(Input), uint(i), c)
+	}
+	Output = int(z + c)
+}
+
+func BenchmarkAdd32(b *testing.B) {
+	var z, c uint32
+	for i := 0; i < b.N; i++ {
+		z, c = Add32(uint32(Input), uint32(i), c)
+	}
+	Output = int(z + c)
+}
+
+func BenchmarkAdd64(b *testing.B) {
+	var z, c uint64
+	for i := 0; i < b.N; i++ {
+		z, c = Add64(uint64(Input), uint64(i), c)
+	}
+	Output = int(z + c)
+}
+
+func BenchmarkAdd64multiple(b *testing.B) {
+	var z0 = uint64(Input)
+	var z1 = uint64(Input)
+	var z2 = uint64(Input)
+	var z3 = uint64(Input)
+	for i := 0; i < b.N; i++ {
+		var c uint64
+		z0, c = Add64(z0, uint64(i), c)
+		z1, c = Add64(z1, uint64(i), c)
+		z2, c = Add64(z2, uint64(i), c)
+		z3, _ = Add64(z3, uint64(i), c)
+	}
+	Output = int(z0 + z1 + z2 + z3)
+}
+
+func BenchmarkSub(b *testing.B) {
+	var z, c uint
+	for i := 0; i < b.N; i++ {
+		z, c = Sub(uint(Input), uint(i), c)
+	}
+	Output = int(z + c)
+}
+
+func BenchmarkSub32(b *testing.B) {
+	var z, c uint32
+	for i := 0; i < b.N; i++ {
+		z, c = Sub32(uint32(Input), uint32(i), c)
+	}
+	Output = int(z + c)
+}
+
+func BenchmarkSub64(b *testing.B) {
+	var z, c uint64
+	for i := 0; i < b.N; i++ {
+		z, c = Sub64(uint64(Input), uint64(i), c)
+	}
+	Output = int(z + c)
+}
+
+func BenchmarkSub64multiple(b *testing.B) {
+	var z0 = uint64(Input)
+	var z1 = uint64(Input)
+	var z2 = uint64(Input)
+	var z3 = uint64(Input)
+	for i := 0; i < b.N; i++ {
+		var c uint64
+		z0, c = Sub64(z0, uint64(i), c)
+		z1, c = Sub64(z1, uint64(i), c)
+		z2, c = Sub64(z2, uint64(i), c)
+		z3, _ = Sub64(z3, uint64(i), c)
+	}
+	Output = int(z0 + z1 + z2 + z3)
+}
+
+func BenchmarkMul(b *testing.B) {
+	var hi, lo uint
+	for i := 0; i < b.N; i++ {
+		hi, lo = Mul(uint(Input), uint(i))
+	}
+	Output = int(hi + lo)
+}
+
+func BenchmarkMul32(b *testing.B) {
+	var hi, lo uint32
+	for i := 0; i < b.N; i++ {
+		hi, lo = Mul32(uint32(Input), uint32(i))
+	}
+	Output = int(hi + lo)
+}
+
+func BenchmarkMul64(b *testing.B) {
+	var hi, lo uint64
+	for i := 0; i < b.N; i++ {
+		hi, lo = Mul64(uint64(Input), uint64(i))
+	}
+	Output = int(hi + lo)
+}
+
+func BenchmarkDiv(b *testing.B) {
+	var q, r uint
+	for i := 0; i < b.N; i++ {
+		q, r = Div(1, uint(i), uint(Input))
+	}
+	Output = int(q + r)
+}
+
+func BenchmarkDiv32(b *testing.B) {
+	var q, r uint32
+	for i := 0; i < b.N; i++ {
+		q, r = Div32(1, uint32(i), uint32(Input))
+	}
+	Output = int(q + r)
+}
+
+func BenchmarkDiv64(b *testing.B) {
+	var q, r uint64
+	for i := 0; i < b.N; i++ {
+		q, r = Div64(1, uint64(i), uint64(Input))
+	}
+	Output = int(q + r)
+}
+
+// ----------------------------------------------------------------------------
+// Testing support
+
+type entry = struct {
+	nlz, ntz, pop int
+}
+
+// tab contains results for all uint8 values
+var tab [256]entry
+
+func init() {
+	tab[0] = entry{8, 8, 0}
+	for i := 1; i < len(tab); i++ {
+		// nlz
+		x := i // x != 0
+		n := 0
+		for x&0x80 == 0 {
+			n++
+			x <<= 1
+		}
+		tab[i].nlz = n
+
+		// ntz
+		x = i // x != 0
+		n = 0
+		for x&1 == 0 {
+			n++
+			x >>= 1
+		}
+		tab[i].ntz = n
+
+		// pop
+		x = i // x != 0
+		n = 0
+		for x != 0 {
+			n += int(x & 1)
+			x >>= 1
+		}
+		tab[i].pop = n
+	}
+}
diff --git a/src/math/bits/example_math_test.go b/src/math/bits/example_math_test.go
new file mode 100644
index 0000000..4bb466f
--- /dev/null
+++ b/src/math/bits/example_math_test.go
@@ -0,0 +1,202 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bits_test
+
+import (
+	"fmt"
+	"math/bits"
+)
+
+func ExampleAdd32() {
+	// First number is 33<<32 + 12
+	n1 := []uint32{33, 12}
+	// Second number is 21<<32 + 23
+	n2 := []uint32{21, 23}
+	// Add them together without producing carry.
+	d1, carry := bits.Add32(n1[1], n2[1], 0)
+	d0, _ := bits.Add32(n1[0], n2[0], carry)
+	nsum := []uint32{d0, d1}
+	fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+
+	// First number is 1<<32 + 2147483648
+	n1 = []uint32{1, 0x80000000}
+	// Second number is 1<<32 + 2147483648
+	n2 = []uint32{1, 0x80000000}
+	// Add them together producing carry.
+	d1, carry = bits.Add32(n1[1], n2[1], 0)
+	d0, _ = bits.Add32(n1[0], n2[0], carry)
+	nsum = []uint32{d0, d1}
+	fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+	// Output:
+	// [33 12] + [21 23] = [54 35] (carry bit was 0)
+	// [1 2147483648] + [1 2147483648] = [3 0] (carry bit was 1)
+}
+
+func ExampleAdd64() {
+	// First number is 33<<64 + 12
+	n1 := []uint64{33, 12}
+	// Second number is 21<<64 + 23
+	n2 := []uint64{21, 23}
+	// Add them together without producing carry.
+	d1, carry := bits.Add64(n1[1], n2[1], 0)
+	d0, _ := bits.Add64(n1[0], n2[0], carry)
+	nsum := []uint64{d0, d1}
+	fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+
+	// First number is 1<<64 + 9223372036854775808
+	n1 = []uint64{1, 0x8000000000000000}
+	// Second number is 1<<64 + 9223372036854775808
+	n2 = []uint64{1, 0x8000000000000000}
+	// Add them together producing carry.
+	d1, carry = bits.Add64(n1[1], n2[1], 0)
+	d0, _ = bits.Add64(n1[0], n2[0], carry)
+	nsum = []uint64{d0, d1}
+	fmt.Printf("%v + %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+	// Output:
+	// [33 12] + [21 23] = [54 35] (carry bit was 0)
+	// [1 9223372036854775808] + [1 9223372036854775808] = [3 0] (carry bit was 1)
+}
+
+func ExampleSub32() {
+	// First number is 33<<32 + 23
+	n1 := []uint32{33, 23}
+	// Second number is 21<<32 + 12
+	n2 := []uint32{21, 12}
+	// Subtract the second number from the first; the low words need no borrow.
+	d1, carry := bits.Sub32(n1[1], n2[1], 0)
+	d0, _ := bits.Sub32(n1[0], n2[0], carry)
+	nsum := []uint32{d0, d1}
+	fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+
+	// First number is 3<<32 + 2147483647
+	n1 = []uint32{3, 0x7fffffff}
+	// Second number is 1<<32 + 2147483648
+	n2 = []uint32{1, 0x80000000}
+	// Subtract the second number from the first; the low words produce a borrow.
+	d1, carry = bits.Sub32(n1[1], n2[1], 0)
+	d0, _ = bits.Sub32(n1[0], n2[0], carry)
+	nsum = []uint32{d0, d1}
+	fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+	// Output:
+	// [33 23] - [21 12] = [12 11] (carry bit was 0)
+	// [3 2147483647] - [1 2147483648] = [1 4294967295] (carry bit was 1)
+}
+
+func ExampleSub64() {
+	// First number is 33<<64 + 23
+	n1 := []uint64{33, 23}
+	// Second number is 21<<64 + 12
+	n2 := []uint64{21, 12}
+	// Subtract the second number from the first; the low words need no borrow.
+	d1, carry := bits.Sub64(n1[1], n2[1], 0)
+	d0, _ := bits.Sub64(n1[0], n2[0], carry)
+	nsum := []uint64{d0, d1}
+	fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+
+	// First number is 3<<64 + 9223372036854775807
+	n1 = []uint64{3, 0x7fffffffffffffff}
+	// Second number is 1<<64 + 9223372036854775808
+	n2 = []uint64{1, 0x8000000000000000}
+	// Subtract the second number from the first; the low words produce a borrow.
+	d1, carry = bits.Sub64(n1[1], n2[1], 0)
+	d0, _ = bits.Sub64(n1[0], n2[0], carry)
+	nsum = []uint64{d0, d1}
+	fmt.Printf("%v - %v = %v (carry bit was %v)\n", n1, n2, nsum, carry)
+	// Output:
+	// [33 23] - [21 12] = [12 11] (carry bit was 0)
+	// [3 9223372036854775807] - [1 9223372036854775808] = [1 18446744073709551615] (carry bit was 1)
+}
+
+func ExampleMul32() {
+	// First number is 0<<32 + 12
+	n1 := []uint32{0, 12}
+	// Second number is 0<<32 + 12
+	n2 := []uint32{0, 12}
+	// Multiply them together without producing overflow.
+	hi, lo := bits.Mul32(n1[1], n2[1])
+	nsum := []uint32{hi, lo}
+	fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum)
+
+	// First number is 0<<32 + 2147483648
+	n1 = []uint32{0, 0x80000000}
+	// Second number is 0<<32 + 2
+	n2 = []uint32{0, 2}
+	// Multiply them together producing overflow.
+	hi, lo = bits.Mul32(n1[1], n2[1])
+	nsum = []uint32{hi, lo}
+	fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum)
+	// Output:
+	// 12 * 12 = [0 144]
+	// 2147483648 * 2 = [1 0]
+}
+
+func ExampleMul64() {
+	// First number is 0<<64 + 12
+	n1 := []uint64{0, 12}
+	// Second number is 0<<64 + 12
+	n2 := []uint64{0, 12}
+	// Multiply them together without producing overflow.
+	hi, lo := bits.Mul64(n1[1], n2[1])
+	nsum := []uint64{hi, lo}
+	fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum)
+
+	// First number is 0<<64 + 9223372036854775808
+	n1 = []uint64{0, 0x8000000000000000}
+	// Second number is 0<<64 + 2
+	n2 = []uint64{0, 2}
+	// Multiply them together producing overflow.
+	hi, lo = bits.Mul64(n1[1], n2[1])
+	nsum = []uint64{hi, lo}
+	fmt.Printf("%v * %v = %v\n", n1[1], n2[1], nsum)
+	// Output:
+	// 12 * 12 = [0 144]
+	// 9223372036854775808 * 2 = [1 0]
+}
+
+func ExampleDiv32() {
+	// First number is 0<<32 + 6
+	n1 := []uint32{0, 6}
+	// Second number is 0<<32 + 3
+	n2 := []uint32{0, 3}
+	// Divide the two-word first number by the low word of the second.
+	quo, rem := bits.Div32(n1[0], n1[1], n2[1])
+	nsum := []uint32{quo, rem}
+	fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum)
+
+	// First number is 2<<32 + 2147483648
+	n1 = []uint32{2, 0x80000000}
+	// Second number is 0<<32 + 2147483648
+	n2 = []uint32{0, 0x80000000}
+	// Divide the two-word first number by the low word of the second.
+	quo, rem = bits.Div32(n1[0], n1[1], n2[1])
+	nsum = []uint32{quo, rem}
+	fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum)
+	// Output:
+	// [0 6] / 3 = [2 0]
+	// [2 2147483648] / 2147483648 = [5 0]
+}
+
+func ExampleDiv64() {
+	// First number is 0<<64 + 6
+	n1 := []uint64{0, 6}
+	// Second number is 0<<64 + 3
+	n2 := []uint64{0, 3}
+	// Divide the two-word first number by the low word of the second.
+	quo, rem := bits.Div64(n1[0], n1[1], n2[1])
+	nsum := []uint64{quo, rem}
+	fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum)
+
+	// First number is 2<<64 + 9223372036854775808
+	n1 = []uint64{2, 0x8000000000000000}
+	// Second number is 0<<64 + 9223372036854775808
+	n2 = []uint64{0, 0x8000000000000000}
+	// Divide the two-word first number by the low word of the second.
+	quo, rem = bits.Div64(n1[0], n1[1], n2[1])
+	nsum = []uint64{quo, rem}
+	fmt.Printf("[%v %v] / %v = %v\n", n1[0], n1[1], n2[1], nsum)
+	// Output:
+	// [0 6] / 3 = [2 0]
+	// [2 9223372036854775808] / 9223372036854775808 = [5 0]
+}
diff --git a/src/math/bits/example_test.go b/src/math/bits/example_test.go
new file mode 100644
index 0000000..b2ed2cb
--- /dev/null
+++ b/src/math/bits/example_test.go
@@ -0,0 +1,210 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by go run make_examples.go. DO NOT EDIT.
+
+package bits_test
+
+import (
+	"fmt"
+	"math/bits"
+)
+
+func ExampleLeadingZeros8() {
+	fmt.Printf("LeadingZeros8(%08b) = %d\n", 1, bits.LeadingZeros8(1))
+	// Output:
+	// LeadingZeros8(00000001) = 7
+}
+
+func ExampleLeadingZeros16() {
+	fmt.Printf("LeadingZeros16(%016b) = %d\n", 1, bits.LeadingZeros16(1))
+	// Output:
+	// LeadingZeros16(0000000000000001) = 15
+}
+
+func ExampleLeadingZeros32() {
+	fmt.Printf("LeadingZeros32(%032b) = %d\n", 1, bits.LeadingZeros32(1))
+	// Output:
+	// LeadingZeros32(00000000000000000000000000000001) = 31
+}
+
+func ExampleLeadingZeros64() {
+	fmt.Printf("LeadingZeros64(%064b) = %d\n", 1, bits.LeadingZeros64(1))
+	// Output:
+	// LeadingZeros64(0000000000000000000000000000000000000000000000000000000000000001) = 63
+}
+
+func ExampleTrailingZeros8() {
+	fmt.Printf("TrailingZeros8(%08b) = %d\n", 14, bits.TrailingZeros8(14))
+	// Output:
+	// TrailingZeros8(00001110) = 1
+}
+
+func ExampleTrailingZeros16() {
+	fmt.Printf("TrailingZeros16(%016b) = %d\n", 14, bits.TrailingZeros16(14))
+	// Output:
+	// TrailingZeros16(0000000000001110) = 1
+}
+
+func ExampleTrailingZeros32() {
+	fmt.Printf("TrailingZeros32(%032b) = %d\n", 14, bits.TrailingZeros32(14))
+	// Output:
+	// TrailingZeros32(00000000000000000000000000001110) = 1
+}
+
+func ExampleTrailingZeros64() {
+	fmt.Printf("TrailingZeros64(%064b) = %d\n", 14, bits.TrailingZeros64(14))
+	// Output:
+	// TrailingZeros64(0000000000000000000000000000000000000000000000000000000000001110) = 1
+}
+
+func ExampleOnesCount() {
+	fmt.Printf("OnesCount(%b) = %d\n", 14, bits.OnesCount(14))
+	// Output:
+	// OnesCount(1110) = 3
+}
+
+func ExampleOnesCount8() {
+	fmt.Printf("OnesCount8(%08b) = %d\n", 14, bits.OnesCount8(14))
+	// Output:
+	// OnesCount8(00001110) = 3
+}
+
+func ExampleOnesCount16() {
+	fmt.Printf("OnesCount16(%016b) = %d\n", 14, bits.OnesCount16(14))
+	// Output:
+	// OnesCount16(0000000000001110) = 3
+}
+
+func ExampleOnesCount32() {
+	fmt.Printf("OnesCount32(%032b) = %d\n", 14, bits.OnesCount32(14))
+	// Output:
+	// OnesCount32(00000000000000000000000000001110) = 3
+}
+
+func ExampleOnesCount64() {
+	fmt.Printf("OnesCount64(%064b) = %d\n", 14, bits.OnesCount64(14))
+	// Output:
+	// OnesCount64(0000000000000000000000000000000000000000000000000000000000001110) = 3
+}
+
+func ExampleRotateLeft8() {
+	fmt.Printf("%08b\n", 15)
+	fmt.Printf("%08b\n", bits.RotateLeft8(15, 2))
+	fmt.Printf("%08b\n", bits.RotateLeft8(15, -2))
+	// Output:
+	// 00001111
+	// 00111100
+	// 11000011
+}
+
+func ExampleRotateLeft16() {
+	fmt.Printf("%016b\n", 15)
+	fmt.Printf("%016b\n", bits.RotateLeft16(15, 2))
+	fmt.Printf("%016b\n", bits.RotateLeft16(15, -2))
+	// Output:
+	// 0000000000001111
+	// 0000000000111100
+	// 1100000000000011
+}
+
+func ExampleRotateLeft32() {
+	fmt.Printf("%032b\n", 15)
+	fmt.Printf("%032b\n", bits.RotateLeft32(15, 2))
+	fmt.Printf("%032b\n", bits.RotateLeft32(15, -2))
+	// Output:
+	// 00000000000000000000000000001111
+	// 00000000000000000000000000111100
+	// 11000000000000000000000000000011
+}
+
+func ExampleRotateLeft64() {
+	fmt.Printf("%064b\n", 15)
+	fmt.Printf("%064b\n", bits.RotateLeft64(15, 2))
+	fmt.Printf("%064b\n", bits.RotateLeft64(15, -2))
+	// Output:
+	// 0000000000000000000000000000000000000000000000000000000000001111
+	// 0000000000000000000000000000000000000000000000000000000000111100
+	// 1100000000000000000000000000000000000000000000000000000000000011
+}
+
+func ExampleReverse8() {
+	fmt.Printf("%08b\n", 19)
+	fmt.Printf("%08b\n", bits.Reverse8(19))
+	// Output:
+	// 00010011
+	// 11001000
+}
+
+func ExampleReverse16() {
+	fmt.Printf("%016b\n", 19)
+	fmt.Printf("%016b\n", bits.Reverse16(19))
+	// Output:
+	// 0000000000010011
+	// 1100100000000000
+}
+
+func ExampleReverse32() {
+	fmt.Printf("%032b\n", 19)
+	fmt.Printf("%032b\n", bits.Reverse32(19))
+	// Output:
+	// 00000000000000000000000000010011
+	// 11001000000000000000000000000000
+}
+
+func ExampleReverse64() {
+	fmt.Printf("%064b\n", 19)
+	fmt.Printf("%064b\n", bits.Reverse64(19))
+	// Output:
+	// 0000000000000000000000000000000000000000000000000000000000010011
+	// 1100100000000000000000000000000000000000000000000000000000000000
+}
+
+func ExampleReverseBytes16() {
+	fmt.Printf("%016b\n", 15)
+	fmt.Printf("%016b\n", bits.ReverseBytes16(15))
+	// Output:
+	// 0000000000001111
+	// 0000111100000000
+}
+
+func ExampleReverseBytes32() {
+	fmt.Printf("%032b\n", 15)
+	fmt.Printf("%032b\n", bits.ReverseBytes32(15))
+	// Output:
+	// 00000000000000000000000000001111
+	// 00001111000000000000000000000000
+}
+
+func ExampleReverseBytes64() {
+	fmt.Printf("%064b\n", 15)
+	fmt.Printf("%064b\n", bits.ReverseBytes64(15))
+	// Output:
+	// 0000000000000000000000000000000000000000000000000000000000001111
+	// 0000111100000000000000000000000000000000000000000000000000000000
+}
+
+func ExampleLen8() {
+	fmt.Printf("Len8(%08b) = %d\n", 8, bits.Len8(8))
+	// Output:
+	// Len8(00001000) = 4
+}
+
+func ExampleLen16() {
+	fmt.Printf("Len16(%016b) = %d\n", 8, bits.Len16(8))
+	// Output:
+	// Len16(0000000000001000) = 4
+}
+
+func ExampleLen32() {
+	fmt.Printf("Len32(%032b) = %d\n", 8, bits.Len32(8))
+	// Output:
+	// Len32(00000000000000000000000000001000) = 4
+}
+
+func ExampleLen64() {
+	fmt.Printf("Len64(%064b) = %d\n", 8, bits.Len64(8))
+	// Output:
+	// Len64(0000000000000000000000000000000000000000000000000000000000001000) = 4
+}
diff --git a/src/math/bits/export_test.go b/src/math/bits/export_test.go
new file mode 100644
index 0000000..8c6f933
--- /dev/null
+++ b/src/math/bits/export_test.go
@@ -0,0 +1,7 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bits
+
+const DeBruijn64 = deBruijn64
diff --git a/src/math/bits/make_examples.go b/src/math/bits/make_examples.go
new file mode 100644
index 0000000..92e9aab
--- /dev/null
+++ b/src/math/bits/make_examples.go
@@ -0,0 +1,113 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+// +build ignore
+
+// This program generates example_test.go.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"log"
+	"math/bits"
+	"os"
+)
+
+const header = `// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by go run make_examples.go. DO NOT EDIT.
+
+package bits_test
+
+import (
+	"fmt"
+	"math/bits"
+)
+`
+
+func main() {
+	w := bytes.NewBuffer([]byte(header))
+
+	for _, e := range []struct {
+		name string
+		in   int
+		out  [4]any
+		out2 [4]any
+	}{
+		{
+			name: "LeadingZeros",
+			in:   1,
+			out:  [4]any{bits.LeadingZeros8(1), bits.LeadingZeros16(1), bits.LeadingZeros32(1), bits.LeadingZeros64(1)},
+		},
+		{
+			name: "TrailingZeros",
+			in:   14,
+			out:  [4]any{bits.TrailingZeros8(14), bits.TrailingZeros16(14), bits.TrailingZeros32(14), bits.TrailingZeros64(14)},
+		},
+		{
+			name: "OnesCount",
+			in:   14,
+			out:  [4]any{bits.OnesCount8(14), bits.OnesCount16(14), bits.OnesCount32(14), bits.OnesCount64(14)},
+		},
+		{
+			name: "RotateLeft",
+			in:   15,
+			out:  [4]any{bits.RotateLeft8(15, 2), bits.RotateLeft16(15, 2), bits.RotateLeft32(15, 2), bits.RotateLeft64(15, 2)},
+			out2: [4]any{bits.RotateLeft8(15, -2), bits.RotateLeft16(15, -2), bits.RotateLeft32(15, -2), bits.RotateLeft64(15, -2)},
+		},
+		{
+			name: "Reverse",
+			in:   19,
+			out:  [4]any{bits.Reverse8(19), bits.Reverse16(19), bits.Reverse32(19), bits.Reverse64(19)},
+		},
+		{
+			name: "ReverseBytes",
+			in:   15,
+			out:  [4]any{nil, bits.ReverseBytes16(15), bits.ReverseBytes32(15), bits.ReverseBytes64(15)},
+		},
+		{
+			name: "Len",
+			in:   8,
+			out:  [4]any{bits.Len8(8), bits.Len16(8), bits.Len32(8), bits.Len64(8)},
+		},
+	} {
+		for i, size := range []int{8, 16, 32, 64} {
+			if e.out[i] == nil {
+				continue // function doesn't exist
+			}
+			f := fmt.Sprintf("%s%d", e.name, size)
+			fmt.Fprintf(w, "\nfunc Example%s() {\n", f)
+			switch e.name {
+			case "RotateLeft", "Reverse", "ReverseBytes":
+				fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", %d)\n", size, e.in)
+				if e.name == "RotateLeft" {
+					fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", bits.%s(%d, 2))\n", size, f, e.in)
+					fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", bits.%s(%d, -2))\n", size, f, e.in)
+				} else {
+					fmt.Fprintf(w, "\tfmt.Printf(\"%%0%db\\n\", bits.%s(%d))\n", size, f, e.in)
+				}
+				fmt.Fprintf(w, "\t// Output:\n")
+				fmt.Fprintf(w, "\t// %0*b\n", size, e.in)
+				fmt.Fprintf(w, "\t// %0*b\n", size, e.out[i])
+				if e.name == "RotateLeft" && e.out2[i] != nil {
+					fmt.Fprintf(w, "\t// %0*b\n", size, e.out2[i])
+				}
+			default:
+				fmt.Fprintf(w, "\tfmt.Printf(\"%s(%%0%db) = %%d\\n\", %d, bits.%s(%d))\n", f, size, e.in, f, e.in)
+				fmt.Fprintf(w, "\t// Output:\n")
+				fmt.Fprintf(w, "\t// %s(%0*b) = %d\n", f, size, e.in, e.out[i])
+			}
+			fmt.Fprintf(w, "}\n")
+		}
+	}
+
+	if err := os.WriteFile("example_test.go", w.Bytes(), 0666); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/src/math/bits/make_tables.go b/src/math/bits/make_tables.go
new file mode 100644
index 0000000..867025e
--- /dev/null
+++ b/src/math/bits/make_tables.go
@@ -0,0 +1,92 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+// +build ignore
+
+// This program generates bits_tables.go.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/format"
+	"io"
+	"log"
+	"os"
+)
+
+var header = []byte(`// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by go run make_tables.go. DO NOT EDIT.
+
+package bits
+
+`)
+
+func main() {
+	buf := bytes.NewBuffer(header)
+
+	gen(buf, "ntz8tab", ntz8)
+	gen(buf, "pop8tab", pop8)
+	gen(buf, "rev8tab", rev8)
+	gen(buf, "len8tab", len8)
+
+	out, err := format.Source(buf.Bytes())
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	err = os.WriteFile("bits_tables.go", out, 0666)
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
+func gen(w io.Writer, name string, f func(uint8) uint8) {
+	// Use a const string to allow the compiler to constant-evaluate lookups at constant index.
+	fmt.Fprintf(w, "const %s = \"\"+\n\"", name)
+	for i := 0; i < 256; i++ {
+		fmt.Fprintf(w, "\\x%02x", f(uint8(i)))
+		if i%16 == 15 && i != 255 {
+			fmt.Fprint(w, "\"+\n\"")
+		}
+	}
+	fmt.Fprint(w, "\"\n\n")
+}
+
+func ntz8(x uint8) (n uint8) {
+	for x&1 == 0 && n < 8 {
+		x >>= 1
+		n++
+	}
+	return
+}
+
+func pop8(x uint8) (n uint8) {
+	for x != 0 {
+		x &= x - 1
+		n++
+	}
+	return
+}
+
+func rev8(x uint8) (r uint8) {
+	for i := 8; i > 0; i-- {
+		r = r<<1 | x&1
+		x >>= 1
+	}
+	return
+}
+
+func len8(x uint8) (n uint8) {
+	for x != 0 {
+		x >>= 1
+		n++
+	}
+	return
+}
diff --git a/src/math/cbrt.go b/src/math/cbrt.go
new file mode 100644
index 0000000..e5e9548
--- /dev/null
+++ b/src/math/cbrt.go
@@ -0,0 +1,85 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The go code is a modified version of the original C code from
+// http://www.netlib.org/fdlibm/s_cbrt.c and came with this notice.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunSoft, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+
+// Cbrt returns the cube root of x.
+//
+// Special cases are:
+//
+//	Cbrt(±0) = ±0
+//	Cbrt(±Inf) = ±Inf
+//	Cbrt(NaN) = NaN
+func Cbrt(x float64) float64 {
+	if haveArchCbrt {
+		return archCbrt(x)
+	}
+	return cbrt(x)
+}
+
+func cbrt(x float64) float64 {
+	const (
+		B1             = 715094163                   // (682-0.03306235651)*2**20
+		B2             = 696219795                   // (664-0.03306235651)*2**20
+		C              = 5.42857142857142815906e-01  // 19/35     = 0x3FE15F15F15F15F1
+		D              = -7.05306122448979611050e-01 // -864/1225 = 0xBFE691DE2532C834
+		E              = 1.41428571428571436819e+00  // 99/70     = 0x3FF6A0EA0EA0EA0F
+		F              = 1.60714285714285720630e+00  // 45/28     = 0x3FF9B6DB6DB6DB6E
+		G              = 3.57142857142857150787e-01  // 5/14      = 0x3FD6DB6DB6DB6DB7
+		SmallestNormal = 2.22507385850720138309e-308 // 2**-1022  = 0x0010000000000000
+	)
+	// special cases
+	switch {
+	case x == 0 || IsNaN(x) || IsInf(x, 0):
+		return x
+	}
+
+	sign := false
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+
+	// rough cbrt to 5 bits
+	t := Float64frombits(Float64bits(x)/3 + B1<<32)
+	if x < SmallestNormal {
+		// subnormal number
+		t = float64(1 << 54) // set t= 2**54
+		t *= x
+		t = Float64frombits(Float64bits(t)/3 + B2<<32)
+	}
+
+	// new cbrt to 23 bits
+	r := t * t / x
+	s := C + r*t
+	t *= G + F/(s+E+D/s)
+
+	// chop to 22 bits, make larger than cbrt(x)
+	t = Float64frombits(Float64bits(t)&(0xFFFFFFFFC<<28) + 1<<30)
+
+	// one step newton iteration to 53 bits with error less than 0.667ulps
+	s = t * t // t*t is exact
+	r = x / s
+	w := t + t
+	r = (r - t) / (w + r) // r-t is exact
+	t = t + t*r
+
+	// restore the sign bit
+	if sign {
+		t = -t
+	}
+	return t
+}
diff --git a/src/math/cbrt_s390x.s b/src/math/cbrt_s390x.s
new file mode 100644
index 0000000..87bba53
--- /dev/null
+++ b/src/math/cbrt_s390x.s
@@ -0,0 +1,156 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
+DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
+DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
+DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
+DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
+DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
+DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
+DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
+DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
+GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72
+
+// Index tables
+DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
+DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
+DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
+DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
+DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
+DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
+DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
+DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
+DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
+DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
+GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80
+
+DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
+DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
+DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
+DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
+DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
+DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
+DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
+DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
+DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
+DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
+DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
+DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
+DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
+DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
+DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
+DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
+GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128
+
+DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
+DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
+DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
+DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
+DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
+DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
+DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
+DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
+DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
+DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
+DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
+DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
+DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
+DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
+DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
+DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
+GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128
+
+// Cbrt returns the cube root of the argument.
+//
+// Special cases are:
+//      Cbrt(±0) = ±0
+//      Cbrt(±Inf) = ±Inf
+//      Cbrt(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·cbrtAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0
+	MOVD	$·cbrtrodataL9<>+0(SB), R9
+	LGDR	F0, R2
+	WORD	$0xC039000F	//iilf	%r3,1048575
+	BYTE	$0xFF
+	BYTE	$0xFF
+	SRAD	$32, R2
+	WORD	$0xB9170012	//llgtr	%r1,%r2
+	MOVW	R1, R6
+	MOVW	R3, R7
+	CMPBLE	R6, R7, L2
+	WORD	$0xC0397FEF	//iilf	%r3,2146435071
+	BYTE	$0xFF
+	BYTE	$0xFF
+	MOVW	R3, R7
+	CMPBLE	R6, R7, L8
+L1:
+	FMOVD	F0, ret+8(FP)
+	RET
+L3:
+L2:
+	LTDBR	F0, F0
+	BEQ	L1
+	FMOVD	F0, F2
+	WORD	$0xED209040	//mdb	%f2,.L10-.L9(%r9)
+	BYTE	$0x00
+	BYTE	$0x1C
+	MOVH	$0x200, R4
+	LGDR	F2, R2
+	SRAD	$32, R2
+L4:
+	RISBGZ	$57, $62, $39, R2, R3
+	MOVD	$·cbrttab12067<>+0(SB), R1
+	WORD	$0x48131000	//lh	%r1,0(%r3,%r1)
+	RISBGZ	$57, $62, $45, R2, R3
+	MOVD	$·cbrttab22068<>+0(SB), R5
+	RISBGNZ	$60, $63, $48, R2, R2
+	WORD	$0x4A135000	//ah	%r1,0(%r3,%r5)
+	BYTE	$0x18	//lr	%r3,%r1
+	BYTE	$0x31
+	MOVD	$·cbrttab32069<>+0(SB), R1
+	FMOVD	56(R9), F1
+	FMOVD	48(R9), F5
+	WORD	$0xEC23393B	//rosbg	%r2,%r3,57,59,4
+	BYTE	$0x04
+	BYTE	$0x56
+	WORD	$0xE3121000	//llc	%r1,0(%r2,%r1)
+	BYTE	$0x00
+	BYTE	$0x94
+	ADDW	R3, R1
+	ADDW	R4, R1
+	SLW	$16, R1, R1
+	SLD	$32, R1, R1
+	LDGR	R1, F2
+	WFMDB	V2, V2, V4
+	WFMDB	V4, V0, V6
+	WFMSDB	V4, V6, V2, V4
+	FMOVD	40(R9), F6
+	FMSUB	F1, F4, F2
+	FMOVD	32(R9), F4
+	WFMDB	V2, V2, V3
+	FMOVD	24(R9), F1
+	FMUL	F3, F0
+	FMOVD	16(R9), F3
+	WFMADB	V2, V0, V5, V2
+	FMOVD	8(R9), F5
+	FMADD	F6, F2, F4
+	WFMADB	V2, V1, V3, V1
+	WFMDB	V2, V2, V6
+	FMOVD	0(R9), F3
+	WFMADB	V4, V6, V1, V4
+	WFMADB	V2, V5, V3, V2
+	FMADD	F4, F6, F2
+	FMADD	F2, F0, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L8:
+	MOVH	$0x0, R4
+	BR	L4
diff --git a/src/math/cmplx/abs.go b/src/math/cmplx/abs.go
new file mode 100644
index 0000000..2f89d1b
--- /dev/null
+++ b/src/math/cmplx/abs.go
@@ -0,0 +1,13 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cmplx provides basic constants and mathematical functions for
+// complex numbers. Special case handling conforms to the C99 standard
+// Annex G IEC 60559-compatible complex arithmetic.
+package cmplx
+
+import "math"
+
+// Abs returns the absolute value (also called the modulus) of x.
+func Abs(x complex128) float64 { return math.Hypot(real(x), imag(x)) }
diff --git a/src/math/cmplx/asin.go b/src/math/cmplx/asin.go
new file mode 100644
index 0000000..30d019e
--- /dev/null
+++ b/src/math/cmplx/asin.go
@@ -0,0 +1,221 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex circular arc sine
+//
+// DESCRIPTION:
+//
+// Inverse complex sine:
+//                               2
+// w = -i clog( iz + csqrt( 1 - z ) ).
+//
+// casin(z) = -i casinh(iz)
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10     10100       2.1e-15     3.4e-16
+//    IEEE      -10,+10     30000       2.2e-14     2.7e-15
+// Larger relative error can be observed for z near zero.
+// Also tested by csin(casin(z)) = z.
+
+// Asin returns the inverse sine of x.
+func Asin(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case im == 0 && math.Abs(re) <= 1:
+		return complex(math.Asin(re), im)
+	case re == 0 && math.Abs(im) <= 1:
+		return complex(re, math.Asinh(im))
+	case math.IsNaN(im):
+		switch {
+		case re == 0:
+			return complex(re, math.NaN())
+		case math.IsInf(re, 0):
+			return complex(math.NaN(), re)
+		default:
+			return NaN()
+		}
+	case math.IsInf(im, 0):
+		switch {
+		case math.IsNaN(re):
+			return x
+		case math.IsInf(re, 0):
+			return complex(math.Copysign(math.Pi/4, re), im)
+		default:
+			return complex(math.Copysign(0, re), im)
+		}
+	case math.IsInf(re, 0):
+		return complex(math.Copysign(math.Pi/2, re), math.Copysign(re, im))
+	}
+	ct := complex(-imag(x), real(x)) // i * x
+	xx := x * x
+	x1 := complex(1-real(xx), -imag(xx)) // 1 - x*x
+	x2 := Sqrt(x1)                       // x2 = sqrt(1 - x*x)
+	w := Log(ct + x2)
+	return complex(imag(w), -real(w)) // -i * w
+}
+
+// Asinh returns the inverse hyperbolic sine of x.
+func Asinh(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case im == 0 && math.Abs(re) <= 1:
+		return complex(math.Asinh(re), im)
+	case re == 0 && math.Abs(im) <= 1:
+		return complex(re, math.Asin(im))
+	case math.IsInf(re, 0):
+		switch {
+		case math.IsInf(im, 0):
+			return complex(re, math.Copysign(math.Pi/4, im))
+		case math.IsNaN(im):
+			return x
+		default:
+			return complex(re, math.Copysign(0.0, im))
+		}
+	case math.IsNaN(re):
+		switch {
+		case im == 0:
+			return x
+		case math.IsInf(im, 0):
+			return complex(im, re)
+		default:
+			return NaN()
+		}
+	case math.IsInf(im, 0):
+		return complex(math.Copysign(im, re), math.Copysign(math.Pi/2, im))
+	}
+	xx := x * x
+	x1 := complex(1+real(xx), imag(xx)) // 1 + x*x
+	return Log(x + Sqrt(x1))            // log(x + sqrt(1 + x*x))
+}
+
+// Complex circular arc cosine
+//
+// DESCRIPTION:
+//
+// w = arccos z  =  PI/2 - arcsin z.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      5200      1.6e-15      2.8e-16
+//    IEEE      -10,+10     30000      1.8e-14      2.2e-15
+
+// Acos returns the inverse cosine of x.
+func Acos(x complex128) complex128 {
+	w := Asin(x)
+	return complex(math.Pi/2-real(w), -imag(w))
+}
+
+// Acosh returns the inverse hyperbolic cosine of x.
+func Acosh(x complex128) complex128 {
+	if x == 0 {
+		return complex(0, math.Copysign(math.Pi/2, imag(x)))
+	}
+	w := Acos(x)
+	if imag(w) <= 0 {
+		return complex(-imag(w), real(w)) // i * w
+	}
+	return complex(imag(w), -real(w)) // -i * w
+}
+
+// Complex circular arc tangent
+//
+// DESCRIPTION:
+//
+// If
+//     z = x + iy,
+//
+// then
+//          1       (    2x     )
+// Re w  =  - arctan(-----------)  +  k PI
+//          2       (     2    2)
+//                  (1 - x  - y )
+//
+//               ( 2         2)
+//          1    (x  +  (y+1) )
+// Im w  =  - log(------------)
+//          4    ( 2         2)
+//               (x  +  (y-1) )
+//
+// Where k is an arbitrary integer.
+//
+// catan(z) = -i catanh(iz).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      5900       1.3e-16     7.8e-18
+//    IEEE      -10,+10     30000       2.3e-15     8.5e-17
+// The check catan( ctan(z) )  =  z, with |x| and |y| < PI/2,
+// had peak relative error 1.5e-16, rms relative error
+// 2.9e-17.  See also clog().
+
+// Atan returns the inverse tangent of x.
+func Atan(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case im == 0:
+		return complex(math.Atan(re), im)
+	case re == 0 && math.Abs(im) <= 1:
+		return complex(re, math.Atanh(im))
+	case math.IsInf(im, 0) || math.IsInf(re, 0):
+		if math.IsNaN(re) {
+			return complex(math.NaN(), math.Copysign(0, im))
+		}
+		return complex(math.Copysign(math.Pi/2, re), math.Copysign(0, im))
+	case math.IsNaN(re) || math.IsNaN(im):
+		return NaN()
+	}
+	x2 := real(x) * real(x)
+	a := 1 - x2 - imag(x)*imag(x)
+	if a == 0 {
+		return NaN()
+	}
+	t := 0.5 * math.Atan2(2*real(x), a)
+	w := reducePi(t)
+
+	t = imag(x) - 1
+	b := x2 + t*t
+	if b == 0 {
+		return NaN()
+	}
+	t = imag(x) + 1
+	c := (x2 + t*t) / b
+	return complex(w, 0.25*math.Log(c))
+}
+
+// Atanh returns the inverse hyperbolic tangent of x.
+func Atanh(x complex128) complex128 {
+	z := complex(-imag(x), real(x)) // z = i * x
+	z = Atan(z)
+	return complex(imag(z), -real(z)) // z = -i * z
+}
diff --git a/src/math/cmplx/cmath_test.go b/src/math/cmplx/cmath_test.go
new file mode 100644
index 0000000..3011e83
--- /dev/null
+++ b/src/math/cmplx/cmath_test.go
@@ -0,0 +1,1589 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import (
+	"math"
+	"testing"
+)
+
+// The higher-precision values in vc26 were used to derive the
+// input arguments vc (see also comment below). For reference
+// only (do not delete).
+var vc26 = []complex128{
+	(4.97901192488367350108546816 + 7.73887247457810456552351752i),
+	(7.73887247457810456552351752 - 0.27688005719200159404635997i),
+	(-0.27688005719200159404635997 - 5.01060361827107492160848778i),
+	(-5.01060361827107492160848778 + 9.63629370719841737980004837i),
+	(9.63629370719841737980004837 + 2.92637723924396464525443662i),
+	(2.92637723924396464525443662 + 5.22908343145930665230025625i),
+	(5.22908343145930665230025625 + 2.72793991043601025126008608i),
+	(2.72793991043601025126008608 + 1.82530809168085506044576505i),
+	(1.82530809168085506044576505 - 8.68592476857560136238589621i),
+	(-8.68592476857560136238589621 + 4.97901192488367350108546816i),
+}
+
+var vc = []complex128{
+	(4.9790119248836735e+00 + 7.7388724745781045e+00i),
+	(7.7388724745781045e+00 - 2.7688005719200159e-01i),
+	(-2.7688005719200159e-01 - 5.0106036182710749e+00i),
+	(-5.0106036182710749e+00 + 9.6362937071984173e+00i),
+	(9.6362937071984173e+00 + 2.9263772392439646e+00i),
+	(2.9263772392439646e+00 + 5.2290834314593066e+00i),
+	(5.2290834314593066e+00 + 2.7279399104360102e+00i),
+	(2.7279399104360102e+00 + 1.8253080916808550e+00i),
+	(1.8253080916808550e+00 - 8.6859247685756013e+00i),
+	(-8.6859247685756013e+00 + 4.9790119248836735e+00i),
+}
+
+// The expected results below were computed by the high precision calculators
+// at https://keisan.casio.com/.  More exact input values (array vc[], above)
+// were obtained by printing them with "%.26f".  The answers were calculated
+// to 26 digits (by using the "Digit number" drop-down control of each
+// calculator).
+
+var abs = []float64{
+	9.2022120669932650313380972e+00,
+	7.7438239742296106616261394e+00,
+	5.0182478202557746902556648e+00,
+	1.0861137372799545160704002e+01,
+	1.0070841084922199607011905e+01,
+	5.9922447613166942183705192e+00,
+	5.8978784056736762299945176e+00,
+	3.2822866700678709020367184e+00,
+	8.8756430028990417290744307e+00,
+	1.0011785496777731986390856e+01,
+}
+
+var acos = []complex128{
+	(1.0017679804707456328694569 - 2.9138232718554953784519807i),
+	(0.03606427612041407369636057 + 2.7358584434576260925091256i),
+	(1.6249365462333796703711823 + 2.3159537454335901187730929i),
+	(2.0485650849650740120660391 - 3.0795576791204117911123886i),
+	(0.29621132089073067282488147 - 3.0007392508200622519398814i),
+	(1.0664555914934156601503632 - 2.4872865024796011364747111i),
+	(0.48681307452231387690013905 - 2.463655912283054555225301i),
+	(0.6116977071277574248407752 - 1.8734458851737055262693056i),
+	(1.3649311280370181331184214 + 2.8793528632328795424123832i),
+	(2.6189310485682988308904501 - 2.9956543302898767795858704i),
+}
+var acosh = []complex128{
+	(2.9138232718554953784519807 + 1.0017679804707456328694569i),
+	(2.7358584434576260925091256 - 0.03606427612041407369636057i),
+	(2.3159537454335901187730929 - 1.6249365462333796703711823i),
+	(3.0795576791204117911123886 + 2.0485650849650740120660391i),
+	(3.0007392508200622519398814 + 0.29621132089073067282488147i),
+	(2.4872865024796011364747111 + 1.0664555914934156601503632i),
+	(2.463655912283054555225301 + 0.48681307452231387690013905i),
+	(1.8734458851737055262693056 + 0.6116977071277574248407752i),
+	(2.8793528632328795424123832 - 1.3649311280370181331184214i),
+	(2.9956543302898767795858704 + 2.6189310485682988308904501i),
+}
+var asin = []complex128{
+	(0.56902834632415098636186476 + 2.9138232718554953784519807i),
+	(1.5347320506744825455349611 - 2.7358584434576260925091256i),
+	(-0.054140219438483051139860579 - 2.3159537454335901187730929i),
+	(-0.47776875817017739283471738 + 3.0795576791204117911123886i),
+	(1.2745850059041659464064402 + 3.0007392508200622519398814i),
+	(0.50434073530148095908095852 + 2.4872865024796011364747111i),
+	(1.0839832522725827423311826 + 2.463655912283054555225301i),
+	(0.9590986196671391943905465 + 1.8734458851737055262693056i),
+	(0.20586519875787848611290031 - 2.8793528632328795424123832i),
+	(-1.0481347217734022116591284 + 2.9956543302898767795858704i),
+}
+var asinh = []complex128{
+	(2.9113760469415295679342185 + 0.99639459545704326759805893i),
+	(2.7441755423994259061579029 - 0.035468308789000500601119392i),
+	(-2.2962136462520690506126678 - 1.5144663565690151885726707i),
+	(-3.0771233459295725965402455 + 1.0895577967194013849422294i),
+	(3.0048366100923647417557027 + 0.29346979169819220036454168i),
+	(2.4800059370795363157364643 + 1.0545868606049165710424232i),
+	(2.4718773838309585611141821 + 0.47502344364250803363708842i),
+	(1.8910743588080159144378396 + 0.56882925572563602341139174i),
+	(2.8735426423367341878069406 - 1.362376149648891420997548i),
+	(-2.9981750586172477217567878 + 0.5183571985225367505624207i),
+}
+var atan = []complex128{
+	(1.5115747079332741358607654 + 0.091324403603954494382276776i),
+	(1.4424504323482602560806727 - 0.0045416132642803911503770933i),
+	(-1.5593488703630532674484026 - 0.20163295409248362456446431i),
+	(-1.5280619472445889867794105 + 0.081721556230672003746956324i),
+	(1.4759909163240799678221039 + 0.028602969320691644358773586i),
+	(1.4877353772046548932715555 + 0.14566877153207281663773599i),
+	(1.4206983927779191889826 + 0.076830486127880702249439993i),
+	(1.3162236060498933364869556 + 0.16031313000467530644933363i),
+	(1.5473450684303703578810093 - 0.11064907507939082484935782i),
+	(-1.4841462340185253987375812 + 0.049341850305024399493142411i),
+}
+var atanh = []complex128{
+	(0.058375027938968509064640438 + 1.4793488495105334458167782i),
+	(0.12977343497790381229915667 - 1.5661009410463561327262499i),
+	(-0.010576456067347252072200088 - 1.3743698658402284549750563i),
+	(-0.042218595678688358882784918 + 1.4891433968166405606692604i),
+	(0.095218997991316722061828397 + 1.5416884098777110330499698i),
+	(0.079965459366890323857556487 + 1.4252510353873192700350435i),
+	(0.15051245471980726221708301 + 1.4907432533016303804884461i),
+	(0.25082072933993987714470373 + 1.392057665392187516442986i),
+	(0.022896108815797135846276662 - 1.4609224989282864208963021i),
+	(-0.08665624101841876130537396 + 1.5207902036935093480142159i),
+}
+var conj = []complex128{
+	(4.9790119248836735e+00 - 7.7388724745781045e+00i),
+	(7.7388724745781045e+00 + 2.7688005719200159e-01i),
+	(-2.7688005719200159e-01 + 5.0106036182710749e+00i),
+	(-5.0106036182710749e+00 - 9.6362937071984173e+00i),
+	(9.6362937071984173e+00 - 2.9263772392439646e+00i),
+	(2.9263772392439646e+00 - 5.2290834314593066e+00i),
+	(5.2290834314593066e+00 - 2.7279399104360102e+00i),
+	(2.7279399104360102e+00 - 1.8253080916808550e+00i),
+	(1.8253080916808550e+00 + 8.6859247685756013e+00i),
+	(-8.6859247685756013e+00 - 4.9790119248836735e+00i),
+}
+var cos = []complex128{
+	(3.024540920601483938336569e+02 + 1.1073797572517071650045357e+03i),
+	(1.192858682649064973252758e-01 + 2.7857554122333065540970207e-01i),
+	(7.2144394304528306603857962e+01 - 2.0500129667076044169954205e+01i),
+	(2.24921952538403984190541e+03 - 7.317363745602773587049329e+03i),
+	(-9.148222970032421760015498e+00 + 1.953124661113563541862227e+00i),
+	(-9.116081175857732248227078e+01 - 1.992669213569952232487371e+01i),
+	(3.795639179042704640002918e+00 + 6.623513350981458399309662e+00i),
+	(-2.9144840732498869560679084e+00 - 1.214620271628002917638748e+00i),
+	(-7.45123482501299743872481e+02 + 2.8641692314488080814066734e+03i),
+	(-5.371977967039319076416747e+01 + 4.893348341339375830564624e+01i),
+}
+var cosh = []complex128{
+	(8.34638383523018249366948e+00 + 7.2181057886425846415112064e+01i),
+	(1.10421967379919366952251e+03 - 3.1379638689277575379469861e+02i),
+	(3.051485206773701584738512e-01 - 2.6805384730105297848044485e-01i),
+	(-7.33294728684187933370938e+01 + 1.574445942284918251038144e+01i),
+	(-7.478643293945957535757355e+03 + 1.6348382209913353929473321e+03i),
+	(4.622316522966235701630926e+00 - 8.088695185566375256093098e+00i),
+	(-8.544333183278877406197712e+01 + 3.7505836120128166455231717e+01i),
+	(-1.934457815021493925115198e+00 + 7.3725859611767228178358673e+00i),
+	(-2.352958770061749348353548e+00 - 2.034982010440878358915409e+00i),
+	(7.79756457532134748165069e+02 + 2.8549350716819176560377717e+03i),
+}
+var exp = []complex128{
+	(1.669197736864670815125146e+01 + 1.4436895109507663689174096e+02i),
+	(2.2084389286252583447276212e+03 - 6.2759289284909211238261917e+02i),
+	(2.227538273122775173434327e-01 + 7.2468284028334191250470034e-01i),
+	(-6.5182985958153548997881627e-03 - 1.39965837915193860879044e-03i),
+	(-1.4957286524084015746110777e+04 + 3.269676455931135688988042e+03i),
+	(9.218158701983105935659273e+00 - 1.6223985291084956009304582e+01i),
+	(-1.7088175716853040841444505e+02 + 7.501382609870410713795546e+01i),
+	(-3.852461315830959613132505e+00 + 1.4808420423156073221970892e+01i),
+	(-4.586775503301407379786695e+00 - 4.178501081246873415144744e+00i),
+	(4.451337963005453491095747e-05 - 1.62977574205442915935263e-04i),
+}
+var log = []complex128{
+	(2.2194438972179194425697051e+00 + 9.9909115046919291062461269e-01i),
+	(2.0468956191154167256337289e+00 - 3.5762575021856971295156489e-02i),
+	(1.6130808329853860438751244e+00 - 1.6259990074019058442232221e+00i),
+	(2.3851910394823008710032651e+00 + 2.0502936359659111755031062e+00i),
+	(2.3096442270679923004800651e+00 + 2.9483213155446756211881774e-01i),
+	(1.7904660933974656106951860e+00 + 1.0605860367252556281902109e+00i),
+	(1.7745926939841751666177512e+00 + 4.8084556083358307819310911e-01i),
+	(1.1885403350045342425648780e+00 + 5.8969634164776659423195222e-01i),
+	(2.1833107837679082586772505e+00 - 1.3636647724582455028314573e+00i),
+	(2.3037629487273259170991671e+00 + 2.6210913895386013290915234e+00i),
+}
+var log10 = []complex128{
+	(9.6389223745559042474184943e-01 + 4.338997735671419492599631e-01i),
+	(8.8895547241376579493490892e-01 - 1.5531488990643548254864806e-02i),
+	(7.0055210462945412305244578e-01 - 7.0616239649481243222248404e-01i),
+	(1.0358753067322445311676952e+00 + 8.9043121238134980156490909e-01i),
+	(1.003065742975330237172029e+00 + 1.2804396782187887479857811e-01i),
+	(7.7758954439739162532085157e-01 + 4.6060666333341810869055108e-01i),
+	(7.7069581462315327037689152e-01 + 2.0882857371769952195512475e-01i),
+	(5.1617650901191156135137239e-01 + 2.5610186717615977620363299e-01i),
+	(9.4819982567026639742663212e-01 - 5.9223208584446952284914289e-01i),
+	(1.0005115362454417135973429e+00 + 1.1383255270407412817250921e+00i),
+}
+
+type ff struct {
+	r, theta float64
+}
+
+var polar = []ff{
+	{9.2022120669932650313380972e+00, 9.9909115046919291062461269e-01},
+	{7.7438239742296106616261394e+00, -3.5762575021856971295156489e-02},
+	{5.0182478202557746902556648e+00, -1.6259990074019058442232221e+00},
+	{1.0861137372799545160704002e+01, 2.0502936359659111755031062e+00},
+	{1.0070841084922199607011905e+01, 2.9483213155446756211881774e-01},
+	{5.9922447613166942183705192e+00, 1.0605860367252556281902109e+00},
+	{5.8978784056736762299945176e+00, 4.8084556083358307819310911e-01},
+	{3.2822866700678709020367184e+00, 5.8969634164776659423195222e-01},
+	{8.8756430028990417290744307e+00, -1.3636647724582455028314573e+00},
+	{1.0011785496777731986390856e+01, 2.6210913895386013290915234e+00},
+}
+var pow = []complex128{
+	(-2.499956739197529585028819e+00 + 1.759751724335650228957144e+00i),
+	(7.357094338218116311191939e+04 - 5.089973412479151648145882e+04i),
+	(1.320777296067768517259592e+01 - 3.165621914333901498921986e+01i),
+	(-3.123287828297300934072149e-07 - 1.9849567521490553032502223e-7i),
+	(8.0622651468477229614813e+04 - 7.80028727944573092944363e+04i),
+	(-1.0268824572103165858577141e+00 - 4.716844738244989776610672e-01i),
+	(-4.35953819012244175753187e+01 + 2.2036445974645306917648585e+02i),
+	(8.3556092283250594950239e-01 - 1.2261571947167240272593282e+01i),
+	(1.582292972120769306069625e+03 + 1.273564263524278244782512e+04i),
+	(6.592208301642122149025369e-08 + 2.584887236651661903526389e-08i),
+}
+var sin = []complex128{
+	(-1.1073801774240233539648544e+03 + 3.024539773002502192425231e+02i),
+	(1.0317037521400759359744682e+00 - 3.2208979799929570242818e-02i),
+	(-2.0501952097271429804261058e+01 - 7.2137981348240798841800967e+01i),
+	(7.3173638080346338642193078e+03 + 2.249219506193664342566248e+03i),
+	(-1.964375633631808177565226e+00 - 9.0958264713870404464159683e+00i),
+	(1.992783647158514838337674e+01 - 9.11555769410191350416942e+01i),
+	(-6.680335650741921444300349e+00 + 3.763353833142432513086117e+00i),
+	(1.2794028166657459148245993e+00 - 2.7669092099795781155109602e+00i),
+	(2.8641693949535259594188879e+03 + 7.451234399649871202841615e+02i),
+	(-4.893811726244659135553033e+01 - 5.371469305562194635957655e+01i),
+}
+var sinh = []complex128{
+	(8.34559353341652565758198e+00 + 7.2187893208650790476628899e+01i),
+	(1.1042192548260646752051112e+03 - 3.1379650595631635858792056e+02i),
+	(-8.239469336509264113041849e-02 + 9.9273668758439489098514519e-01i),
+	(7.332295456982297798219401e+01 - 1.574585908122833444899023e+01i),
+	(-7.4786432301380582103534216e+03 + 1.63483823493980029604071e+03i),
+	(4.595842179016870234028347e+00 - 8.135290105518580753211484e+00i),
+	(-8.543842533574163435246793e+01 + 3.750798997857594068272375e+01i),
+	(-1.918003500809465688017307e+00 + 7.4358344619793504041350251e+00i),
+	(-2.233816733239658031433147e+00 - 2.143519070805995056229335e+00i),
+	(-7.797564130187551181105341e+02 - 2.8549352346594918614806877e+03i),
+}
+var sqrt = []complex128{
+	(2.6628203086086130543813948e+00 + 1.4531345674282185229796902e+00i),
+	(2.7823278427251986247149295e+00 - 4.9756907317005224529115567e-02i),
+	(1.5397025302089642757361015e+00 - 1.6271336573016637535695727e+00i),
+	(1.7103411581506875260277898e+00 + 2.8170677122737589676157029e+00i),
+	(3.1390392472953103383607947e+00 + 4.6612625849858653248980849e-01i),
+	(2.1117080764822417640789287e+00 + 1.2381170223514273234967850e+00i),
+	(2.3587032281672256703926939e+00 + 5.7827111903257349935720172e-01i),
+	(1.7335262588873410476661577e+00 + 5.2647258220721269141550382e-01i),
+	(2.3131094974708716531499282e+00 - 1.8775429304303785570775490e+00i),
+	(8.1420535745048086240947359e-01 + 3.0575897587277248522656113e+00i),
+}
+var tan = []complex128{
+	(-1.928757919086441129134525e-07 + 1.0000003267499169073251826e+00i),
+	(1.242412685364183792138948e+00 - 3.17149693883133370106696e+00i),
+	(-4.6745126251587795225571826e-05 - 9.9992439225263959286114298e-01i),
+	(4.792363401193648192887116e-09 + 1.0000000070589333451557723e+00i),
+	(2.345740824080089140287315e-03 + 9.947733046570988661022763e-01i),
+	(-2.396030789494815566088809e-05 + 9.9994781345418591429826779e-01i),
+	(-7.370204836644931340905303e-03 + 1.0043553413417138987717748e+00i),
+	(-3.691803847992048527007457e-02 + 9.6475071993469548066328894e-01i),
+	(-2.781955256713729368401878e-08 - 1.000000049848910609006646e+00i),
+	(9.4281590064030478879791249e-05 + 9.9999119340863718183758545e-01i),
+}
+var tanh = []complex128{
+	(1.0000921981225144748819918e+00 + 2.160986245871518020231507e-05i),
+	(9.9999967727531993209562591e-01 - 1.9953763222959658873657676e-07i),
+	(-1.765485739548037260789686e+00 + 1.7024216325552852445168471e+00i),
+	(-9.999189442732736452807108e-01 + 3.64906070494473701938098e-05i),
+	(9.9999999224622333738729767e-01 - 3.560088949517914774813046e-09i),
+	(1.0029324933367326862499343e+00 - 4.948790309797102353137528e-03i),
+	(9.9996113064788012488693567e-01 - 4.226995742097032481451259e-05i),
+	(1.0074784189316340029873945e+00 - 4.194050814891697808029407e-03i),
+	(9.9385534229718327109131502e-01 + 5.144217985914355502713437e-02i),
+	(-1.0000000491604982429364892e+00 - 2.901873195374433112227349e-08i),
+}
+
+// huge values along the real axis for testing reducePi in Tan
+var hugeIn = []complex128{
+	1 << 28,
+	1 << 29,
+	1 << 30,
+	1 << 35,
+	-1 << 120,
+	1 << 240,
+	1 << 300,
+	-1 << 480,
+	1234567891234567 << 180,
+	-1234567891234567 << 300,
+}
+
+// Results for tanHuge[i] calculated with https://github.com/robpike/ivy
+// using 4096 bits of working precision.
+var tanHuge = []complex128{
+	5.95641897939639421,
+	-0.34551069233430392,
+	-0.78469661331920043,
+	0.84276385870875983,
+	0.40806638884180424,
+	-0.37603456702698076,
+	4.60901287677810962,
+	3.39135965054779932,
+	-6.76813854009065030,
+	-0.76417695016604922,
+}
+
+// special cases conform to C99 standard appendix G.6 Complex arithmetic
+var inf, nan = math.Inf(1), math.NaN()
+
+var vcAbsSC = []complex128{
+	NaN(),
+}
+var absSC = []float64{
+	math.NaN(),
+}
+var acosSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.1.1
+	{complex(zero, zero),
+		complex(math.Pi/2, -zero)},
+	{complex(-zero, zero),
+		complex(math.Pi/2, -zero)},
+	{complex(zero, nan),
+		complex(math.Pi/2, nan)},
+	{complex(-zero, nan),
+		complex(math.Pi/2, nan)},
+	{complex(1.0, inf),
+		complex(math.Pi/2, -inf)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(-inf, 1.0),
+		complex(math.Pi, -inf)},
+	{complex(inf, 1.0),
+		complex(0.0, -inf)},
+	{complex(-inf, inf),
+		complex(3*math.Pi/4, -inf)},
+	{complex(inf, inf),
+		complex(math.Pi/4, -inf)},
+	{complex(inf, nan),
+		complex(nan, -inf)}, // imaginary sign unspecified
+	{complex(-inf, nan),
+		complex(nan, inf)}, // imaginary sign unspecified
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(nan, -inf)},
+	{NaN(),
+		NaN()},
+}
+var acoshSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.2.1
+	{complex(zero, zero),
+		complex(zero, math.Pi/2)},
+	{complex(-zero, zero),
+		complex(zero, math.Pi/2)},
+	{complex(1.0, inf),
+		complex(inf, math.Pi/2)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(-inf, 1.0),
+		complex(inf, math.Pi)},
+	{complex(inf, 1.0),
+		complex(inf, zero)},
+	{complex(-inf, inf),
+		complex(inf, 3*math.Pi/4)},
+	{complex(inf, inf),
+		complex(inf, math.Pi/4)},
+	{complex(inf, nan),
+		complex(inf, nan)},
+	{complex(-inf, nan),
+		complex(inf, nan)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(inf, nan)},
+	{NaN(),
+		NaN()},
+}
+var asinSC = []struct {
+	in,
+	want complex128
+}{
+	// Derived from Asin(z) = -i * Asinh(i * z), G.6 #7
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(1.0, inf),
+		complex(0, inf)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, 1),
+		complex(math.Pi/2, inf)},
+	{complex(inf, inf),
+		complex(math.Pi/4, inf)},
+	{complex(inf, nan),
+		complex(nan, inf)}, // imaginary sign unspecified
+	{complex(nan, zero),
+		NaN()},
+	{complex(nan, 1),
+		NaN()},
+	{complex(nan, inf),
+		complex(nan, inf)},
+	{NaN(),
+		NaN()},
+}
+var asinhSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.2.2
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(1.0, inf),
+		complex(inf, math.Pi/2)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, 1.0),
+		complex(inf, zero)},
+	{complex(inf, inf),
+		complex(inf, math.Pi/4)},
+	{complex(inf, nan),
+		complex(inf, nan)},
+	{complex(nan, zero),
+		complex(nan, zero)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(inf, nan)}, // sign of real part unspecified
+	{NaN(),
+		NaN()},
+}
+var atanSC = []struct {
+	in,
+	want complex128
+}{
+	// Derived from Atan(z) = -i * Atanh(i * z), G.6 #7
+	{complex(0, zero),
+		complex(0, zero)},
+	{complex(0, nan),
+		NaN()},
+	{complex(1.0, zero),
+		complex(math.Pi/4, zero)},
+	{complex(1.0, inf),
+		complex(math.Pi/2, zero)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, 1),
+		complex(math.Pi/2, zero)},
+	{complex(inf, inf),
+		complex(math.Pi/2, zero)},
+	{complex(inf, nan),
+		complex(math.Pi/2, zero)},
+	{complex(nan, 1),
+		NaN()},
+	{complex(nan, inf),
+		complex(nan, zero)},
+	{NaN(),
+		NaN()},
+}
+var atanhSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.2.3
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(zero, nan),
+		complex(zero, nan)},
+	{complex(1.0, zero),
+		complex(inf, zero)},
+	{complex(1.0, inf),
+		complex(0, math.Pi/2)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, 1.0),
+		complex(zero, math.Pi/2)},
+	{complex(inf, inf),
+		complex(zero, math.Pi/2)},
+	{complex(inf, nan),
+		complex(0, nan)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(zero, math.Pi/2)}, // sign of real part not specified.
+	{NaN(),
+		NaN()},
+}
+var vcConjSC = []complex128{
+	NaN(),
+}
+var conjSC = []complex128{
+	NaN(),
+}
+var cosSC = []struct {
+	in,
+	want complex128
+}{
+	// Derived from Cos(z) = Cosh(i * z), G.6 #7
+	{complex(zero, zero),
+		complex(1.0, -zero)},
+	{complex(zero, inf),
+		complex(inf, -zero)},
+	{complex(zero, nan),
+		complex(nan, zero)}, // imaginary sign unspecified
+	{complex(1.0, inf),
+		complex(inf, -inf)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, zero),
+		complex(nan, -zero)},
+	{complex(inf, 1.0),
+		NaN()},
+	{complex(inf, inf),
+		complex(inf, nan)}, // real sign unspecified
+	{complex(inf, nan),
+		NaN()},
+	{complex(nan, zero),
+		complex(nan, -zero)}, // imaginary sign unspecified
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(inf, nan)},
+	{NaN(),
+		NaN()},
+}
+var coshSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.2.4
+	{complex(zero, zero),
+		complex(1.0, zero)},
+	{complex(zero, inf),
+		complex(nan, zero)}, // imaginary sign unspecified
+	{complex(zero, nan),
+		complex(nan, zero)}, // imaginary sign unspecified
+	{complex(1.0, inf),
+		NaN()},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, zero),
+		complex(inf, zero)},
+	{complex(inf, 1.0),
+		complex(inf*math.Cos(1.0), inf*math.Sin(1.0))}, // +inf  cis(y)
+	{complex(inf, inf),
+		complex(inf, nan)}, // real sign unspecified
+	{complex(inf, nan),
+		complex(inf, nan)},
+	{complex(nan, zero),
+		complex(nan, zero)}, // imaginary sign unspecified
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		NaN()},
+	{NaN(),
+		NaN()},
+}
+var expSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.3.1
+	{complex(zero, zero),
+		complex(1.0, zero)},
+	{complex(-zero, zero),
+		complex(1.0, zero)},
+	{complex(1.0, inf),
+		NaN()},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, zero),
+		complex(inf, zero)},
+	{complex(-inf, 1.0),
+		complex(math.Copysign(0.0, math.Cos(1.0)), math.Copysign(0.0, math.Sin(1.0)))}, // +0 cis(y)
+	{complex(inf, 1.0),
+		complex(inf*math.Cos(1.0), inf*math.Sin(1.0))}, // +inf  cis(y)
+	{complex(-inf, inf),
+		complex(zero, zero)}, // real and imaginary sign unspecified
+	{complex(inf, inf),
+		complex(inf, nan)}, // real sign unspecified
+	{complex(-inf, nan),
+		complex(zero, zero)}, // real and imaginary sign unspecified
+	{complex(inf, nan),
+		complex(inf, nan)}, // real sign unspecified
+	{complex(nan, zero),
+		complex(nan, zero)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		NaN()},
+	{NaN(),
+		NaN()},
+}
+var vcIsNaNSC = []complex128{
+	complex(math.Inf(-1), math.Inf(-1)),
+	complex(math.Inf(-1), math.NaN()),
+	complex(math.NaN(), math.Inf(-1)),
+	complex(0, math.NaN()),
+	complex(math.NaN(), 0),
+	complex(math.Inf(1), math.Inf(1)),
+	complex(math.Inf(1), math.NaN()),
+	complex(math.NaN(), math.Inf(1)),
+	complex(math.NaN(), math.NaN()),
+}
+var isNaNSC = []bool{
+	false,
+	false,
+	false,
+	true,
+	true,
+	false,
+	false,
+	false,
+	true,
+}
+
+var logSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.3.2
+	{complex(zero, zero),
+		complex(-inf, zero)},
+	{complex(-zero, zero),
+		complex(-inf, math.Pi)},
+	{complex(1.0, inf),
+		complex(inf, math.Pi/2)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(-inf, 1.0),
+		complex(inf, math.Pi)},
+	{complex(inf, 1.0),
+		complex(inf, 0.0)},
+	{complex(-inf, inf),
+		complex(inf, 3*math.Pi/4)},
+	{complex(inf, inf),
+		complex(inf, math.Pi/4)},
+	{complex(-inf, nan),
+		complex(inf, nan)},
+	{complex(inf, nan),
+		complex(inf, nan)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(inf, nan)},
+	{NaN(),
+		NaN()},
+}
+var log10SC = []struct {
+	in,
+	want complex128
+}{
+	// derived from Log special cases via Log10(x) = math.Log10E*Log(x)
+	{complex(zero, zero),
+		complex(-inf, zero)},
+	{complex(-zero, zero),
+		complex(-inf, float64(math.Log10E)*float64(math.Pi))},
+	{complex(1.0, inf),
+		complex(inf, float64(math.Log10E)*float64(math.Pi/2))},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(-inf, 1.0),
+		complex(inf, float64(math.Log10E)*float64(math.Pi))},
+	{complex(inf, 1.0),
+		complex(inf, 0.0)},
+	{complex(-inf, inf),
+		complex(inf, float64(math.Log10E)*float64(3*math.Pi/4))},
+	{complex(inf, inf),
+		complex(inf, float64(math.Log10E)*float64(math.Pi/4))},
+	{complex(-inf, nan),
+		complex(inf, nan)},
+	{complex(inf, nan),
+		complex(inf, nan)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(inf, nan)},
+	{NaN(),
+		NaN()},
+}
+var vcPolarSC = []complex128{
+	NaN(),
+}
+var polarSC = []ff{
+	{math.NaN(), math.NaN()},
+}
+var vcPowSC = [][2]complex128{
+	{NaN(), NaN()},
+	{0, NaN()},
+}
+var powSC = []complex128{
+	NaN(),
+	NaN(),
+}
+var sinSC = []struct {
+	in,
+	want complex128
+}{
+	// Derived from Sin(z) = -i * Sinh(i * z), G.6 #7
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(zero, inf),
+		complex(zero, inf)},
+	{complex(zero, nan),
+		complex(zero, nan)},
+	{complex(1.0, inf),
+		complex(inf, inf)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, zero),
+		complex(nan, zero)},
+	{complex(inf, 1.0),
+		NaN()},
+	{complex(inf, inf),
+		complex(nan, inf)},
+	{complex(inf, nan),
+		NaN()},
+	{complex(nan, zero),
+		complex(nan, zero)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(nan, inf)},
+	{NaN(),
+		NaN()},
+}
+
+var sinhSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.2.5
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(zero, inf),
+		complex(zero, nan)}, // real sign unspecified
+	{complex(zero, nan),
+		complex(zero, nan)}, // real sign unspecified
+	{complex(1.0, inf),
+		NaN()},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, zero),
+		complex(inf, zero)},
+	{complex(inf, 1.0),
+		complex(inf*math.Cos(1.0), inf*math.Sin(1.0))}, // +inf  cis(y)
+	{complex(inf, inf),
+		complex(inf, nan)}, // real sign unspecified
+	{complex(inf, nan),
+		complex(inf, nan)}, // real sign unspecified
+	{complex(nan, zero),
+		complex(nan, zero)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		NaN()},
+	{NaN(),
+		NaN()},
+}
+
+var sqrtSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.4.2
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(-zero, zero),
+		complex(zero, zero)},
+	{complex(1.0, inf),
+		complex(inf, inf)},
+	{complex(nan, inf),
+		complex(inf, inf)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(-inf, 1.0),
+		complex(zero, inf)},
+	{complex(inf, 1.0),
+		complex(inf, zero)},
+	{complex(-inf, nan),
+		complex(nan, inf)}, // imaginary sign unspecified
+	{complex(inf, nan),
+		complex(inf, nan)},
+	{complex(nan, 1.0),
+		NaN()},
+	{NaN(),
+		NaN()},
+}
+var tanSC = []struct {
+	in,
+	want complex128
+}{
+	// Derived from Tan(z) = -i * Tanh(i * z), G.6 #7
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(zero, nan),
+		complex(zero, nan)},
+	{complex(1.0, inf),
+		complex(zero, 1.0)},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, 1.0),
+		NaN()},
+	{complex(inf, inf),
+		complex(zero, 1.0)},
+	{complex(inf, nan),
+		NaN()},
+	{complex(nan, zero),
+		NaN()},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		complex(zero, 1.0)},
+	{NaN(),
+		NaN()},
+}
+var tanhSC = []struct {
+	in,
+	want complex128
+}{
+	// G.6.2.6
+	{complex(zero, zero),
+		complex(zero, zero)},
+	{complex(1.0, inf),
+		NaN()},
+	{complex(1.0, nan),
+		NaN()},
+	{complex(inf, 1.0),
+		complex(1.0, math.Copysign(0.0, math.Sin(2*1.0)))}, // 1 + i 0 sin(2y)
+	{complex(inf, inf),
+		complex(1.0, zero)}, // imaginary sign unspecified
+	{complex(inf, nan),
+		complex(1.0, zero)}, // imaginary sign unspecified
+	{complex(nan, zero),
+		complex(nan, zero)},
+	{complex(nan, 1.0),
+		NaN()},
+	{complex(nan, inf),
+		NaN()},
+	{NaN(),
+		NaN()},
+}
+
+// branch cut continuity checks
+// points on each axis at |z| > 1 are checked for one-sided continuity from both the positive and negative side
+// all possible branch cuts for the elementary functions are at one of these points
+
+var zero = 0.0            // a variable, not a constant, so that -zero is negative zero at run time
+var eps = 1.0 / (1 << 53) // 2^-53, the offset used for the off-axis branchPoints entries
+
+var branchPoints = [][2]complex128{
+	{complex(2.0, zero), complex(2.0, eps)},
+	{complex(2.0, -zero), complex(2.0, -eps)},
+	{complex(-2.0, zero), complex(-2.0, eps)},
+	{complex(-2.0, -zero), complex(-2.0, -eps)},
+	{complex(zero, 2.0), complex(eps, 2.0)},
+	{complex(-zero, 2.0), complex(-eps, 2.0)},
+	{complex(zero, -2.0), complex(eps, -2.0)},
+	{complex(-zero, -2.0), complex(-eps, -2.0)},
+}
+
+// functions borrowed from pkg/math/all_test.go
+func tolerance(a, b, e float64) bool { // reports |a-b| < e when b == 0, |a-b| < |e*b| otherwise
+	d := a - b
+	if d < 0 {
+		d = -d
+	}
+
+	// note: b is treated as the reference value; the tolerance is a fraction of b, not a.
+	// NOTE(review): visible callers here pass (want, got), so b is the computed value - confirm.
+	if b != 0 {
+		e = e * b
+		if e < 0 {
+			e = -e
+		}
+	}
+	return d < e
+}
+func veryclose(a, b float64) bool { return tolerance(a, b, 4e-16) } // tolerance with e = 4e-16
+func alike(a, b float64) bool {
+	switch {
+	case a != a && b != b: // both NaN, i.e. math.IsNaN(a) && math.IsNaN(b)
+		return true
+	case a == b: // equal values must also agree in sign bit, so +0 and -0 are not alike
+		return math.Signbit(a) == math.Signbit(b)
+	}
+	return false
+}
+
+func cTolerance(a, b complex128, e float64) bool {
+	d := Abs(a - b) // Abs of the complex difference
+	if b != 0 {
+		e = e * Abs(b) // make the tolerance a fraction of Abs(b)
+		if e < 0 {
+			e = -e
+		}
+	}
+	return d < e
+}
+func cSoclose(a, b complex128, e float64) bool { return cTolerance(a, b, e) }     // caller-chosen tolerance e
+func cVeryclose(a, b complex128) bool          { return cTolerance(a, b, 4e-16) } // fixed tolerance 4e-16
+func cAlike(a, b complex128) bool { // compares real and imaginary parts separately; both must match
+	var realAlike, imagAlike bool
+	if isExact(real(b)) {
+		realAlike = alike(real(a), real(b))
+	} else {
+		// real(b) is not an exact special value: allow the small error veryclose accepts.
+		realAlike = veryclose(real(a), real(b))
+	}
+	if isExact(imag(b)) {
+		imagAlike = alike(imag(a), imag(b))
+	} else {
+		// imag(b) is not an exact special value: allow the small error veryclose accepts.
+		imagAlike = veryclose(imag(a), imag(b))
+	}
+	return realAlike && imagAlike
+}
+func isExact(x float64) bool {
+	// NaN, ±Inf, 0 and ±1 are special cases that should match exactly.  Other cases
+	// are multiples of Pi that may not be last bit identical on all platforms.
+	return math.IsNaN(x) || math.IsInf(x, 0) || x == 0 || x == 1 || x == -1
+}
+
+func TestAbs(t *testing.T) {
+	for i := 0; i < len(vc); i++ { // regular inputs, compared with veryclose against the abs table
+		if f := Abs(vc[i]); !veryclose(abs[i], f) {
+			t.Errorf("Abs(%g) = %g, want %g", vc[i], f, abs[i])
+		}
+	}
+	for i := 0; i < len(vcAbsSC); i++ { // special-case inputs, compared with alike against absSC
+		if f := Abs(vcAbsSC[i]); !alike(absSC[i], f) {
+			t.Errorf("Abs(%g) = %g, want %g", vcAbsSC[i], f, absSC[i])
+		}
+	}
+}
+func TestAcos(t *testing.T) {
+	for i := 0; i < len(vc); i++ { // regular inputs against the acos table, tolerance 1e-14
+		if f := Acos(vc[i]); !cSoclose(acos[i], f, 1e-14) {
+			t.Errorf("Acos(%g) = %g, want %g", vc[i], f, acos[i])
+		}
+	}
+	for _, v := range acosSC { // special cases, compared with cAlike
+		if f := Acos(v.in); !cAlike(v.want, f) {
+			t.Errorf("Acos(%g) = %g, want %g", v.in, f, v.want)
+		}
+		if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Acos(Conj(z)) == Conj(Acos(z)); reported only if Conj(v.in) is not cAlike v.in.
+		if f := Acos(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) {
+			t.Errorf("Acos(%g) = %g, want %g", Conj(v.in), f, Conj(v.want))
+		}
+	}
+	for _, pt := range branchPoints { // pt[0] lies on an axis, pt[1] is offset from it by eps
+		if f0, f1 := Acos(pt[0]), Acos(pt[1]); !cVeryclose(f0, f1) {
+			t.Errorf("Acos(%g) not continuous, got %g want %g", pt[0], f0, f1)
+		}
+	}
+}
+func TestAcosh(t *testing.T) {
+	for i := 0; i < len(vc); i++ { // regular inputs against the acosh table, tolerance 1e-14
+		if f := Acosh(vc[i]); !cSoclose(acosh[i], f, 1e-14) {
+			t.Errorf("Acosh(%g) = %g, want %g", vc[i], f, acosh[i])
+		}
+	}
+	for _, v := range acoshSC { // special cases, compared with cAlike
+		if f := Acosh(v.in); !cAlike(v.want, f) {
+			t.Errorf("Acosh(%g) = %g, want %g", v.in, f, v.want)
+		}
+		if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Acosh(Conj(z)) == Conj(Acosh(z)); reported only if Conj(v.in) is not cAlike v.in.
+		if f := Acosh(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) {
+			t.Errorf("Acosh(%g) = %g, want %g", Conj(v.in), f, Conj(v.want))
+		}
+
+	}
+	for _, pt := range branchPoints { // pt[0] lies on an axis, pt[1] is offset from it by eps
+		if f0, f1 := Acosh(pt[0]), Acosh(pt[1]); !cVeryclose(f0, f1) {
+			t.Errorf("Acosh(%g) not continuous, got %g want %g", pt[0], f0, f1)
+		}
+	}
+}
+// TestAsin checks Asin against the asin table (cSoclose, tolerance 1e-14),
+// the asinSC special cases together with their conjugate and negation
+// symmetries (cAlike), and continuity across branchPoints (cVeryclose).
+func TestAsin(t *testing.T) {
+	for i, z := range vc {
+		got, want := Asin(z), asin[i]
+		if !cSoclose(want, got, 1e-14) {
+			t.Errorf("Asin(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range asinSC {
+		in, want := tc.in, tc.want
+		if got := Asin(in); !cAlike(want, got) {
+			t.Errorf("Asin(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Asin(Conj(z)) == Conj(Asin(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Asin(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Asin(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Asin(-z) == -Asin(z)
+		negIn, negWant := -in, -want
+		if got := Asin(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Asin(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Asin(pt[0]), Asin(pt[1])
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Asin(%g) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestAsinh checks Asinh against the asinh table (cSoclose, tolerance 4e-15),
+// the asinhSC special cases together with their conjugate and negation
+// symmetries (cAlike), and continuity across branchPoints (cVeryclose).
+func TestAsinh(t *testing.T) {
+	for i, z := range vc {
+		got, want := Asinh(z), asinh[i]
+		if !cSoclose(want, got, 4e-15) {
+			t.Errorf("Asinh(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range asinhSC {
+		in, want := tc.in, tc.want
+		if got := Asinh(in); !cAlike(want, got) {
+			t.Errorf("Asinh(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Asinh(Conj(z)) == Conj(Asinh(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Asinh(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Asinh(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Asinh(-z) == -Asinh(z)
+		negIn, negWant := -in, -want
+		if got := Asinh(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Asinh(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Asinh(pt[0]), Asinh(pt[1])
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Asinh(%g) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestAtan checks Atan against the atan table (cVeryclose), the atanSC
+// special cases together with their conjugate and negation symmetries
+// (cAlike), and continuity across branchPoints (cVeryclose).
+func TestAtan(t *testing.T) {
+	for i, z := range vc {
+		got, want := Atan(z), atan[i]
+		if !cVeryclose(want, got) {
+			t.Errorf("Atan(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range atanSC {
+		in, want := tc.in, tc.want
+		if got := Atan(in); !cAlike(want, got) {
+			t.Errorf("Atan(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Atan(Conj(z)) == Conj(Atan(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Atan(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Atan(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Atan(-z) == -Atan(z)
+		negIn, negWant := -in, -want
+		if got := Atan(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Atan(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Atan(pt[0]), Atan(pt[1])
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Atan(%g) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestAtanh checks Atanh against the atanh table (cVeryclose), the atanhSC
+// special cases together with their conjugate and negation symmetries
+// (cAlike), and continuity across branchPoints (cVeryclose).
+func TestAtanh(t *testing.T) {
+	for i, z := range vc {
+		got, want := Atanh(z), atanh[i]
+		if !cVeryclose(want, got) {
+			t.Errorf("Atanh(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range atanhSC {
+		in, want := tc.in, tc.want
+		if got := Atanh(in); !cAlike(want, got) {
+			t.Errorf("Atanh(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Atanh(Conj(z)) == Conj(Atanh(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Atanh(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Atanh(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Atanh(-z) == -Atanh(z)
+		negIn, negWant := -in, -want
+		if got := Atanh(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Atanh(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Atanh(pt[0]), Atanh(pt[1])
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Atanh(%g) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestConj compares Conj(vc[i]) with conj[i] using cVeryclose, and
+// Conj(vcConjSC[i]) with conjSC[i] using cAlike.
+func TestConj(t *testing.T) {
+	for i, z := range vc {
+		got, want := Conj(z), conj[i]
+		if !cVeryclose(want, got) {
+			t.Errorf("Conj(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for i, z := range vcConjSC {
+		got, want := Conj(z), conjSC[i]
+		if !cAlike(want, got) {
+			t.Errorf("Conj(%g) = %g, want %g", z, got, want)
+		}
+	}
+}
+// TestCos checks Cos against the cos table (cSoclose, tolerance 3e-15) and
+// the cosSC special cases together with their conjugate and negation
+// symmetries (cAlike).
+func TestCos(t *testing.T) {
+	for i, z := range vc {
+		got, want := Cos(z), cos[i]
+		if !cSoclose(want, got, 3e-15) {
+			t.Errorf("Cos(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range cosSC {
+		in, want := tc.in, tc.want
+		if got := Cos(in); !cAlike(want, got) {
+			t.Errorf("Cos(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Cos(Conj(z)) == Conj(Cos(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Cos(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Cos(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Cos(-z) == Cos(z)
+		negIn := -in
+		if got := Cos(negIn); !cAlike(want, got) && !cAlike(in, negIn) {
+			t.Errorf("Cos(%g) = %g, want %g", negIn, got, want)
+		}
+	}
+}
+// TestCosh checks Cosh against the cosh table (cSoclose, tolerance 2e-15) and
+// the coshSC special cases together with their conjugate and negation
+// symmetries (cAlike).
+func TestCosh(t *testing.T) {
+	for i, z := range vc {
+		got, want := Cosh(z), cosh[i]
+		if !cSoclose(want, got, 2e-15) {
+			t.Errorf("Cosh(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range coshSC {
+		in, want := tc.in, tc.want
+		if got := Cosh(in); !cAlike(want, got) {
+			t.Errorf("Cosh(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Cosh(Conj(z)) == Conj(Cosh(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Cosh(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Cosh(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Cosh(-z) == Cosh(z)
+		negIn := -in
+		if got := Cosh(negIn); !cAlike(want, got) && !cAlike(in, negIn) {
+			t.Errorf("Cosh(%g) = %g, want %g", negIn, got, want)
+		}
+	}
+}
+// TestExp checks Exp against the exp table (cSoclose, tolerance 1e-15) and
+// the expSC special cases together with their conjugate symmetry (cAlike).
+func TestExp(t *testing.T) {
+	for i, z := range vc {
+		got, want := Exp(z), exp[i]
+		if !cSoclose(want, got, 1e-15) {
+			t.Errorf("Exp(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range expSC {
+		in, want := tc.in, tc.want
+		if got := Exp(in); !cAlike(want, got) {
+			t.Errorf("Exp(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Exp(Conj(z)) == Conj(Exp(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Exp(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Exp(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+	}
+}
+// TestIsNaN compares IsNaN(vcIsNaNSC[i]) with the expected boolean isNaNSC[i].
+func TestIsNaN(t *testing.T) {
+	for i, z := range vcIsNaNSC {
+		got, want := IsNaN(z), isNaNSC[i]
+		if want != got {
+			t.Errorf("IsNaN(%v) = %v, want %v", z, got, want)
+		}
+	}
+}
+// TestLog checks Log against the log table (cVeryclose), the logSC special
+// cases together with their conjugate symmetry (cAlike), and continuity
+// across branchPoints (cVeryclose).
+func TestLog(t *testing.T) {
+	for i, z := range vc {
+		got, want := Log(z), log[i]
+		if !cVeryclose(want, got) {
+			t.Errorf("Log(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range logSC {
+		in, want := tc.in, tc.want
+		if got := Log(in); !cAlike(want, got) {
+			t.Errorf("Log(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Log(Conj(z)) == Conj(Log(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Log(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Log(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Log(pt[0]), Log(pt[1])
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Log(%g) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestLog10 checks Log10 against the log10 table (cVeryclose) and the
+// log10SC special cases together with their conjugate symmetry (cAlike).
+func TestLog10(t *testing.T) {
+	for i, z := range vc {
+		got, want := Log10(z), log10[i]
+		if !cVeryclose(want, got) {
+			t.Errorf("Log10(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range log10SC {
+		in, want := tc.in, tc.want
+		if got := Log10(in); !cAlike(want, got) {
+			t.Errorf("Log10(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Log10(Conj(z)) == Conj(Log10(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Log10(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Log10(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+	}
+}
+// TestPolar compares both results of Polar with the polar table (veryclose)
+// and with the polarSC special cases (alike). A case is reported as failed
+// when either the modulus r or the phase theta does not match.
+func TestPolar(t *testing.T) {
+	for i := 0; i < len(vc); i++ {
+		// Either component being off is a failure; requiring both to be off
+		// would let a single wrong component go unreported.
+		if r, theta := Polar(vc[i]); !veryclose(polar[i].r, r) || !veryclose(polar[i].theta, theta) {
+			t.Errorf("Polar(%g) = %g, %g want %g, %g", vc[i], r, theta, polar[i].r, polar[i].theta)
+		}
+	}
+	for i := 0; i < len(vcPolarSC); i++ {
+		if r, theta := Polar(vcPolarSC[i]); !alike(polarSC[i].r, r) || !alike(polarSC[i].theta, theta) {
+			t.Errorf("Polar(%g) = %g, %g, want %g, %g", vcPolarSC[i], r, theta, polarSC[i].r, polarSC[i].theta)
+		}
+	}
+}
+// TestPow checks Pow with a zero base, with the base 3+3i raised to each vc
+// entry (cSoclose, tolerance 4e-15), with the vcPowSC special cases (cAlike),
+// and checks continuity of Pow(z, 0.1) across branchPoints (cVeryclose).
+func TestPow(t *testing.T) {
+	// Special cases for Pow(0, c): each entry is {exponent, expected result}.
+	zero := complex(0, 0)
+	zeroCases := [][2]complex128{
+		{0, 1 + 0i},
+		{1.5, 0 + 0i},
+		{-1.5, complex(math.Inf(0), 0)},
+		{-1.5 + 1.5i, Inf()},
+	}
+	for _, zc := range zeroCases {
+		exponent, want := zc[0], zc[1]
+		if got := Pow(zero, exponent); got != want {
+			t.Errorf("Pow(%g, %g) = %g, want %g", zero, exponent, got, want)
+		}
+	}
+	base := complex(3.0, 3.0)
+	for i, z := range vc {
+		got, want := Pow(base, z), pow[i]
+		if !cSoclose(want, got, 4e-15) {
+			t.Errorf("Pow(%g, %g) = %g, want %g", base, z, got, want)
+		}
+	}
+	for i, args := range vcPowSC {
+		x, y := args[0], args[1]
+		got, want := Pow(x, y), powSC[i]
+		if !cAlike(want, got) {
+			t.Errorf("Pow(%g, %g) = %g, want %g", x, y, got, want)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Pow(pt[0], 0.1), Pow(pt[1], 0.1)
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Pow(%g, 0.1) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestRect feeds each polar[i] pair to Rect and compares the result with
+// vc[i] (cVeryclose), then does the same for polarSC against vcPolarSC
+// (cAlike).
+func TestRect(t *testing.T) {
+	for i, want := range vc {
+		r, theta := polar[i].r, polar[i].theta
+		if got := Rect(r, theta); !cVeryclose(want, got) {
+			t.Errorf("Rect(%g, %g) = %g want %g", r, theta, got, want)
+		}
+	}
+	for i, want := range vcPolarSC {
+		r, theta := polarSC[i].r, polarSC[i].theta
+		if got := Rect(r, theta); !cAlike(want, got) {
+			t.Errorf("Rect(%g, %g) = %g, want %g", r, theta, got, want)
+		}
+	}
+}
+// TestSin checks Sin against the sin table (cSoclose, tolerance 2e-15) and
+// the sinSC special cases together with their conjugate and negation
+// symmetries (cAlike). Failure messages name Sin, the function under test.
+func TestSin(t *testing.T) {
+	for i := 0; i < len(vc); i++ {
+		if f := Sin(vc[i]); !cSoclose(sin[i], f, 2e-15) {
+			t.Errorf("Sin(%g) = %g, want %g", vc[i], f, sin[i])
+		}
+	}
+	for _, v := range sinSC {
+		if f := Sin(v.in); !cAlike(v.want, f) {
+			t.Errorf("Sin(%g) = %g, want %g", v.in, f, v.want)
+		}
+		if math.IsNaN(imag(v.in)) || math.IsNaN(imag(v.want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Sin(Conj(z))  == Conj(Sin(z))
+		if f := Sin(Conj(v.in)); !cAlike(Conj(v.want), f) && !cAlike(v.in, Conj(v.in)) {
+			t.Errorf("Sin(%g) = %g, want %g", Conj(v.in), f, Conj(v.want))
+		}
+		if math.IsNaN(real(v.in)) || math.IsNaN(real(v.want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Sin(-z)  == -Sin(z)
+		if f := Sin(-v.in); !cAlike(-v.want, f) && !cAlike(v.in, -v.in) {
+			t.Errorf("Sin(%g) = %g, want %g", -v.in, f, -v.want)
+		}
+	}
+}
+// TestSinh checks Sinh against the sinh table (cSoclose, tolerance 2e-15) and
+// the sinhSC special cases together with their conjugate and negation
+// symmetries (cAlike).
+func TestSinh(t *testing.T) {
+	for i, z := range vc {
+		got, want := Sinh(z), sinh[i]
+		if !cSoclose(want, got, 2e-15) {
+			t.Errorf("Sinh(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range sinhSC {
+		in, want := tc.in, tc.want
+		if got := Sinh(in); !cAlike(want, got) {
+			t.Errorf("Sinh(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Sinh(Conj(z)) == Conj(Sinh(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Sinh(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Sinh(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Sinh(-z) == -Sinh(z)
+		negIn, negWant := -in, -want
+		if got := Sinh(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Sinh(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+}
+// TestSqrt checks Sqrt against the sqrt table (cVeryclose), the sqrtSC
+// special cases together with their conjugate symmetry (cAlike), and
+// continuity across branchPoints (cVeryclose).
+func TestSqrt(t *testing.T) {
+	for i, z := range vc {
+		got, want := Sqrt(z), sqrt[i]
+		if !cVeryclose(want, got) {
+			t.Errorf("Sqrt(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range sqrtSC {
+		in, want := tc.in, tc.want
+		if got := Sqrt(in); !cAlike(want, got) {
+			t.Errorf("Sqrt(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Sqrt(Conj(z)) == Conj(Sqrt(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Sqrt(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Sqrt(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+	}
+	for _, pt := range branchPoints {
+		y0, y1 := Sqrt(pt[0]), Sqrt(pt[1])
+		if !cVeryclose(y0, y1) {
+			t.Errorf("Sqrt(%g) not continuous, got %g want %g", pt[0], y0, y1)
+		}
+	}
+}
+// TestTan checks Tan against the tan table (cSoclose, tolerance 3e-15) and
+// the tanSC special cases together with their conjugate and negation
+// symmetries (cAlike).
+func TestTan(t *testing.T) {
+	for i, z := range vc {
+		got, want := Tan(z), tan[i]
+		if !cSoclose(want, got, 3e-15) {
+			t.Errorf("Tan(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range tanSC {
+		in, want := tc.in, tc.want
+		if got := Tan(in); !cAlike(want, got) {
+			t.Errorf("Tan(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Tan(Conj(z)) == Conj(Tan(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Tan(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Tan(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Tan(-z) == -Tan(z)
+		negIn, negWant := -in, -want
+		if got := Tan(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Tan(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+}
+// TestTanh checks Tanh against the tanh table (cSoclose, tolerance 2e-15) and
+// the tanhSC special cases together with their conjugate and negation
+// symmetries (cAlike).
+func TestTanh(t *testing.T) {
+	for i, z := range vc {
+		got, want := Tanh(z), tanh[i]
+		if !cSoclose(want, got, 2e-15) {
+			t.Errorf("Tanh(%g) = %g, want %g", z, got, want)
+		}
+	}
+	for _, tc := range tanhSC {
+		in, want := tc.in, tc.want
+		if got := Tanh(in); !cAlike(want, got) {
+			t.Errorf("Tanh(%g) = %g, want %g", in, got, want)
+		}
+		if math.IsNaN(imag(in)) || math.IsNaN(imag(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Tanh(Conj(z)) == Conj(Tanh(z))
+		conjIn, conjWant := Conj(in), Conj(want)
+		if got := Tanh(conjIn); !cAlike(conjWant, got) && !cAlike(in, conjIn) {
+			t.Errorf("Tanh(%g) = %g, want %g", conjIn, got, conjWant)
+		}
+		if math.IsNaN(real(in)) || math.IsNaN(real(want)) {
+			// Negating NaN is undefined with regard to the sign bit produced.
+			continue
+		}
+		// Tanh(-z) == -Tanh(z)
+		negIn, negWant := -in, -want
+		if got := Tanh(negIn); !cAlike(negWant, got) && !cAlike(in, negIn) {
+			t.Errorf("Tanh(%g) = %g, want %g", negIn, got, negWant)
+		}
+	}
+}
+
+// TestInfiniteLoopIntanSeries verifies that Cot(0) returns Inf().
+// See issue 17577.
+func TestInfiniteLoopIntanSeries(t *testing.T) {
+	want := Inf()
+	got := Cot(0)
+	if got == want {
+		return
+	}
+	t.Errorf("Cot(0): got %g, want %g", got, want)
+}
+
+// Sinks for benchmark results. Each benchmark stores its result in one of
+// these package-level variables so the measured call has an observable
+// effect and is not a candidate for dead-code elimination.
+var (
+	benchSinkComplex complex128
+	benchSinkFloat   float64
+)
+
+// BenchmarkAbs measures Abs on the fixed argument 2.5+3.5i.
+func BenchmarkAbs(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkFloat = Abs(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkAcos measures Acos on the fixed argument 2.5+3.5i.
+func BenchmarkAcos(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Acos(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkAcosh measures Acosh on the fixed argument 2.5+3.5i.
+func BenchmarkAcosh(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Acosh(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkAsin measures Asin on the fixed argument 2.5+3.5i.
+func BenchmarkAsin(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Asin(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkAsinh measures Asinh on the fixed argument 2.5+3.5i.
+func BenchmarkAsinh(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Asinh(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkAtan measures Atan on the fixed argument 2.5+3.5i.
+func BenchmarkAtan(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Atan(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkAtanh measures Atanh on the fixed argument 2.5+3.5i.
+func BenchmarkAtanh(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Atanh(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkConj measures Conj on the fixed argument 2.5+3.5i.
+func BenchmarkConj(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Conj(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkCos measures Cos on the fixed argument 2.5+3.5i.
+func BenchmarkCos(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Cos(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkCosh measures Cosh on the fixed argument 2.5+3.5i.
+func BenchmarkCosh(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Cosh(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkExp measures Exp on the fixed argument 2.5+3.5i.
+func BenchmarkExp(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Exp(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkLog measures Log on the fixed argument 2.5+3.5i.
+func BenchmarkLog(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Log(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkLog10 measures Log10 on the fixed argument 2.5+3.5i.
+func BenchmarkLog10(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Log10(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkPhase measures Phase on the fixed argument 2.5+3.5i.
+func BenchmarkPhase(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkFloat = Phase(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkPolar measures Polar on the fixed argument 2.5+3.5i.
+func BenchmarkPolar(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		// Fold both results into the sink so neither half is unused.
+		r, theta := Polar(complex(2.5, 3.5))
+		benchSinkFloat = r + theta
+	}
+}
+
+// BenchmarkPow measures Pow with base and exponent both 2.5+3.5i.
+func BenchmarkPow(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Pow(complex(2.5, 3.5), complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkRect measures Rect with r = 2.5 and θ = 1.5.
+func BenchmarkRect(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Rect(2.5, 1.5)
+	}
+}
+
+// BenchmarkSin measures Sin on the fixed argument 2.5+3.5i.
+func BenchmarkSin(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Sin(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkSinh measures Sinh on the fixed argument 2.5+3.5i.
+func BenchmarkSinh(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Sinh(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkSqrt measures Sqrt on the fixed argument 2.5+3.5i.
+func BenchmarkSqrt(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Sqrt(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkTan measures Tan on the fixed argument 2.5+3.5i.
+func BenchmarkTan(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Tan(complex(2.5, 3.5))
+	}
+}
+
+// BenchmarkTanh measures Tanh on the fixed argument 2.5+3.5i.
+func BenchmarkTanh(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		benchSinkComplex = Tanh(complex(2.5, 3.5))
+	}
+}
diff --git a/src/math/cmplx/conj.go b/src/math/cmplx/conj.go
new file mode 100644
index 0000000..34a4277
--- /dev/null
+++ b/src/math/cmplx/conj.go
@@ -0,0 +1,8 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+// Conj returns the complex conjugate of x: the real part is kept and the
+// imaginary part is negated.
+func Conj(x complex128) complex128 {
+	re, im := real(x), imag(x)
+	return complex(re, -im)
+}
diff --git a/src/math/cmplx/example_test.go b/src/math/cmplx/example_test.go
new file mode 100644
index 0000000..f0ed963
--- /dev/null
+++ b/src/math/cmplx/example_test.go
@@ -0,0 +1,28 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx_test
+
+import (
+	"fmt"
+	"math"
+	"math/cmplx"
+)
+
+// ExampleAbs prints cmplx.Abs(3+4i) with one digit after the decimal point.
+func ExampleAbs() {
+	fmt.Printf("%.1f", cmplx.Abs(3+4i))
+	// Output: 5.0
+}
+
+// ExampleExp computes Euler's identity, e**(iπ) + 1 = 0, and prints the
+// result with one digit after the decimal point in each part.
+func ExampleExp() {
+	fmt.Printf("%.1f", cmplx.Exp(1i*math.Pi)+1)
+	// Output: (0.0+0.0i)
+}
+
+// ExamplePolar converts 2i to polar form and prints the modulus r and the
+// phase θ, the latter expressed as a multiple of π.
+func ExamplePolar() {
+	r, theta := cmplx.Polar(2i)
+	fmt.Printf("r: %.1f, θ: %.1f*π", r, theta/math.Pi)
+	// Output: r: 2.0, θ: 0.5*π
+}
diff --git a/src/math/cmplx/exp.go b/src/math/cmplx/exp.go
new file mode 100644
index 0000000..d5d0a5d
--- /dev/null
+++ b/src/math/cmplx/exp.go
@@ -0,0 +1,72 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The Go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex exponential function
+//
+// DESCRIPTION:
+//
+// Returns the complex exponential of the complex argument z.
+//
+// If
+//     z = x + iy,
+//     r = exp(x),
+// then
+//     w = r cos y + i r sin y.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      8700       3.7e-17     1.1e-17
+//    IEEE      -10,+10     30000       3.0e-16     8.7e-17
+
+// Exp returns e**x, the base-e exponential of x.
+//
+// Special cases handled before the general formula:
+//
+//	Exp(+Inf+0i) returns x unchanged
+//	Exp(-Inf+yi) for y = ±Inf or NaN returns a zero whose imaginary part carries the sign of y
+//	Exp(+Inf+yi) for y = ±Inf or NaN returns +Inf+NaN i
+//	Exp(NaN+yi) for y = ±0 returns NaN+yi
+//
+// All other arguments are evaluated as exp(re) * (cos(im) + i sin(im)).
+func Exp(x complex128) complex128 {
+	re, im := real(x), imag(x)
+	if math.IsInf(re, 0) {
+		if re > 0 && im == 0 {
+			return x
+		}
+		if math.IsInf(im, 0) || math.IsNaN(im) {
+			if re < 0 {
+				return complex(0, math.Copysign(0, im))
+			}
+			return complex(math.Inf(1), math.NaN())
+		}
+		// Remaining infinite-real inputs fall through to the general formula.
+	} else if math.IsNaN(re) && im == 0 {
+		return complex(math.NaN(), im)
+	}
+	scale := math.Exp(re)
+	sin, cos := math.Sincos(im)
+	return complex(scale*cos, scale*sin)
+}
diff --git a/src/math/cmplx/huge_test.go b/src/math/cmplx/huge_test.go
new file mode 100644
index 0000000..e794cf2
--- /dev/null
+++ b/src/math/cmplx/huge_test.go
@@ -0,0 +1,22 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Disabled for s390x because it uses assembly routines that are not
+// accurate for huge arguments.
+
+//go:build !s390x
+
+package cmplx
+
+import (
+	"testing"
+)
+
+// TestTanHuge compares Tan on each hugeIn argument with the matching tanHuge
+// entry using cSoclose with tolerance 3e-15.
+func TestTanHuge(t *testing.T) {
+	for i := range hugeIn {
+		x, want := hugeIn[i], tanHuge[i]
+		if got := Tan(x); !cSoclose(want, got, 3e-15) {
+			t.Errorf("Tan(%g) = %g, want %g", x, got, want)
+		}
+	}
+}
diff --git a/src/math/cmplx/isinf.go b/src/math/cmplx/isinf.go
new file mode 100644
index 0000000..6273cd3
--- /dev/null
+++ b/src/math/cmplx/isinf.go
@@ -0,0 +1,21 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// IsInf reports whether either real(x) or imag(x) is an infinity
+// (of either sign).
+func IsInf(x complex128) bool {
+	return math.IsInf(real(x), 0) || math.IsInf(imag(x), 0)
+}
+
+// Inf returns a complex infinity, complex(+Inf, +Inf).
+func Inf() complex128 {
+	return complex(math.Inf(1), math.Inf(1))
+}
diff --git a/src/math/cmplx/isnan.go b/src/math/cmplx/isnan.go
new file mode 100644
index 0000000..fed442c
--- /dev/null
+++ b/src/math/cmplx/isnan.go
@@ -0,0 +1,25 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// IsNaN reports whether either real(x) or imag(x) is NaN
+// and neither is an infinity.
+func IsNaN(x complex128) bool {
+	re, im := real(x), imag(x)
+	// An infinite part takes precedence: such a value is not reported as NaN.
+	if math.IsInf(re, 0) || math.IsInf(im, 0) {
+		return false
+	}
+	return math.IsNaN(re) || math.IsNaN(im)
+}
+
+// NaN returns a complex “not-a-number” value whose real and imaginary
+// parts are both math.NaN().
+func NaN() complex128 {
+	return complex(math.NaN(), math.NaN())
+}
diff --git a/src/math/cmplx/log.go b/src/math/cmplx/log.go
new file mode 100644
index 0000000..fd39c76
--- /dev/null
+++ b/src/math/cmplx/log.go
@@ -0,0 +1,65 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The Go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex natural logarithm
+//
+// DESCRIPTION:
+//
+// Returns complex logarithm to the base e (2.718...) of
+// the complex argument z.
+//
+// If
+//       z = x + iy, r = sqrt( x**2 + y**2 ),
+// then
+//       w = log(r) + i arctan(y/x).
+//
+// The arctangent ranges from -PI to +PI.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      7000       8.5e-17     1.9e-17
+//    IEEE      -10,+10     30000       5.0e-15     1.1e-16
+//
+// Larger relative error can be observed for z near 1 +i0.
+// In IEEE arithmetic the peak absolute error is 5.2e-16, rms
+// absolute error 1.0e-16.
+
+// Log returns the natural logarithm of x. The real part of the result is
+// math.Log(Abs(x)) and the imaginary part is Phase(x).
+func Log(x complex128) complex128 {
+	modulus, arg := Abs(x), Phase(x)
+	return complex(math.Log(modulus), arg)
+}
+
+// Log10 returns the decimal logarithm of x, obtained by scaling both parts
+// of Log(x) by math.Log10E.
+func Log10(x complex128) complex128 {
+	ln := Log(x)
+	re, im := real(ln), imag(ln)
+	return complex(math.Log10E*re, math.Log10E*im)
+}
diff --git a/src/math/cmplx/phase.go b/src/math/cmplx/phase.go
new file mode 100644
index 0000000..03cece8
--- /dev/null
+++ b/src/math/cmplx/phase.go
@@ -0,0 +1,11 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// Phase returns the phase (also called the argument) of x, computed as
+// math.Atan2(imag(x), real(x)).
+// The returned value is in the range [-Pi, Pi].
+func Phase(x complex128) float64 {
+	re, im := real(x), imag(x)
+	return math.Atan2(im, re)
+}
diff --git a/src/math/cmplx/polar.go b/src/math/cmplx/polar.go
new file mode 100644
index 0000000..9b192bc
--- /dev/null
+++ b/src/math/cmplx/polar.go
@@ -0,0 +1,12 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+// Polar returns the absolute value r and phase θ of x,
+// such that x = r * e**θi.
+// The phase is in the range [-Pi, Pi].
+func Polar(x complex128) (r, θ float64) {
+	r = Abs(x)
+	θ = Phase(x)
+	return r, θ
+}
diff --git a/src/math/cmplx/pow.go b/src/math/cmplx/pow.go
new file mode 100644
index 0000000..666bba2
--- /dev/null
+++ b/src/math/cmplx/pow.go
@@ -0,0 +1,82 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The Go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex power function
+//
+// DESCRIPTION:
+//
+// Raises complex A to the complex Zth power.
+// Definition is per AMS55 # 4.2.8,
+// analytically equivalent to cpow(a,z) = cexp(z clog(a)).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    IEEE      -10,+10     30000       9.4e-15     1.5e-15
+
+// Pow returns x**y, the base-x exponential of y.
+// For generalized compatibility with math.Pow:
+//
+//	Pow(0, ±0) returns 1+0i
+//	Pow(0, c) for real(c)<0 returns Inf+0i if imag(c) is zero, otherwise Inf+Inf i.
+//
+// Further zero-base cases:
+//
+//	Pow(0, c) for real(c)>0 returns 0
+//	Pow(0, c) returns NaN+NaN i if IsNaN(c) or real(c) is NaN
+//
+// For nonzero x the result is computed in polar form from Abs(x) and Phase(x).
+func Pow(x, y complex128) complex128 {
+	if x == 0 { // Guaranteed also true for x == -0.
+		if IsNaN(y) {
+			return NaN()
+		}
+		r, i := real(y), imag(y)
+		switch {
+		case r == 0:
+			// Only real(y) is examined here; imag(y) does not affect the result.
+			return 1
+		case r < 0:
+			if i == 0 {
+				return complex(math.Inf(1), 0)
+			}
+			return Inf()
+		case r > 0:
+			return 0
+		}
+		// No case matched, so r is NaN. IsNaN(y) was false above only because
+		// imag(y) is infinite (e.g. y = NaN+Inf i); return NaN rather than panic.
+		return NaN()
+	}
+	modulus := Abs(x)
+	// NOTE(review): x == 0 is handled above, so this looks unreachable as long
+	// as Abs returns 0 only for a zero argument - confirm before removing.
+	if modulus == 0 {
+		return complex(0, 0)
+	}
+	// |x**y| = |x|**real(y) * exp(-imag(y)*arg(x)).
+	r := math.Pow(modulus, real(y))
+	arg := Phase(x)
+	// arg(x**y) = real(y)*arg(x) + imag(y)*log|x|.
+	theta := real(y) * arg
+	if imag(y) != 0 {
+		r *= math.Exp(-imag(y) * arg)
+		theta += imag(y) * math.Log(modulus)
+	}
+	s, c := math.Sincos(theta)
+	return complex(r*c, r*s)
+}
diff --git a/src/math/cmplx/rect.go b/src/math/cmplx/rect.go
new file mode 100644
index 0000000..bf94d78
--- /dev/null
+++ b/src/math/cmplx/rect.go
@@ -0,0 +1,13 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// Rect returns the complex number x with polar coordinates r, θ,
+// that is, complex(r*cos(θ), r*sin(θ)).
+func Rect(r, θ float64) complex128 {
+	sin, cos := math.Sincos(θ)
+	re := r * cos
+	im := r * sin
+	return complex(re, im)
+}
diff --git a/src/math/cmplx/sin.go b/src/math/cmplx/sin.go
new file mode 100644
index 0000000..51cf405
--- /dev/null
+++ b/src/math/cmplx/sin.go
@@ -0,0 +1,184 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex circular sine
+//
+// DESCRIPTION:
+//
+// If
+//     z = x + iy,
+//
+// then
+//
+//     w = sin x  cosh y  +  i cos x sinh y.
+//
+// csin(z) = -i csinh(iz).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      8400       5.3e-17     1.3e-17
+//    IEEE      -10,+10     30000       3.8e-16     1.0e-16
+// Also tested by csin(casin(z)) = z.
+
+// Sin returns the sine of x.
+func Sin(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case im == 0 && (math.IsInf(re, 0) || math.IsNaN(re)): // zero imaginary part, re is ±Inf or NaN
+		return complex(math.NaN(), im)
+	case math.IsInf(im, 0):
+		switch { // any other re leaves the switch and uses the general formula
+		case re == 0:
+			return x
+		case math.IsInf(re, 0) || math.IsNaN(re):
+			return complex(math.NaN(), im)
+		}
+	case re == 0 && math.IsNaN(im):
+		return x
+	}
+	s, c := math.Sincos(real(x))
+	sh, ch := sinhcosh(imag(x))
+	return complex(s*ch, c*sh) // sin x cosh y + i cos x sinh y
+}
+
+// Complex hyperbolic sine
+//
+// DESCRIPTION:
+//
+// csinh z = (cexp(z) - cexp(-z))/2
+//         = sinh x * cos y  +  i cosh x * sin y .
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    IEEE      -10,+10     30000       3.1e-16     8.2e-17
+
+// Sinh returns the hyperbolic sine of x.
+func Sinh(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case re == 0 && (math.IsInf(im, 0) || math.IsNaN(im)): // zero real part, im is ±Inf or NaN
+		return complex(re, math.NaN())
+	case math.IsInf(re, 0):
+		switch { // any other im leaves the switch and uses the general formula
+		case im == 0:
+			return complex(re, im)
+		case math.IsInf(im, 0) || math.IsNaN(im):
+			return complex(re, math.NaN())
+		}
+	case im == 0 && math.IsNaN(re):
+		return complex(math.NaN(), im)
+	}
+	s, c := math.Sincos(imag(x))
+	sh, ch := sinhcosh(real(x))
+	return complex(c*sh, s*ch) // sinh x cos y + i cosh x sin y
+}
+
+// Complex circular cosine
+//
+// DESCRIPTION:
+//
+// If
+//     z = x + iy,
+//
+// then
+//
+//     w = cos x  cosh y  -  i sin x sinh y.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      8400       4.5e-17     1.3e-17
+//    IEEE      -10,+10     30000       3.8e-16     1.0e-16
+
+// Cos returns the cosine of x.
+func Cos(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case im == 0 && (math.IsInf(re, 0) || math.IsNaN(re)): // zero imaginary part, re is ±Inf or NaN
+		return complex(math.NaN(), -im*math.Copysign(0, re)) // imaginary part is a signed zero
+	case math.IsInf(im, 0):
+		switch { // any other re leaves the switch and uses the general formula
+		case re == 0:
+			return complex(math.Inf(1), -re*math.Copysign(0, im))
+		case math.IsInf(re, 0) || math.IsNaN(re):
+			return complex(math.Inf(1), math.NaN())
+		}
+	case re == 0 && math.IsNaN(im):
+		return complex(math.NaN(), 0)
+	}
+	s, c := math.Sincos(real(x))
+	sh, ch := sinhcosh(imag(x))
+	return complex(c*ch, -s*sh) // cos x cosh y - i sin x sinh y
+}
+
+// Complex hyperbolic cosine
+//
+// DESCRIPTION:
+//
+// ccosh(z) = cosh x  cos y + i sinh x sin y .
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    IEEE      -10,+10     30000       2.9e-16     8.1e-17
+
+// Cosh returns the hyperbolic cosine of x.
+func Cosh(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case re == 0 && (math.IsInf(im, 0) || math.IsNaN(im)): // zero real part, im is ±Inf or NaN
+		return complex(math.NaN(), re*math.Copysign(0, im)) // imaginary part is a signed zero
+	case math.IsInf(re, 0):
+		switch { // any other im leaves the switch and uses the general formula
+		case im == 0:
+			return complex(math.Inf(1), im*math.Copysign(0, re))
+		case math.IsInf(im, 0) || math.IsNaN(im):
+			return complex(math.Inf(1), math.NaN())
+		}
+	case im == 0 && math.IsNaN(re):
+		return complex(math.NaN(), im)
+	}
+	s, c := math.Sincos(imag(x))
+	sh, ch := sinhcosh(real(x))
+	return complex(c*ch, s*sh) // cosh x cos y + i sinh x sin y
+}
+
+// sinhcosh returns sinh(x) and cosh(x), sharing a single exponential when |x| > 0.5.
+func sinhcosh(x float64) (sh, ch float64) {
+	if math.Abs(x) <= 0.5 { // small |x|: defer to math.Sinh/math.Cosh (presumably to avoid cancellation in e - ei)
+		return math.Sinh(x), math.Cosh(x)
+	}
+	e := math.Exp(x)
+	ei := 0.5 / e // exp(-x)/2
+	e *= 0.5      // exp(x)/2
+	return e - ei, e + ei
+}
diff --git a/src/math/cmplx/sqrt.go b/src/math/cmplx/sqrt.go
new file mode 100644
index 0000000..eddce2f
--- /dev/null
+++ b/src/math/cmplx/sqrt.go
@@ -0,0 +1,107 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import "math"
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex square root
+//
+// DESCRIPTION:
+//
+// If z = x + iy,  r = |z|, then
+//
+//                       1/2
+// Re w  =  [ (r + x)/2 ]   ,
+//
+//                       1/2
+// Im w  =  [ (r - x)/2 ]   .
+//
+// Cancellation error in r-x or r+x is avoided by using the
+// identity  2 Re w Im w  =  y.
+//
+// Note that -w is also a square root of z. The root chosen
+// is always in the right half plane and Im w has the same sign as y.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10     25000       3.2e-17     9.6e-18
+//    IEEE      -10,+10   1,000,000     2.9e-16     6.1e-17
+
+// Sqrt returns the square root of x.
+// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x).
+func Sqrt(x complex128) complex128 {
+	if imag(x) == 0 {
+		// Ensure that imag(r) has the same sign as imag(x) for imag(x) == signed zero.
+		if real(x) == 0 {
+			return complex(0, imag(x))
+		}
+		if real(x) < 0 {
+			return complex(0, math.Copysign(math.Sqrt(-real(x)), imag(x)))
+		}
+		return complex(math.Sqrt(real(x)), imag(x))
+	} else if math.IsInf(imag(x), 0) {
+		return complex(math.Inf(1.0), imag(x))
+	}
+	if real(x) == 0 { // purely imaginary input: both parts have magnitude sqrt(|imag(x)|/2)
+		if imag(x) < 0 {
+			r := math.Sqrt(-0.5 * imag(x))
+			return complex(r, -r)
+		}
+		r := math.Sqrt(0.5 * imag(x))
+		return complex(r, r)
+	}
+	a := real(x)
+	b := imag(x)
+	var scale float64
+	// Rescale to avoid internal overflow or underflow.
+	if math.Abs(a) > 4 || math.Abs(b) > 4 {
+		a *= 0.25
+		b *= 0.25
+		scale = 2 // undoes the 0.25 factor after the square root
+	} else {
+		a *= 1.8014398509481984e16 // 2**54
+		b *= 1.8014398509481984e16
+		scale = 7.450580596923828125e-9 // 2**-27
+	}
+	r := math.Hypot(a, b)
+	var t float64
+	if a > 0 { // t ends as Re w and r as |Im w|: sqrt of the sum, the other part from 2 Re w Im w = y
+		t = math.Sqrt(0.5*r + 0.5*a)
+		r = scale * math.Abs((0.5*b)/t)
+		t *= scale
+	} else { // a < 0: take the sqrt of (r - a)/2 instead and derive Re w from it
+		r = math.Sqrt(0.5*r - 0.5*a)
+		t = scale * math.Abs((0.5*b)/r)
+		r *= scale
+	}
+	if b < 0 { // give the imaginary part the sign of imag(x)
+		return complex(t, -r)
+	}
+	return complex(t, r)
+}
diff --git a/src/math/cmplx/tan.go b/src/math/cmplx/tan.go
new file mode 100644
index 0000000..67a1133
--- /dev/null
+++ b/src/math/cmplx/tan.go
@@ -0,0 +1,297 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmplx
+
+import (
+	"math"
+	"math/bits"
+)
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c.
+// The go code is a simplified version of the original C.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// Complex circular tangent
+//
+// DESCRIPTION:
+//
+// If
+//     z = x + iy,
+//
+// then
+//
+//           sin 2x  +  i sinh 2y
+//     w  =  --------------------.
+//            cos 2x  +  cosh 2y
+//
+// On the real axis the denominator is zero at odd multiples
+// of PI/2. The denominator is evaluated by its Taylor
+// series near these points.
+//
+// ctan(z) = -i ctanh(iz).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      5200       7.1e-17     1.6e-17
+//    IEEE      -10,+10     30000       7.2e-16     1.2e-16
+// Also tested by ctan * ccot = 1 and catan(ctan(z))  =  z.
+
+// Tan returns the tangent of x.
+func Tan(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case math.IsInf(im, 0):
+		switch {
+		case math.IsInf(re, 0) || math.IsNaN(re):
+			return complex(math.Copysign(0, re), math.Copysign(1, im))
+		}
+		return complex(math.Copysign(0, math.Sin(2*re)), math.Copysign(1, im)) // ±0 ± i, signs taken from sin 2x and im
+	case re == 0 && math.IsNaN(im):
+		return x
+	}
+	d := math.Cos(2*real(x)) + math.Cosh(2*imag(x)) // denominator cos 2x + cosh 2y
+	if math.Abs(d) < 0.25 { // small denominator: re-evaluate it with the series helper
+		d = tanSeries(x)
+	}
+	if d == 0 {
+		return Inf()
+	}
+	return complex(math.Sin(2*real(x))/d, math.Sinh(2*imag(x))/d)
+}
+
+// Complex hyperbolic tangent
+//
+// DESCRIPTION:
+//
+// tanh z = (sinh 2x  +  i sin 2y) / (cosh 2x + cos 2y) .
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    IEEE      -10,+10     30000       1.7e-14     2.4e-16
+
+// Tanh returns the hyperbolic tangent of x.
+func Tanh(x complex128) complex128 {
+	switch re, im := real(x), imag(x); {
+	case math.IsInf(re, 0):
+		switch {
+		case math.IsInf(im, 0) || math.IsNaN(im):
+			return complex(math.Copysign(1, re), math.Copysign(0, im))
+		}
+		return complex(math.Copysign(1, re), math.Copysign(0, math.Sin(2*im))) // ±1 ± 0i, signs taken from re and sin 2y
+	case im == 0 && math.IsNaN(re):
+		return x
+	}
+	d := math.Cosh(2*real(x)) + math.Cos(2*imag(x)) // denominator cosh 2x + cos 2y
+	if d == 0 {
+		return Inf()
+	}
+	return complex(math.Sinh(2*real(x))/d, math.Sin(2*imag(x))/d)
+}
+
+// reducePi reduces the input argument x to the range (-Pi/2, Pi/2].
+// x must be greater than or equal to 0. For small arguments it
+// uses Cody-Waite reduction in 3 float64 parts based on:
+// "Elementary Function Evaluation:  Algorithms and Implementation"
+// Jean-Michel Muller, 1997.
+// For very large arguments it uses Payne-Hanek range reduction based on:
+// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
+// K. C. Ng et al, March 24, 1992.
+func reducePi(x float64) float64 {
+	// reduceThreshold is the maximum value of x where the reduction using
+	// Cody-Waite reduction still gives accurate results. This threshold
+	// is set by t*PIn being representable as a float64 without error
+	// where t is given by t = floor(x * (1 / Pi)) and PIn are the leading partial
+	// terms of Pi. Since the leading terms, PI1 and PI2 below, have 30 and 32
+	// trailing zero bits respectively, t should have less than 30 significant bits.
+	//	t < 1<<30  -> floor(x*(1/Pi)+0.5) < 1<<30 -> x < (1<<30-1) * Pi - 0.5
+	// So, conservatively we can take x < 1<<30.
+	const reduceThreshold float64 = 1 << 30
+	if math.Abs(x) < reduceThreshold {
+		// Use Cody-Waite reduction in three parts.
+		const (
+			// PI1, PI2 and PI3 comprise an extended precision value of PI
+			// such that PI ~= PI1 + PI2 + PI3. The parts are chosen so
+			// that PI1 and PI2 have an approximately equal number of trailing
+			// zero bits. This ensures that t*PI1 and t*PI2 are exact for
+			// large integer values of t. The full precision PI3 ensures the
+			// approximation of PI is accurate to 102 bits to handle cancellation
+			// during subtraction.
+			PI1 = 3.141592502593994      // 0x400921fb40000000
+			PI2 = 1.5099578831723193e-07 // 0x3e84442d00000000
+			PI3 = 1.0780605716316238e-14 // 0x3d08469898cc5170
+		)
+		t := x / math.Pi
+		t += 0.5
+		t = float64(int64(t)) // int64(t) = the multiple
+		return ((x - t*PI1) - t*PI2) - t*PI3
+	}
+	// Must apply Payne-Hanek range reduction
+	const (
+		mask     = 0x7FF
+		shift    = 64 - 11 - 1
+		bias     = 1023
+		fracMask = 1<<shift - 1
+	)
+	// Extract out the integer and exponent such that,
+	// x = ix * 2 ** exp.
+	ix := math.Float64bits(x)
+	exp := int(ix>>shift&mask) - bias - shift
+	ix &= fracMask
+	ix |= 1 << shift // put back the implicit leading 1 of the mantissa
+
+	// mPi is the binary digits of 1/Pi as a uint64 array,
+	// that is, 1/Pi = Sum mPi[i]*2^(-64*i).
+	// 19 64-bit digits give 1216 bits of precision
+	// to handle the largest possible float64 exponent.
+	var mPi = [...]uint64{
+		0x0000000000000000,
+		0x517cc1b727220a94,
+		0xfe13abe8fa9a6ee0,
+		0x6db14acc9e21c820,
+		0xff28b1d5ef5de2b0,
+		0xdb92371d2126e970,
+		0x0324977504e8c90e,
+		0x7f0ef58e5894d39f,
+		0x74411afa975da242,
+		0x74ce38135a2fbf20,
+		0x9cc8eb1cc1a99cfa,
+		0x4e422fc5defc941d,
+		0x8ffc4bffef02cc07,
+		0xf79788c5ad05368f,
+		0xb69b3f6793e584db,
+		0xa7a31fb34f2ff516,
+		0xba93dd63f5f2f8bd,
+		0x9e839cfbc5294975,
+		0x35fdafd88fc6ae84,
+		0x2b0198237e3db5d5,
+	}
+	// Use the exponent to extract the 3 appropriate uint64 digits from mPi,
+	// B ~ (z0, z1, z2), such that the product leading digit has the exponent -64.
+	// Note, exp >= 50 since x >= reduceThreshold and exp < 971 for maximum float64.
+	digit, bitshift := uint(exp+64)/64, uint(exp+64)%64
+	z0 := (mPi[digit] << bitshift) | (mPi[digit+1] >> (64 - bitshift))
+	z1 := (mPi[digit+1] << bitshift) | (mPi[digit+2] >> (64 - bitshift))
+	z2 := (mPi[digit+2] << bitshift) | (mPi[digit+3] >> (64 - bitshift))
+	// Multiply mantissa by the digits and extract the upper two digits (hi, lo).
+	z2hi, _ := bits.Mul64(z2, ix)
+	z1hi, z1lo := bits.Mul64(z1, ix)
+	z0lo := z0 * ix // wrapping multiply: only the low 64 bits are kept
+	lo, c := bits.Add64(z1lo, z2hi, 0)
+	hi, _ := bits.Add64(z0lo, z1hi, c)
+	// Find the magnitude of the fraction.
+	lz := uint(bits.LeadingZeros64(hi))
+	e := uint64(bias - (lz + 1))
+	// Clear implicit mantissa bit and shift into place.
+	hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)))
+	hi >>= 64 - shift
+	// Include the exponent and convert to a float.
+	hi |= e << shift
+	x = math.Float64frombits(hi)
+	// map to (-Pi/2, Pi/2]
+	if x > 0.5 {
+		x--
+	}
+	return math.Pi * x
+}
+
+// Taylor series expansion for cosh(2y) - cos(2x)
+func tanSeries(z complex128) float64 {
+	const MACHEP = 1.0 / (1 << 53) // 2**-53; the loop ends once |t/d| is no larger than this
+	x := math.Abs(2 * real(z))
+	y := math.Abs(2 * imag(z))
+	x = reducePi(x) // bring 2x into the range reducePi documents, (-Pi/2, Pi/2]
+	x = x * x
+	y = y * y
+	x2 := 1.0
+	y2 := 1.0
+	f := 1.0 // running factorial, kept equal to rn!
+	rn := 0.0
+	d := 0.0
+	for { // each pass adds two series terms: (y2+x2)/f, then (y2-x2)/f
+		rn++
+		f *= rn
+		rn++
+		f *= rn
+		x2 *= x
+		y2 *= y
+		t := y2 + x2
+		t /= f
+		d += t
+
+		rn++
+		f *= rn
+		rn++
+		f *= rn
+		x2 *= x
+		y2 *= y
+		t = y2 - x2
+		t /= f
+		d += t
+		if !(math.Abs(t/d) > MACHEP) {
+			// Caution: Use ! and > instead of <= for correct behavior if t/d is NaN.
+			// See issue 17577.
+			break
+		}
+	}
+	return d
+}
+
+// Complex circular cotangent
+//
+// DESCRIPTION:
+//
+// If
+//     z = x + iy,
+//
+// then
+//
+//           sin 2x  -  i sinh 2y
+//     w  =  --------------------.
+//            cosh 2y  -  cos 2x
+//
+// On the real axis, the denominator has zeros at even
+// multiples of PI/2.  Near these points it is evaluated
+// by a Taylor series.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC       -10,+10      3000       6.5e-17     1.6e-17
+//    IEEE      -10,+10     30000       9.2e-16     1.2e-16
+// Also tested by ctan * ccot = 1 + i0.
+
+// Cot returns the cotangent of x.
+func Cot(x complex128) complex128 {
+	d := math.Cosh(2*imag(x)) - math.Cos(2*real(x)) // denominator cosh 2y - cos 2x
+	if math.Abs(d) < 0.25 { // small denominator: re-evaluate it with the series helper
+		d = tanSeries(x)
+	}
+	if d == 0 {
+		return Inf()
+	}
+	return complex(math.Sin(2*real(x))/d, -math.Sinh(2*imag(x))/d)
+}
diff --git a/src/math/const.go b/src/math/const.go
new file mode 100644
index 0000000..b15e50e
--- /dev/null
+++ b/src/math/const.go
@@ -0,0 +1,57 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package math provides basic constants and mathematical functions.
+//
+// This package does not guarantee bit-identical results across architectures.
+package math
+
+// Mathematical constants.
+const ( // untyped constants; Log2E and Log10E are derived as reciprocals of Ln2 and Ln10
+	E   = 2.71828182845904523536028747135266249775724709369995957496696763 // https://oeis.org/A001113
+	Pi  = 3.14159265358979323846264338327950288419716939937510582097494459 // https://oeis.org/A000796
+	Phi = 1.61803398874989484820458683436563811772030917980576286213544862 // https://oeis.org/A001622
+
+	Sqrt2   = 1.41421356237309504880168872420969807856967187537694807317667974 // https://oeis.org/A002193
+	SqrtE   = 1.64872127070012814684865078781416357165377610071014801157507931 // https://oeis.org/A019774
+	SqrtPi  = 1.77245385090551602729816748334114518279754945612238712821380779 // https://oeis.org/A002161
+	SqrtPhi = 1.27201964951406896425242246173749149171560804184009624861664038 // https://oeis.org/A139339
+
+	Ln2    = 0.693147180559945309417232121458176568075500134360255254120680009 // https://oeis.org/A002162
+	Log2E  = 1 / Ln2
+	Ln10   = 2.30258509299404568401799145468436420760110148862877297603332790 // https://oeis.org/A002392
+	Log10E = 1 / Ln10
+)
+
+// Floating-point limit values.
+// Max is the largest finite value representable by the type.
+// SmallestNonzero is the smallest positive, non-zero value representable by the type.
+const ( // untyped constants built from hexadecimal floating-point literals (0x1pN is 2**N)
+	MaxFloat32             = 0x1p127 * (1 + (1 - 0x1p-23)) // 3.40282346638528859811704183484516925440e+38
+	SmallestNonzeroFloat32 = 0x1p-126 * 0x1p-23            // 1.401298464324817070923729583289916131280e-45
+
+	MaxFloat64             = 0x1p1023 * (1 + (1 - 0x1p-52)) // 1.79769313486231570814527423731704356798070e+308
+	SmallestNonzeroFloat64 = 0x1p-1022 * 0x1p-52            // 4.9406564584124654417656879286822137236505980e-324
+)
+
+// Integer limit values.
+const ( // untyped integer constants; only MaxInt, MinInt and MaxUint depend on intSize
+	intSize = 32 << (^uint(0) >> 63) // 32 or 64
+
+	MaxInt    = 1<<(intSize-1) - 1  // MaxInt32 or MaxInt64 depending on intSize.
+	MinInt    = -1 << (intSize - 1) // MinInt32 or MinInt64 depending on intSize.
+	MaxInt8   = 1<<7 - 1            // 127
+	MinInt8   = -1 << 7             // -128
+	MaxInt16  = 1<<15 - 1           // 32767
+	MinInt16  = -1 << 15            // -32768
+	MaxInt32  = 1<<31 - 1           // 2147483647
+	MinInt32  = -1 << 31            // -2147483648
+	MaxInt64  = 1<<63 - 1           // 9223372036854775807
+	MinInt64  = -1 << 63            // -9223372036854775808
+	MaxUint   = 1<<intSize - 1      // MaxUint32 or MaxUint64 depending on intSize.
+	MaxUint8  = 1<<8 - 1            // 255
+	MaxUint16 = 1<<16 - 1           // 65535
+	MaxUint32 = 1<<32 - 1           // 4294967295
+	MaxUint64 = 1<<64 - 1           // 18446744073709551615
+)
diff --git a/src/math/const_test.go b/src/math/const_test.go
new file mode 100644
index 0000000..170ba6a
--- /dev/null
+++ b/src/math/const_test.go
@@ -0,0 +1,47 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math_test
+
+import (
+	"testing"
+
+	. "math"
+)
+
+func TestMaxUint(t *testing.T) { // each MaxUintN, converted to its own type, must wrap to 0 when incremented
+	if v := uint(MaxUint); v+1 != 0 {
+		t.Errorf("MaxUint should wrap around to zero: %d", v+1)
+	}
+	if v := uint8(MaxUint8); v+1 != 0 {
+		t.Errorf("MaxUint8 should wrap around to zero: %d", v+1)
+	}
+	if v := uint16(MaxUint16); v+1 != 0 {
+		t.Errorf("MaxUint16 should wrap around to zero: %d", v+1)
+	}
+	if v := uint32(MaxUint32); v+1 != 0 {
+		t.Errorf("MaxUint32 should wrap around to zero: %d", v+1)
+	}
+	if v := uint64(MaxUint64); v+1 != 0 {
+		t.Errorf("MaxUint64 should wrap around to zero: %d", v+1)
+	}
+}
+
+func TestMaxInt(t *testing.T) { // each MaxIntN, converted to its own type, must wrap to the matching MinIntN when incremented
+	if v := int(MaxInt); v+1 != MinInt {
+		t.Errorf("MaxInt should wrap around to MinInt: %d", v+1)
+	}
+	if v := int8(MaxInt8); v+1 != MinInt8 {
+		t.Errorf("MaxInt8 should wrap around to MinInt8: %d", v+1)
+	}
+	if v := int16(MaxInt16); v+1 != MinInt16 {
+		t.Errorf("MaxInt16 should wrap around to MinInt16: %d", v+1)
+	}
+	if v := int32(MaxInt32); v+1 != MinInt32 {
+		t.Errorf("MaxInt32 should wrap around to MinInt32: %d", v+1)
+	}
+	if v := int64(MaxInt64); v+1 != MinInt64 {
+		t.Errorf("MaxInt64 should wrap around to MinInt64: %d", v+1)
+	}
+}
diff --git a/src/math/copysign.go b/src/math/copysign.go
new file mode 100644
index 0000000..3a30afb
--- /dev/null
+++ b/src/math/copysign.go
@@ -0,0 +1,12 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Copysign returns a value with the magnitude of f
+// and the sign of sign.
+func Copysign(f, sign float64) float64 {
+	const signBit = 1 << 63 // mask selecting the top bit of the 64-bit representation, which holds the sign
+	return Float64frombits(Float64bits(f)&^signBit | Float64bits(sign)&signBit)
+}
diff --git a/src/math/cosh_s390x.s b/src/math/cosh_s390x.s
new file mode 100644
index 0000000..ca1d86e
--- /dev/null
+++ b/src/math/cosh_s390x.s
@@ -0,0 +1,211 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Constants
+DATA coshrodataL23<>+0(SB)/8, $0.231904681384629956E-16
+DATA coshrodataL23<>+8(SB)/8, $0.693147180559945286E+00 // approximately ln(2)
+DATA coshrodataL23<>+16(SB)/8, $0.144269504088896339E+01 // approximately 1/ln(2)
+DATA coshrodataL23<>+24(SB)/8, $704.E0
+GLOBL coshrodataL23<>+0(SB), RODATA, $32
+DATA coshxinf<>+0(SB)/8, $0x7FF0000000000000 // bit pattern of +Inf
+GLOBL coshxinf<>+0(SB), RODATA, $8
+DATA coshxlim1<>+0(SB)/8, $800.E0 // NOTE(review): looks like the |x| bound at which coshAsm returns +Inf - confirm
+GLOBL coshxlim1<>+0(SB), RODATA, $8
+DATA coshxaddhy<>+0(SB)/8, $0xc2f0000100003fdf
+GLOBL coshxaddhy<>+0(SB), RODATA, $8
+DATA coshx4ff<>+0(SB)/8, $0x4ff0000000000000
+GLOBL coshx4ff<>+0(SB), RODATA, $8
+DATA coshe1<>+0(SB)/8, $0x3ff000000000000a
+GLOBL coshe1<>+0(SB), RODATA, $8
+
+// Log multiplier table
+DATA coshtab<>+0(SB)/8, $0.442737824274138381E-01
+DATA coshtab<>+8(SB)/8, $0.263602189790660309E-01
+DATA coshtab<>+16(SB)/8, $0.122565642281703586E-01
+DATA coshtab<>+24(SB)/8, $0.143757052860721398E-02
+DATA coshtab<>+32(SB)/8, $-.651375034121276075E-02
+DATA coshtab<>+40(SB)/8, $-.119317678849450159E-01
+DATA coshtab<>+48(SB)/8, $-.150868749549871069E-01
+DATA coshtab<>+56(SB)/8, $-.161992609578469234E-01
+DATA coshtab<>+64(SB)/8, $-.154492360403337917E-01
+DATA coshtab<>+72(SB)/8, $-.129850717389178721E-01
+DATA coshtab<>+80(SB)/8, $-.892902649276657891E-02
+DATA coshtab<>+88(SB)/8, $-.338202636596794887E-02
+DATA coshtab<>+96(SB)/8, $0.357266307045684762E-02
+DATA coshtab<>+104(SB)/8, $0.118665304327406698E-01
+DATA coshtab<>+112(SB)/8, $0.214434994118118914E-01
+DATA coshtab<>+120(SB)/8, $0.322580645161290314E-01
+GLOBL coshtab<>+0(SB), RODATA, $128
+
+// Minimax polynomial approximations
+DATA coshe2<>+0(SB)/8, $0.500000000000004237e+00
+GLOBL coshe2<>+0(SB), RODATA, $8
+DATA coshe3<>+0(SB)/8, $0.166666666630345592e+00
+GLOBL coshe3<>+0(SB), RODATA, $8
+DATA coshe4<>+0(SB)/8, $0.416666664838056960e-01
+GLOBL coshe4<>+0(SB), RODATA, $8
+DATA coshe5<>+0(SB)/8, $0.833349307718286047e-02
+GLOBL coshe5<>+0(SB), RODATA, $8
+DATA coshe6<>+0(SB)/8, $0.138926439368309441e-02
+GLOBL coshe6<>+0(SB), RODATA, $8
+
+// Cosh returns the hyperbolic cosine of x.
+//
+// Special cases are:
+//      Cosh(±0) = 1
+//      Cosh(±Inf) = +Inf
+//      Cosh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·coshAsm(SB),NOSPLIT,$0-16
+	FMOVD   x+0(FP), F0
+	MOVD    $coshrodataL23<>+0(SB), R9
+	LTDBR	F0, F0 // load-and-test: sets the condition code from x
+	MOVD    $0x4086000000000000, R2
+	MOVD    $0x4086000000000000, R3
+	BLTU    L19 // x below zero (or unordered): L19 puts -x in F4 and rejoins at L2
+	FMOVD   F0, F4 // otherwise F4 = x
+L2:
+	WORD    $0xED409018     //cdb %f4,.L24-.L23(%r9)
+	BYTE    $0x00
+	BYTE    $0x19
+	BGE     L14     //jnl   .L14
+	BVS     L14
+	WFCEDBS V4, V4, V2
+	BEQ     L20
+L1:
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L14:
+	WFCEDBS V4, V4, V2
+	BVS     L1
+	MOVD    $coshxlim1<>+0(SB), R1 // address of the 800.0 limit, compared against F4 just below
+	FMOVD   0(R1), F2
+	WFCHEDBS        V4, V2, V2
+	BEQ     L21
+	MOVD    $coshxaddhy<>+0(SB), R1
+	FMOVD   coshrodataL23<>+16(SB), F5
+	FMOVD   0(R1), F2
+	WFMSDB  V0, V5, V2, V5
+	FMOVD   coshrodataL23<>+8(SB), F3
+	FADD    F5, F2
+	MOVD    $coshe6<>+0(SB), R1
+	WFMSDB  V2, V3, V0, V3
+	FMOVD   0(R1), F6
+	WFMDB   V3, V3, V1
+	MOVD    $coshe4<>+0(SB), R1
+	FMOVD   coshrodataL23<>+0(SB), F7
+	WFMADB  V2, V7, V3, V2
+	FMOVD   0(R1), F3
+	MOVD    $coshe5<>+0(SB), R1
+	WFMADB  V1, V6, V3, V6
+	FMOVD   0(R1), F7
+	MOVD    $coshe3<>+0(SB), R1
+	FMOVD   0(R1), F3
+	WFMADB  V1, V7, V3, V7
+	FNEG    F2, F3
+	LGDR    F5, R1
+	MOVD    $coshe2<>+0(SB), R3
+	WFCEDBS V4, V0, V0
+	FMOVD   0(R3), F5
+	MOVD    $coshe1<>+0(SB), R3
+	WFMADB  V1, V6, V5, V6
+	FMOVD   0(R3), F5
+	RISBGN	$0, $15, $48, R1, R2
+	WFMADB  V1, V7, V5, V1
+	BVS     L22
+	RISBGZ	$57, $60, $3, R1, R4
+	MOVD    $coshtab<>+0(SB), R3
+	WFMADB  V3, V6, V1, V6
+	WORD    $0x68043000     //ld    %f0,0(%r4,%r3)
+	FMSUB   F0, F3, F2
+	WORD    $0xA71AF000     //ahi   %r1,-4096
+	WFMADB  V2, V6, V0, V6
+L17:
+	RISBGN	$0, $15, $48, R1, R2
+	LDGR    R2, F2
+	FMADD   F2, F6, F2
+	MOVD    $coshx4ff<>+0(SB), R1
+	FMOVD   0(R1), F0
+	FMUL    F2, F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L19:
+	FNEG    F0, F4 // F4 = -x
+	BR      L2
+L20:
+	MOVD    $coshxaddhy<>+0(SB), R1
+	FMOVD   coshrodataL23<>+16(SB), F3
+	FMOVD   0(R1), F2
+	WFMSDB  V0, V3, V2, V3
+	FMOVD   coshrodataL23<>+8(SB), F4
+	FADD    F3, F2
+	MOVD    $coshe6<>+0(SB), R1
+	FMSUB   F4, F2, F0
+	FMOVD   0(R1), F6
+	WFMDB   V0, V0, V1
+	MOVD    $coshe4<>+0(SB), R1
+	FMOVD   0(R1), F4
+	MOVD    $coshe5<>+0(SB), R1
+	FMOVD   coshrodataL23<>+0(SB), F5
+	WFMADB  V1, V6, V4, V6
+	FMADD   F5, F2, F0
+	FMOVD   0(R1), F2
+	MOVD    $coshe3<>+0(SB), R1
+	FMOVD   0(R1), F4
+	WFMADB  V1, V2, V4, V2
+	MOVD    $coshe2<>+0(SB), R1
+	FMOVD   0(R1), F5
+	FNEG    F0, F4
+	WFMADB  V1, V6, V5, V6
+	MOVD    $coshe1<>+0(SB), R1
+	FMOVD   0(R1), F5
+	WFMADB  V1, V2, V5, V1
+	LGDR    F3, R1
+	MOVD    $coshtab<>+0(SB), R5
+	WFMADB  V4, V6, V1, V3
+	RISBGZ	$57, $60, $3, R1, R4
+	WFMSDB  V4, V6, V1, V6
+	WORD    $0x68145000     //ld %f1,0(%r4,%r5)
+	WFMSDB  V4, V1, V0, V2
+	WORD    $0xA7487FBE     //lhi %r4,32702
+	FMADD   F3, F2, F1
+	SUBW    R1, R4
+	RISBGZ	$57, $60, $3, R4, R12
+	WORD    $0x682C5000     //ld %f2,0(%r12,%r5)
+	FMSUB   F2, F4, F0
+	RISBGN	$0, $15, $48, R1, R2
+	WFMADB  V0, V6, V2, V6
+	RISBGN	$0, $15, $48, R4, R3
+	LDGR    R2, F2
+	LDGR    R3, F0
+	FMADD   F2, F1, F2
+	FMADD   F0, F6, F0
+	FADD    F2, F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L22:
+	WORD    $0xA7387FBE     //lhi %r3,32702
+	MOVD    $coshtab<>+0(SB), R4
+	SUBW    R1, R3
+	WFMSDB  V3, V6, V1, V6
+	RISBGZ	$57, $60, $3, R3, R3
+	WORD    $0x68034000     //ld %f0,0(%r3,%r4)
+	FMSUB   F0, F3, F2
+	WORD    $0xA7386FBE     //lhi %r3,28606
+	WFMADB  V2, V6, V0, V6
+	SUBW    R1, R3, R1
+	BR      L17
+L21:
+	MOVD    $coshxinf<>+0(SB), R1
+	FMOVD   0(R1), F0 // F0 = the +Inf bit pattern stored at coshxinf
+	FMOVD   F0, ret+8(FP)
+	RET
+
diff --git a/src/math/dim.go b/src/math/dim.go
new file mode 100644
index 0000000..f369f70
--- /dev/null
+++ b/src/math/dim.go
@@ -0,0 +1,100 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Dim returns the maximum of x-y or 0.
+//
+// Special cases are:
+//
+//	Dim(+Inf, +Inf) = NaN
+//	Dim(-Inf, -Inf) = NaN
+//	Dim(x, NaN) = Dim(NaN, x) = NaN
+func Dim(x, y float64) float64 {
+	// The special cases result in NaN after the subtraction:
+	//      +Inf - +Inf = NaN
+	//      -Inf - -Inf = NaN
+	//       NaN - y    = NaN
+	//         x - NaN  = NaN
+	v := x - y
+	if v <= 0 { // a NaN v fails this comparison and is returned unchanged below
+		// v is negative or 0
+		return 0
+	}
+	// v is positive or NaN
+	return v
+}
+
+// Max returns the larger of x or y.
+//
+// Special cases are:
+//
+//	Max(x, +Inf) = Max(+Inf, x) = +Inf
+//	Max(x, NaN) = Max(NaN, x) = NaN
+//	Max(+0, ±0) = Max(±0, +0) = +0
+//	Max(-0, -0) = -0
+//
+// Note that this differs from the built-in function max when called
+// with NaN and +Inf.
+func Max(x, y float64) float64 {
+	if haveArchMax { // presumably true only where an architecture-specific archMax is provided
+		return archMax(x, y)
+	}
+	return max(x, y) // portable implementation
+}
+
+func max(x, y float64) float64 { // portable implementation that Max calls when haveArchMax is false
+	// special cases
+	switch {
+	case IsInf(x, 1) || IsInf(y, 1): // checked before NaN, so +Inf wins even against a NaN
+		return Inf(1)
+	case IsNaN(x) || IsNaN(y):
+		return NaN()
+	case x == 0 && x == y: // both arguments are zeros, of either sign
+		if Signbit(x) { // x is -0, so y is the answer whether it is +0 or -0
+			return y
+		}
+		return x
+	}
+	if x > y {
+		return x
+	}
+	return y
+}
+
+// Min returns the smaller of x or y.
+//
+// Special cases are:
+//
+//	Min(x, -Inf) = Min(-Inf, x) = -Inf
+//	Min(x, NaN) = Min(NaN, x) = NaN
+//	Min(-0, ±0) = Min(±0, -0) = -0
+//
+// Note that this differs from the built-in function min when called
+// with NaN and -Inf.
+func Min(x, y float64) float64 {
+	if haveArchMin { // presumably true only where an architecture-specific archMin is provided
+		return archMin(x, y)
+	}
+	return min(x, y) // portable implementation
+}
+
+func min(x, y float64) float64 { // portable implementation that Min calls when haveArchMin is false
+	// special cases
+	switch {
+	case IsInf(x, -1) || IsInf(y, -1): // checked before NaN, so -Inf wins even against a NaN
+		return Inf(-1)
+	case IsNaN(x) || IsNaN(y):
+		return NaN()
+	case x == 0 && x == y: // both arguments are zeros, of either sign
+		if Signbit(x) { // x is -0, which is the smaller zero
+			return x
+		}
+		return y
+	}
+	if x < y {
+		return x
+	}
+	return y
+}
diff --git a/src/math/dim_amd64.s b/src/math/dim_amd64.s
new file mode 100644
index 0000000..253f03b
--- /dev/null
+++ b/src/math/dim_amd64.s
@@ -0,0 +1,98 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000 // float64 bit pattern of +Inf
+#define NaN    0x7FF8000000000001 // float64 bit pattern of a quiet NaN
+#define NegInf 0xFFF0000000000000 // float64 bit pattern of -Inf
+
+// func archMax(x, y float64) float64
+TEXT ·archMax(SB),NOSPLIT,$0
+	// +Inf special cases: the result is +Inf if either operand is +Inf
+	MOVQ    $PosInf, AX
+	MOVQ    x+0(FP), R8
+	CMPQ    AX, R8
+	JEQ     isPosInf
+	MOVQ    y+8(FP), R9
+	CMPQ    AX, R9
+	JEQ     isPosInf
+	// NaN special cases: with the sign bit cleared, only NaN bit patterns exceed PosInf
+	MOVQ    $~(1<<63), DX // mask that clears the sign bit
+	MOVQ    $PosInf, AX
+	MOVQ    R8, BX
+	ANDQ    DX, BX // BX = bits of |x|
+	CMPQ    AX, BX
+	JLT     isMaxNaN
+	MOVQ    R9, CX
+	ANDQ    DX, CX // CX = bits of |y|
+	CMPQ    AX, CX
+	JLT     isMaxNaN
+	// ±0 special cases: |x| OR |y| is zero only when both operands are zeros
+	ORQ     CX, BX
+	JEQ     isMaxZero
+
+	MOVQ    R8, X0
+	MOVQ    R9, X1
+	MAXSD   X1, X0 // remaining operands: +Inf, NaN and zero pairs were handled above
+	MOVSD   X0, ret+16(FP)
+	RET
+isMaxNaN: // return NaN
+	MOVQ	$NaN, AX
+isPosInf: // return AX: +Inf, or NaN when falling through from isMaxNaN
+	MOVQ    AX, ret+16(FP)
+	RET
+isMaxZero:
+	MOVQ    $(1<<63), AX // -0.0
+	CMPQ    AX, R8
+	JEQ     +3(PC) // x is -0: skip ahead and return y
+	MOVQ    R8, ret+16(FP) // x is +0: return x
+	RET
+	MOVQ    R9, ret+16(FP) // return other 0
+	RET
+
+// func archMin(x, y float64) float64
+TEXT ·archMin(SB),NOSPLIT,$0
+	// -Inf special cases: the result is -Inf if either operand is -Inf
+	MOVQ    $NegInf, AX
+	MOVQ    x+0(FP), R8
+	CMPQ    AX, R8
+	JEQ     isNegInf
+	MOVQ    y+8(FP), R9
+	CMPQ    AX, R9
+	JEQ     isNegInf
+	// NaN special cases: with the sign bit cleared, only NaN bit patterns exceed PosInf
+	MOVQ    $~(1<<63), DX // mask that clears the sign bit
+	MOVQ    $PosInf, AX
+	MOVQ    R8, BX
+	ANDQ    DX, BX // BX = bits of |x|
+	CMPQ    AX, BX
+	JLT     isMinNaN
+	MOVQ    R9, CX
+	ANDQ    DX, CX // CX = bits of |y|
+	CMPQ    AX, CX
+	JLT     isMinNaN
+	// ±0 special cases: |x| OR |y| is zero only when both operands are zeros
+	ORQ     CX, BX
+	JEQ     isMinZero
+
+	MOVQ    R8, X0
+	MOVQ    R9, X1
+	MINSD   X1, X0 // remaining operands: -Inf, NaN and zero pairs were handled above
+	MOVSD X0, ret+16(FP)
+	RET
+isMinNaN: // return NaN
+	MOVQ	$NaN, AX
+isNegInf: // return AX: -Inf, or NaN when falling through from isMinNaN
+	MOVQ    AX, ret+16(FP)
+	RET
+isMinZero:
+	MOVQ    $(1<<63), AX // -0.0
+	CMPQ    AX, R8
+	JEQ     +3(PC) // x is -0: skip ahead and return x
+	MOVQ    R9, ret+16(FP) // x is +0: return y, the other 0
+	RET
+	MOVQ    R8, ret+16(FP) // return -0
+	RET
+
diff --git a/src/math/dim_arm64.s b/src/math/dim_arm64.s
new file mode 100644
index 0000000..f112003
--- /dev/null
+++ b/src/math/dim_arm64.s
@@ -0,0 +1,49 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000 // float64 bit pattern of +Inf
+#define NaN    0x7FF8000000000001 // float64 bit pattern of a quiet NaN (not referenced below)
+#define NegInf 0xFFF0000000000000 // float64 bit pattern of -Inf
+
+// func archMax(x, y float64) float64
+TEXT ·archMax(SB),NOSPLIT,$0
+	// +Inf special cases: return +Inf if the bits of either operand equal PosInf
+	MOVD	$PosInf, R0
+	MOVD	x+0(FP), R1
+	CMP	R0, R1
+	BEQ	isPosInf
+	MOVD	y+8(FP), R2
+	CMP	R0, R2
+	BEQ	isPosInf
+	// normal case: NaN and ±0 get no explicit handling here and are left to FMAXD
+	FMOVD	R1, F0
+	FMOVD	R2, F1
+	FMAXD	F0, F1, F0
+	FMOVD	F0, ret+16(FP)
+	RET
+isPosInf: // return +Inf (R0 still holds PosInf)
+	MOVD	R0, ret+16(FP)
+	RET
+
+// func archMin(x, y float64) float64
+TEXT ·archMin(SB),NOSPLIT,$0
+	// -Inf special cases: return -Inf if the bits of either operand equal NegInf
+	MOVD	$NegInf, R0
+	MOVD	x+0(FP), R1
+	CMP	R0, R1
+	BEQ	isNegInf
+	MOVD	y+8(FP), R2
+	CMP	R0, R2
+	BEQ	isNegInf
+	// normal case: NaN and ±0 get no explicit handling here and are left to FMIND
+	FMOVD	R1, F0
+	FMOVD	R2, F1
+	FMIND	F0, F1, F0
+	FMOVD	F0, ret+16(FP)
+	RET
+isNegInf: // return -Inf (R0 still holds NegInf)
+	MOVD	R0, ret+16(FP)
+	RET
diff --git a/src/math/dim_asm.go b/src/math/dim_asm.go
new file mode 100644
index 0000000..f4adbd0
--- /dev/null
+++ b/src/math/dim_asm.go
@@ -0,0 +1,15 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || riscv64 || s390x
+
+package math
+
+const haveArchMax = true // Max calls archMax instead of the portable max
+
+func archMax(x, y float64) float64 // no Go body: provided by the per-architecture assembly
+
+const haveArchMin = true // Min calls archMin instead of the portable min
+
+func archMin(x, y float64) float64 // no Go body: provided by the per-architecture assembly
diff --git a/src/math/dim_noasm.go b/src/math/dim_noasm.go
new file mode 100644
index 0000000..5b9e06f
--- /dev/null
+++ b/src/math/dim_noasm.go
@@ -0,0 +1,19 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !arm64 && !riscv64 && !s390x
+
+package math
+
+const haveArchMax = false // Max uses the portable max and never calls archMax
+
+func archMax(x, y float64) float64 {
+	panic("not implemented") // stub so the package compiles without an assembly version
+}
+
+const haveArchMin = false // Min uses the portable min and never calls archMin
+
+func archMin(x, y float64) float64 {
+	panic("not implemented") // stub so the package compiles without an assembly version
+}
diff --git a/src/math/dim_riscv64.s b/src/math/dim_riscv64.s
new file mode 100644
index 0000000..5b2fd3d
--- /dev/null
+++ b/src/math/dim_riscv64.s
@@ -0,0 +1,70 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Class masks returned by an FCLASS instruction (NaN is the quiet-NaN class bit).
+#define	NegInf	0x001
+#define	PosInf	0x080
+#define	NaN	0x200
+
+// func archMax(x, y float64) float64
+TEXT ·archMax(SB),NOSPLIT,$0
+	MOVD	x+0(FP), F0
+	MOVD	y+8(FP), F1
+	FCLASSD	F0, X5 // X5 = class of x
+	FCLASSD	F1, X6 // X6 = class of y
+
+	// +Inf special cases: return whichever operand is +Inf
+	MOV	$PosInf, X7
+	BEQ	X7, X5, isMaxX
+	BEQ	X7, X6, isMaxY
+
+	// NaN special cases: return the NaN operand. NOTE(review): only the 0x200 class is tested.
+	MOV	$NaN, X7
+	BEQ	X7, X5, isMaxX
+	BEQ	X7, X6, isMaxY
+
+	// normal case
+	FMAXD	F0, F1, F0
+	MOVD	F0, ret+16(FP)
+	RET
+
+isMaxX: // return x
+	MOVD	F0, ret+16(FP)
+	RET
+
+isMaxY: // return y
+	MOVD	F1, ret+16(FP)
+	RET
+
+// func archMin(x, y float64) float64
+TEXT ·archMin(SB),NOSPLIT,$0
+	MOVD	x+0(FP), F0
+	MOVD	y+8(FP), F1
+	FCLASSD	F0, X5 // X5 = class of x
+	FCLASSD	F1, X6 // X6 = class of y
+
+	// -Inf special cases: return whichever operand is -Inf
+	MOV	$NegInf, X7
+	BEQ	X7, X5, isMinX
+	BEQ	X7, X6, isMinY
+
+	// NaN special cases: return the NaN operand. NOTE(review): only the 0x200 class is tested.
+	MOV	$NaN, X7
+	BEQ	X7, X5, isMinX
+	BEQ	X7, X6, isMinY
+
+	// normal case
+	FMIND	F0, F1, F0
+	MOVD	F0, ret+16(FP)
+	RET
+
+isMinX: // return x
+	MOVD	F0, ret+16(FP)
+	RET
+
+isMinY: // return y
+	MOVD	F1, ret+16(FP)
+	RET
diff --git a/src/math/dim_s390x.s b/src/math/dim_s390x.s
new file mode 100644
index 0000000..1277026
--- /dev/null
+++ b/src/math/dim_s390x.s
@@ -0,0 +1,96 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on dim_amd64.s
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000 // float64 bit pattern of +Inf
+#define NaN    0x7FF8000000000001 // float64 bit pattern of a quiet NaN
+#define NegInf 0xFFF0000000000000 // float64 bit pattern of -Inf
+
+// func archMax(x, y float64) float64
+TEXT ·archMax(SB),NOSPLIT,$0
+	// +Inf special cases: the result is +Inf if either operand is +Inf
+	MOVD    $PosInf, R4
+	MOVD    x+0(FP), R8
+	CMPUBEQ R4, R8, isPosInf
+	MOVD    y+8(FP), R9
+	CMPUBEQ R4, R9, isPosInf
+	// NaN special cases: with the sign bit cleared, only NaN bit patterns exceed PosInf
+	MOVD    $~(1<<63), R5 // mask that clears the sign bit
+	MOVD    $PosInf, R4
+	MOVD    R8, R2
+	AND     R5, R2 // R2 = bits of |x|
+	CMPUBLT R4, R2, isMaxNaN
+	MOVD    R9, R3
+	AND     R5, R3 // R3 = bits of |y|
+	CMPUBLT R4, R3, isMaxNaN
+	// ±0 special cases: |x| OR |y| is zero only when both operands are zeros
+	OR      R3, R2
+	BEQ     isMaxZero
+
+	FMOVD   x+0(FP), F1
+	FMOVD   y+8(FP), F2
+	FCMPU   F2, F1
+	BGT     +3(PC) // taken: skip ahead and return y; not taken: return x
+	FMOVD   F1, ret+16(FP)
+	RET
+	FMOVD   F2, ret+16(FP)
+	RET
+isMaxNaN: // return NaN
+	MOVD	$NaN, R4
+isPosInf: // return R4: +Inf, or NaN when falling through from isMaxNaN
+	MOVD    R4, ret+16(FP)
+	RET
+isMaxZero:
+	MOVD    $(1<<63), R4 // -0.0
+	CMPUBEQ R4, R8, +3(PC) // x is -0: skip ahead and return y
+	MOVD    R8, ret+16(FP) // x is +0: return x
+	RET
+	MOVD    R9, ret+16(FP) // return other 0
+	RET
+
+// func archMin(x, y float64) float64
+TEXT ·archMin(SB),NOSPLIT,$0
+	// -Inf special cases: the result is -Inf if either operand is -Inf
+	MOVD    $NegInf, R4
+	MOVD    x+0(FP), R8
+	CMPUBEQ R4, R8, isNegInf
+	MOVD    y+8(FP), R9
+	CMPUBEQ R4, R9, isNegInf
+	// NaN special cases: with the sign bit cleared, only NaN bit patterns exceed PosInf
+	MOVD    $~(1<<63), R5 // mask that clears the sign bit
+	MOVD    $PosInf, R4
+	MOVD    R8, R2
+	AND     R5, R2 // R2 = bits of |x|
+	CMPUBLT R4, R2, isMinNaN
+	MOVD    R9, R3
+	AND     R5, R3 // R3 = bits of |y|
+	CMPUBLT R4, R3, isMinNaN
+	// ±0 special cases: |x| OR |y| is zero only when both operands are zeros
+	OR      R3, R2
+	BEQ     isMinZero
+
+	FMOVD   x+0(FP), F1
+	FMOVD   y+8(FP), F2
+	FCMPU   F2, F1
+	BLT     +3(PC) // taken: skip ahead and return y; not taken: return x
+	FMOVD   F1, ret+16(FP)
+	RET
+	FMOVD   F2, ret+16(FP)
+	RET
+isMinNaN: // return NaN
+	MOVD	$NaN, R4
+isNegInf: // return R4: -Inf, or NaN when falling through from isMinNaN
+	MOVD    R4, ret+16(FP)
+	RET
+isMinZero:
+	MOVD    $(1<<63), R4 // -0.0
+	CMPUBEQ R4, R8, +3(PC) // x is -0: skip ahead and return x
+	MOVD    R9, ret+16(FP) // x is +0: return y, the other 0
+	RET
+	MOVD    R8, ret+16(FP) // return -0
+	RET
+
diff --git a/src/math/erf.go b/src/math/erf.go
new file mode 100644
index 0000000..ba00c7d
--- /dev/null
+++ b/src/math/erf.go
@@ -0,0 +1,351 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point error function and complementary error function.
+*/
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and
+// came with this notice. The go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// double erf(double x)
+// double erfc(double x)
+//                           x
+//                    2      |\
+//     erf(x)  =  ---------  | exp(-t*t)dt
+//                 sqrt(pi) \|
+//                           0
+//
+//     erfc(x) =  1-erf(x)
+//  Note that
+//              erf(-x) = -erf(x)
+//              erfc(-x) = 2 - erfc(x)
+//
+// Method:
+//      1. For |x| in [0, 0.84375]
+//          erf(x)  = x + x*R(x**2)
+//          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
+//                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
+//         where R = P/Q, P is an even poly of degree 8 in x and
+//         Q is an even poly of degree 10 in x (both are polys in x**2).
+//                                               -57.90
+//                      | R - (erf(x)-x)/x | <= 2
+//
+//
+//         Remark. The formula is derived by noting
+//          erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....)
+//         and that
+//          2/sqrt(pi) = 1.128379167095512573896158903121545171688
+//         is close to one. The interval is chosen because the fix
+//         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
+//         near 0.6174), and by some experiment, 0.84375 is chosen to
+//         guarantee the error is less than one ulp for erf.
+//
+//      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
+//         c = 0.84506291151 rounded to single (24 bits)
+//              erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
+//              erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
+//                        1+(c+P1(s)/Q1(s))    if x < 0
+//              |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
+//         Remark: here we use the taylor series expansion at x=1.
+//              erf(1+s) = erf(1) + s*Poly(s)
+//                       = 0.845.. + P1(s)/Q1(s)
+//         That is, we use rational approximation to approximate
+//                      erf(1+s) - (c = (single)0.84506291151)
+//         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
+//         where
+//              P1(s) = degree 6 poly in s
+//              Q1(s) = degree 6 poly in s
+//
+//      3. For x in [1.25,1/0.35(~2.857143)],
+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
+//              erf(x)  = 1 - erfc(x)
+//         where
+//              R1(z) = degree 7 poly in z, (z=1/x**2)
+//              S1(z) = degree 8 poly in z
+//
+//      4. For x in [1/0.35,28]
+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
+//                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
+//                      = 2.0 - tiny            (if x <= -6)
+//              erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
+//              erf(x)  = sign(x)*(1.0 - tiny)
+//         where
+//              R2(z) = degree 6 poly in z, (z=1/x**2)
+//              S2(z) = degree 7 poly in z
+//
+//      Note1:
+//         To compute exp(-x*x-0.5625+R/S), let s be a single
+//         precision number and s := x; then
+//              -x*x = -s*s + (s-x)*(s+x)
+//              exp(-x*x-0.5625+R/S) =
+//                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
+//      Note2:
+//         Here 3 and 4 make use of the asymptotic series
+//                        exp(-x*x)
+//              erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) )
+//                        x*sqrt(pi)
+//         We use rational approximation to approximate
+//              g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625
+//         Here is the error bound for R1/S1 and R2/S2
+//              |R1/S1 - f(x)|  < 2**(-62.57)
+//              |R2/S2 - f(x)|  < 2**(-61.52)
+//
+//      5. For inf > x >= 28
+//              erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
+//              erfc(x) = tiny*tiny (raise underflow) if x > 0
+//                      = 2 - tiny if x<0
+//
+//      6. Special case:
+//              erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
+//              erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
+//              erfc/erf(NaN) is NaN
+
+const (
+	erx = 8.45062911510467529297e-01 // 0x3FEB0AC160000000; c in step 2 of the method (0.84506291151 rounded to single)
+	// Coefficients for approximation to erf in [0, 0.84375]: R = P/Q, P built from pp*, Q from qq*
+	efx  = 1.28379167095512586316e-01  // 0x3FC06EBA8214DB69; 2/sqrt(pi) - 1
+	efx8 = 1.02703333676410069053e+00  // 0x3FF06EBA8214DB69; 8 * efx
+	pp0  = 1.28379167095512558561e-01  // 0x3FC06EBA8214DB68
+	pp1  = -3.25042107247001499370e-01 // 0xBFD4CD7D691CB913
+	pp2  = -2.84817495755985104766e-02 // 0xBF9D2A51DBD7194F
+	pp3  = -5.77027029648944159157e-03 // 0xBF77A291236668E4
+	pp4  = -2.37630166566501626084e-05 // 0xBEF8EAD6120016AC
+	qq1  = 3.97917223959155352819e-01  // 0x3FD97779CDDADC09
+	qq2  = 6.50222499887672944485e-02  // 0x3FB0A54C5536CEBA
+	qq3  = 5.08130628187576562776e-03  // 0x3F74D022C4D36B0F
+	qq4  = 1.32494738004321644526e-04  // 0x3F215DC9221C1A10
+	qq5  = -3.96022827877536812320e-06 // 0xBED09C4342A26120
+	// Coefficients for approximation to erf in [0.84375, 1.25]: P1 built from pa*, Q1 from qa*
+	pa0 = -2.36211856075265944077e-03 // 0xBF6359B8BEF77538
+	pa1 = 4.14856118683748331666e-01  // 0x3FDA8D00AD92B34D
+	pa2 = -3.72207876035701323847e-01 // 0xBFD7D240FBB8C3F1
+	pa3 = 3.18346619901161753674e-01  // 0x3FD45FCA805120E4
+	pa4 = -1.10894694282396677476e-01 // 0xBFBC63983D3E28EC
+	pa5 = 3.54783043256182359371e-02  // 0x3FA22A36599795EB
+	pa6 = -2.16637559486879084300e-03 // 0xBF61BF380A96073F
+	qa1 = 1.06420880400844228286e-01  // 0x3FBB3E6618EEE323
+	qa2 = 5.40397917702171048937e-01  // 0x3FE14AF092EB6F33
+	qa3 = 7.18286544141962662868e-02  // 0x3FB2635CD99FE9A7
+	qa4 = 1.26171219808761642112e-01  // 0x3FC02660E763351F
+	qa5 = 1.36370839120290507362e-02  // 0x3F8BEDC26B51DD1C
+	qa6 = 1.19844998467991074170e-02  // 0x3F888B545735151D
+	// Coefficients for approximation to erfc in [1.25, 1/0.35]: R1 built from ra*, S1 from sa*
+	ra0 = -9.86494403484714822705e-03 // 0xBF843412600D6435
+	ra1 = -6.93858572707181764372e-01 // 0xBFE63416E4BA7360
+	ra2 = -1.05586262253232909814e+01 // 0xC0251E0441B0E726
+	ra3 = -6.23753324503260060396e+01 // 0xC04F300AE4CBA38D
+	ra4 = -1.62396669462573470355e+02 // 0xC0644CB184282266
+	ra5 = -1.84605092906711035994e+02 // 0xC067135CEBCCABB2
+	ra6 = -8.12874355063065934246e+01 // 0xC054526557E4D2F2
+	ra7 = -9.81432934416914548592e+00 // 0xC023A0EFC69AC25C
+	sa1 = 1.96512716674392571292e+01  // 0x4033A6B9BD707687
+	sa2 = 1.37657754143519042600e+02  // 0x4061350C526AE721
+	sa3 = 4.34565877475229228821e+02  // 0x407B290DD58A1A71
+	sa4 = 6.45387271733267880336e+02  // 0x40842B1921EC2868
+	sa5 = 4.29008140027567833386e+02  // 0x407AD02157700314
+	sa6 = 1.08635005541779435134e+02  // 0x405B28A3EE48AE2C
+	sa7 = 6.57024977031928170135e+00  // 0x401A47EF8E484A93
+	sa8 = -6.04244152148580987438e-02 // 0xBFAEEFF2EE749A62
+	// Coefficients for approximation to erfc in [1/.35, 28]: R2 built from rb*, S2 from sb*
+	rb0 = -9.86494292470009928597e-03 // 0xBF84341239E86F4A
+	rb1 = -7.99283237680523006574e-01 // 0xBFE993BA70C285DE
+	rb2 = -1.77579549177547519889e+01 // 0xC031C209555F995A
+	rb3 = -1.60636384855821916062e+02 // 0xC064145D43C5ED98
+	rb4 = -6.37566443368389627722e+02 // 0xC083EC881375F228
+	rb5 = -1.02509513161107724954e+03 // 0xC09004616A2E5992
+	rb6 = -4.83519191608651397019e+02 // 0xC07E384E9BDC383F
+	sb1 = 3.03380607434824582924e+01  // 0x403E568B261D5190
+	sb2 = 3.25792512996573918826e+02  // 0x40745CAE221B9F0A
+	sb3 = 1.53672958608443695994e+03  // 0x409802EB189D5118
+	sb4 = 3.19985821950859553908e+03  // 0x40A8FFB7688C246A
+	sb5 = 2.55305040643316442583e+03  // 0x40A3F219CEDF3BE6
+	sb6 = 4.74528541206955367215e+02  // 0x407DA874E79FE763
+	sb7 = -2.24409524465858183362e+01 // 0xC03670E242712D62
+)
+
+// Erf returns the error function of x.
+//
+// Special cases are:
+//
+//	Erf(+Inf) = 1
+//	Erf(-Inf) = -1
+//	Erf(NaN) = NaN
+func Erf(x float64) float64 {
+	if haveArchErf { // declared outside this file; selects archErf over the pure Go erf
+		return archErf(x)
+	}
+	return erf(x) // pure Go implementation below
+}
+
+func erf(x float64) float64 {
+	const (
+		VeryTiny = 2.848094538889218e-306 // 0x0080000000000000
+		Small    = 1.0 / (1 << 28)        // 2**-28
+	)
+	// special cases
+	switch {
+	case IsNaN(x):
+		return NaN()
+	case IsInf(x, 1):
+		return 1
+	case IsInf(x, -1):
+		return -1
+	}
+	sign := false // set when the argument is negative; x holds |x| from here on
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if x < 0.84375 { // |x| < 0.84375
+		var temp float64 // erf(|x|); negated below when sign is set
+		if x < Small { // |x| < 2**-28
+			if x < VeryTiny {
+				temp = 0.125 * (8.0*x + efx8*x) // avoid underflow
+			} else {
+				temp = x + efx*x // x * 2/sqrt(pi), since efx is 2/sqrt(pi) - 1
+			}
+		} else {
+			z := x * x
+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
+			y := r / s // R(x**2) = P/Q from step 1 of the method
+			temp = x + x*y
+		}
+		if sign {
+			return -temp
+		}
+		return temp
+	}
+	if x < 1.25 { // 0.84375 <= |x| < 1.25
+		s := x - 1
+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
+		if sign {
+			return -erx - P/Q
+		}
+		return erx + P/Q
+	}
+	if x >= 6 { // inf > |x| >= 6: the result is returned as exactly ±1
+		if sign {
+			return -1
+		}
+		return 1
+	}
+	s := 1 / (x * x) // called z = 1/x**2 in the method description
+	var R, S float64
+	if x < 1/0.35 { // |x| < 1 / 0.35  ~ 2.857143
+		R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
+		S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+	} else { // |x| >= 1 / 0.35  ~ 2.857143
+		R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
+		S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
+	}
+	z := Float64frombits(Float64bits(x) & 0xffffffff00000000) // x with its low 32 bits cleared: pseudo-single (20-bit) precision x
+	r := Exp(-z*z-0.5625) * Exp((z-x)*(z+x)+R/S) // exp(-x*x-0.5625+R/S), split as described in Note1
+	if sign {
+		return r/x - 1 // -(1 - erfc(|x|)), with erfc(|x|) = r/x
+	}
+	return 1 - r/x
+}
+
+// Erfc returns the complementary error function of x.
+//
+// Special cases are:
+//
+//	Erfc(+Inf) = 0
+//	Erfc(-Inf) = 2
+//	Erfc(NaN) = NaN
+func Erfc(x float64) float64 {
+	if haveArchErfc { // declared outside this file; selects archErfc over the pure Go erfc
+		return archErfc(x)
+	}
+	return erfc(x) // pure Go implementation below
+}
+
+func erfc(x float64) float64 {
+	const Tiny = 1.0 / (1 << 56) // 2**-56
+	// special cases
+	switch {
+	case IsNaN(x):
+		return NaN()
+	case IsInf(x, 1):
+		return 0
+	case IsInf(x, -1):
+		return 2
+	}
+	sign := false // set when the argument is negative; x holds |x| from here on
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if x < 0.84375 { // |x| < 0.84375
+		var temp float64 // approximates erf(|x|); combined with 1 below
+		if x < Tiny { // |x| < 2**-56
+			temp = x
+		} else {
+			z := x * x
+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
+			y := r / s // R(x**2) = P/Q from step 1 of the method
+			if x < 0.25 { // |x| < 1/4
+				temp = x + x*y
+			} else {
+				temp = 0.5 + (x*y + (x - 0.5)) // so that 1-temp equals 0.5 + ((0.5-x) - x*R)
+			}
+		}
+		if sign {
+			return 1 + temp // erfc(-|x|) = 1 + erf(|x|)
+		}
+		return 1 - temp
+	}
+	if x < 1.25 { // 0.84375 <= |x| < 1.25
+		s := x - 1
+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
+		if sign {
+			return 1 + erx + P/Q
+		}
+		return 1 - erx - P/Q
+
+	}
+	if x < 28 { // |x| < 28
+		s := 1 / (x * x) // called z = 1/x**2 in the method description
+		var R, S float64
+		if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
+			R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
+			S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+		} else { // |x| >= 1 / 0.35 ~ 2.857143
+			if sign && x > 6 {
+				return 2 // x < -6
+			}
+			R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
+			S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
+		}
+		z := Float64frombits(Float64bits(x) & 0xffffffff00000000) // x with its low 32 bits cleared: pseudo-single (20-bit) precision x
+		r := Exp(-z*z-0.5625) * Exp((z-x)*(z+x)+R/S) // exp(-x*x-0.5625+R/S), split as described in Note1
+		if sign {
+			return 2 - r/x // erfc(-|x|) = 2 - erfc(|x|), with erfc(|x|) = r/x
+		}
+		return r / x
+	}
+	if sign { // |x| >= 28: the result is returned as exactly 2 or 0
+		return 2
+	}
+	return 0
+}
diff --git a/src/math/erf_s390x.s b/src/math/erf_s390x.s
new file mode 100644
index 0000000..99ab436
--- /dev/null
+++ b/src/math/erf_s390x.s
@@ -0,0 +1,293 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants (57 float64 entries, addressed by byte offset)
+DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01
+DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00
+DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01
+DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00
+DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01
+DATA ·erfrodataL13<> + 40(SB)/8, $0.5
+DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00
+DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00
+DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01
+DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02
+DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03
+DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03
+DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04
+DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05
+DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06
+DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07
+DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08
+DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09
+DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10
+DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13
+DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14
+DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12
+DATA ·erfrodataL13<> + 176(SB)/8, $-2.25
+DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632 // 2/sqrt(pi) - 1
+DATA ·erfrodataL13<> + 192(SB)/8, $1.0
+DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00
+DATA ·erfrodataL13<> + 208(SB)/8, $1.0
+DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01
+DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00
+DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02
+DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02
+DATA ·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00 // -ln(2)
+DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01 // -1/ln(2)
+DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375
+DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01
+DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00
+DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02
+DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01
+DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02
+DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02
+DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01
+DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00
+DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01
+DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223
+DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01
+DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00
+DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01
+DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01
+DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02
+DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03
+DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04
+DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04
+DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05
+DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07
+DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09
+DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06
+DATA ·erfrodataL13<> + 448(SB)/8, $-0.25
+GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456
+
+// Table of log correction terms (16 float64 entries; NOTE(review): the "log" naming is unverified)
+DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01
+DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01
+DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01
+DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02
+DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02
+DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01
+DATA ·erftab2066<> + 48(SB)/8, $-.150868749549871069e-01
+DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01
+DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01
+DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01
+DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02
+DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02
+DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02
+DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01
+DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01
+DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01
+GLOBL ·erftab2066<> + 0(SB), RODATA, $128
+
+// Table of +/- 1.0: entry 0 is +1.0, entry 1 is -1.0 (presumably indexed by the sign of x)
+DATA ·erftab12067<> + 0(SB)/8, $1.0
+DATA ·erftab12067<> + 8(SB)/8, $-1.0
+GLOBL ·erftab12067<> + 0(SB), RODATA, $16
+
+// erfAsm returns the error function of the argument.
+//
+// Special cases are:
+//      Erf(+Inf) = 1
+//      Erf(-Inf) = -1
+//      Erf(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·erfAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0
+	MOVD	$·erfrodataL13<>+0(SB), R5 // R5 = base address of the constant table
+	LGDR	F0, R1
+	FMOVD	F0, F6 // F6 = copy of x
+	SRAD	$48, R1 // keep the top 16 bits of x: sign, exponent and 4 fraction bits
+	MOVH	$16383, R3 // 0x3FFF
+	RISBGZ	$49, $63, $0, R1, R2 // R2 = low 15 bits of R1 (sign dropped). NOTE(review): range notes below are derived from the constants
+	MOVW	R2, R6
+	MOVW	R3, R7
+	CMPBGT	R6, R7, L2 // above 0x3FFF: |x| >= 2, Inf or NaN
+	MOVH	$12287, R1 // 0x2FFF
+	MOVW	R1, R7
+	CMPBLE	R6, R7 ,L12 // at most 0x2FFF: very small |x|
+	MOVH	$16367, R1 // 0x3FEF
+	MOVW	R1, R7
+	CMPBGT	R6, R7, L5 // above 0x3FEF: 1 <= |x| < 2
+	FMOVD	448(R5), F4
+	FMADD	F0, F0, F4
+	FMOVD	440(R5), F3
+	WFMDB	V4, V4, V2
+	FMOVD	432(R5), F0
+	FMOVD	424(R5), F1
+	WFMADB	V2, V0, V3, V0
+	FMOVD	416(R5), F3
+	WFMADB	V2, V1, V3, V1
+	FMOVD	408(R5), F5
+	FMOVD	400(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V1, V3, V1
+	FMOVD	392(R5), F5
+	FMOVD	384(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V1, V3, V1
+	FMOVD	376(R5), F5
+	FMOVD	368(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V1, V3, V1
+	FMOVD	360(R5), F5
+	FMOVD	352(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V1, V3, V2
+	WFMADB	V4, V0, V2, V0
+	WFMADB	V6, V0, V6, V0
+L1: // store F0 as the result and return
+	FMOVD	F0, ret+8(FP)
+	RET
+L2:
+	MOVH	R1, R1
+	MOVH	$16407, R3 // 0x4017
+	SRW	$31, R1, R1
+	MOVW	R2, R6
+	MOVW	R3, R7
+	CMPBLE	R6, R7, L6 // at most 0x4017: 2 <= |x| < 6
+	MOVW	R1, R1
+	SLD	$3, R1, R1
+	MOVD	$·erftab12067<>+0(SB), R3
+	WORD    $0x68013000     //ld %f0,0(%r1,%r3)
+	MOVH	$32751, R1 // 0x7FEF
+	MOVW	R1, R7
+	CMPBGT	R6, R7, L7 // above 0x7FEF: Inf or NaN
+	FMOVD	344(R5), F2
+	FMADD	F2, F0, F0
+L7:
+	WFCEDBS	V6, V6, V2 // compares x with itself
+	BEQ	L1
+	FMOVD	F6, F0 // not taken to L1: return x itself (presumably the NaN case)
+	FMOVD	F0, ret+8(FP)
+	RET
+
+L6:
+	MOVW	R1, R1
+	SLD	$3, R1, R1
+	MOVD	$·erftab12067<>+0(SB), R4
+	WFMDB	V0, V0, V1
+	MOVH	$0x0, R3
+	WORD    $0x68014000     //ld %f0,0(%r1,%r4)
+	MOVH	$16399, R1 // 0x400F
+	MOVW	R2, R6
+	MOVW	R1, R7
+	CMPBGT	R6, R7, L8 // above 0x400F: 4 <= |x| < 6
+	FMOVD	336(R5), F3
+	FMOVD	328(R5), F2
+	FMOVD	F1, F4
+	WFMADB	V1, V2, V3, V2
+	WORD	$0xED405140	//adb %f4,.L30-.L13(%r5)
+	BYTE	$0x00
+	BYTE	$0x1A
+	FMOVD	312(R5), F3
+	WFMADB	V1, V2, V3, V2
+	FMOVD	304(R5), F3
+	WFMADB	V1, V4, V3, V4
+	FMOVD	296(R5), F3
+	WFMADB	V1, V2, V3, V2
+	FMOVD	288(R5), F3
+	WFMADB	V1, V4, V3, V4
+	FMOVD	280(R5), F3
+	WFMADB	V1, V2, V3, V2
+	FMOVD	272(R5), F3
+	WFMADB	V1, V4, V3, V4
+L9: // shared tail, entered by fall-through above and by the branch at the end of L8
+	FMOVD	264(R5), F3
+	FMUL	F4, F6
+	FMOVD	256(R5), F4
+	WFMADB	V1, V4, V3, V4
+	FDIV	F6, F2
+	LGDR	F4, R1
+	FSUB	F3, F4
+	FMOVD	248(R5), F6
+	WFMSDB	V4, V6, V1, V4
+	FMOVD	240(R5), F1
+	FMOVD	232(R5), F6
+	WFMADB	V4, V6, V1, V6
+	FMOVD	224(R5), F1
+	FMOVD	216(R5), F3
+	WFMADB	V4, V3, V1, V3
+	WFMDB	V4, V4, V1
+	FMOVD	208(R5), F5
+	WFMADB	V6, V1, V3, V6
+	FMOVD	200(R5), F3
+	MOVH	R1,R1
+	WFMADB	V4, V3, V5, V3
+	RISBGZ	$57, $60, $3, R1, R2
+	WFMADB	V1, V6, V3, V6
+	RISBGN	$0, $15, $48, R1, R3
+	MOVD	$·erftab2066<>+0(SB), R1
+	FMOVD	192(R5), F1
+	LDGR	R3, F3
+	WORD	$0xED221000	//madb %f2,%f2,0(%r2,%r1)
+	BYTE	$0x20
+	BYTE	$0x1E
+	WFMADB	V4, V6, V1, V4
+	FMUL	F3, F2
+	FMADD	F4, F2, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L12:
+	FMOVD	184(R5), F0 // 2/sqrt(pi) - 1
+	WFMADB	V6, V0, V6, V0 // presumably F0 = x*F0 + x
+	FMOVD	F0, ret+8(FP)
+	RET
+L5:
+	FMOVD	176(R5), F1
+	FMADD	F0, F0, F1
+	FMOVD	168(R5), F3
+	WFMDB	V1, V1, V2
+	FMOVD	160(R5), F0
+	FMOVD	152(R5), F4
+	WFMADB	V2, V0, V3, V0
+	FMOVD	144(R5), F3
+	WFMADB	V2, V4, V3, V4
+	FMOVD	136(R5), F5
+	FMOVD	128(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V4, V3, V4
+	FMOVD	120(R5), F5
+	FMOVD	112(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V4, V3, V4
+	FMOVD	104(R5), F5
+	FMOVD	96(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V4, V3, V4
+	FMOVD	88(R5), F5
+	FMOVD	80(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V4, V3, V4
+	FMOVD	72(R5), F5
+	FMOVD	64(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V4, V3, V4
+	FMOVD	56(R5), F5
+	FMOVD	48(R5), F3
+	WFMADB	V2, V0, V5, V0
+	WFMADB	V2, V4, V3, V2
+	FMOVD	40(R5), F4
+	WFMADB	V1, V0, V2, V0
+	FMUL	F6, F0
+	FMADD	F4, F6, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L8:
+	FMOVD	32(R5), F3
+	FMOVD	24(R5), F2
+	FMOVD	F1, F4
+	WFMADB	V1, V2, V3, V2
+	WORD	$0xED405010	//adb %f4,.L68-.L13(%r5)
+	BYTE	$0x00
+	BYTE	$0x1A
+	FMOVD	8(R5), F3
+	WFMADB	V1, V2, V3, V2
+	FMOVD	·erfrodataL13<>+0(SB), F3
+	WFMADB	V1, V4, V3, V4
+	BR	L9
diff --git a/src/math/erfc_s390x.s b/src/math/erfc_s390x.s
new file mode 100644
index 0000000..7e9d469
--- /dev/null
+++ b/src/math/erfc_s390x.s
@@ -0,0 +1,527 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define Neg2p11 0xC000E147AE147AE1	// IEEE 754 bit pattern of the float64 -2.11
+#define Pos15   0x402E	// top 16 bits of the float64 15.0 (0x402E000000000000)
+
+// Minimax polynomial coefficients and other constants.
+// All entries are 8-byte float64 values addressed as offset(R9) in erfcAsm;
+// the trailing notes below only mark constants whose use is visible there.
+DATA ·erfcrodataL38<> + 0(SB)/8, $.234875460637085087E-01
+DATA ·erfcrodataL38<> + 8(SB)/8, $.234469449299256284E-01
+DATA ·erfcrodataL38<> + 16(SB)/8, $-.606918710392844955E-04
+DATA ·erfcrodataL38<> + 24(SB)/8, $-.198827088077636213E-04
+DATA ·erfcrodataL38<> + 32(SB)/8, $.257805645845475331E-06
+DATA ·erfcrodataL38<> + 40(SB)/8, $-.184427218110620284E-09
+DATA ·erfcrodataL38<> + 48(SB)/8, $.122408098288933181E-10
+DATA ·erfcrodataL38<> + 56(SB)/8, $.484691106751495392E-07
+DATA ·erfcrodataL38<> + 64(SB)/8, $-.150147637632890281E-08
+DATA ·erfcrodataL38<> + 72(SB)/8, $23.999999999973521625	// ~24: reduction constant loaded at L13/L14
+DATA ·erfcrodataL38<> + 80(SB)/8, $27.226017111108365754	// compared against F1 by the cdb at L13
+DATA ·erfcrodataL38<> + 88(SB)/8, $-2.0	// addend used at L20 for negative x
+DATA ·erfcrodataL38<> + 96(SB)/8, $0.100108802034478228E+00
+DATA ·erfcrodataL38<> + 104(SB)/8, $0.244588413746558125E+00
+DATA ·erfcrodataL38<> + 112(SB)/8, $-.669188879646637174E-01
+DATA ·erfcrodataL38<> + 120(SB)/8, $0.151311447000953551E-01
+DATA ·erfcrodataL38<> + 128(SB)/8, $-.284720833493302061E-02
+DATA ·erfcrodataL38<> + 136(SB)/8, $0.455491239358743212E-03
+DATA ·erfcrodataL38<> + 144(SB)/8, $-.631850539280720949E-04
+DATA ·erfcrodataL38<> + 152(SB)/8, $0.772532660726086679E-05
+DATA ·erfcrodataL38<> + 160(SB)/8, $-.843706007150936940E-06
+DATA ·erfcrodataL38<> + 168(SB)/8, $-.735330214904227472E-08
+DATA ·erfcrodataL38<> + 176(SB)/8, $0.753002008837084967E-09
+DATA ·erfcrodataL38<> + 184(SB)/8, $0.832482036660624637E-07
+DATA ·erfcrodataL38<> + 192(SB)/8, $-0.75	// added to x*x at L4
+DATA ·erfcrodataL38<> + 200(SB)/8, $.927765678007128609E-01
+DATA ·erfcrodataL38<> + 208(SB)/8, $.903621209344751506E-01
+DATA ·erfcrodataL38<> + 216(SB)/8, $-.344203375025257265E-02
+DATA ·erfcrodataL38<> + 224(SB)/8, $-.869243428221791329E-03
+DATA ·erfcrodataL38<> + 232(SB)/8, $.174699813107105603E-03
+DATA ·erfcrodataL38<> + 240(SB)/8, $.649481036316130000E-05
+DATA ·erfcrodataL38<> + 248(SB)/8, $-.895265844897118382E-05
+DATA ·erfcrodataL38<> + 256(SB)/8, $.135970046909529513E-05
+DATA ·erfcrodataL38<> + 264(SB)/8, $.277617717014748015E-06
+DATA ·erfcrodataL38<> + 272(SB)/8, $.810628018408232910E-08
+DATA ·erfcrodataL38<> + 280(SB)/8, $.210430084693497985E-07
+DATA ·erfcrodataL38<> + 288(SB)/8, $-.342138077525615091E-08
+DATA ·erfcrodataL38<> + 296(SB)/8, $-.165467946798610800E-06
+DATA ·erfcrodataL38<> + 304(SB)/8, $5.999999999988412824	// ~6: reduction constant loaded at L36
+DATA ·erfcrodataL38<> + 312(SB)/8, $.468542210149072159E-01
+DATA ·erfcrodataL38<> + 320(SB)/8, $.465343528567604256E-01
+DATA ·erfcrodataL38<> + 328(SB)/8, $-.473338083650201733E-03
+DATA ·erfcrodataL38<> + 336(SB)/8, $-.147220659069079156E-03
+DATA ·erfcrodataL38<> + 344(SB)/8, $.755284723554388339E-05
+DATA ·erfcrodataL38<> + 352(SB)/8, $.116158570631428789E-05
+DATA ·erfcrodataL38<> + 360(SB)/8, $-.155445501551602389E-06
+DATA ·erfcrodataL38<> + 368(SB)/8, $-.616940119847805046E-10
+DATA ·erfcrodataL38<> + 376(SB)/8, $-.728705590727563158E-10
+DATA ·erfcrodataL38<> + 384(SB)/8, $-.983452460354586779E-08
+DATA ·erfcrodataL38<> + 392(SB)/8, $.365156164194346316E-08
+DATA ·erfcrodataL38<> + 400(SB)/8, $11.999999999996530775	// ~12: reduction constant loaded at L10
+DATA ·erfcrodataL38<> + 408(SB)/8, $0.467773498104726584E-02
+DATA ·erfcrodataL38<> + 416(SB)/8, $0.206669853540920535E-01
+DATA ·erfcrodataL38<> + 424(SB)/8, $0.413339707081841473E-01
+DATA ·erfcrodataL38<> + 432(SB)/8, $0.482229658262131320E-01
+DATA ·erfcrodataL38<> + 440(SB)/8, $0.344449755901841897E-01
+DATA ·erfcrodataL38<> + 448(SB)/8, $0.130890907240765465E-01
+DATA ·erfcrodataL38<> + 456(SB)/8, $-.459266344100642687E-03
+DATA ·erfcrodataL38<> + 464(SB)/8, $-.337888800856913728E-02
+DATA ·erfcrodataL38<> + 472(SB)/8, $-.159103061687062373E-02
+DATA ·erfcrodataL38<> + 480(SB)/8, $-.501128905515922644E-04
+DATA ·erfcrodataL38<> + 488(SB)/8, $0.262775855852903132E-03
+DATA ·erfcrodataL38<> + 496(SB)/8, $0.103860982197462436E-03
+DATA ·erfcrodataL38<> + 504(SB)/8, $-.548835785414200775E-05
+DATA ·erfcrodataL38<> + 512(SB)/8, $-.157075054646618214E-04
+DATA ·erfcrodataL38<> + 520(SB)/8, $-.480056366276045110E-05
+DATA ·erfcrodataL38<> + 528(SB)/8, $0.198263013759701555E-05
+DATA ·erfcrodataL38<> + 536(SB)/8, $-.224394262958888780E-06
+DATA ·erfcrodataL38<> + 544(SB)/8, $-.321853693146683428E-06
+DATA ·erfcrodataL38<> + 552(SB)/8, $0.445073894984683537E-07
+DATA ·erfcrodataL38<> + 560(SB)/8, $0.660425940000555729E-06
+DATA ·erfcrodataL38<> + 568(SB)/8, $2.0	// used at L3/L30 and for the 2 - result reflection
+DATA ·erfcrodataL38<> + 576(SB)/8, $8.63616855509444462538e-78	// 2**-256: extra scale applied in L11 when R1 != 0
+DATA ·erfcrodataL38<> + 584(SB)/8, $1.00000000000000222044
+DATA ·erfcrodataL38<> + 592(SB)/8, $0.500000000000004237e+00
+DATA ·erfcrodataL38<> + 600(SB)/8, $0.416666664838056960e-01
+DATA ·erfcrodataL38<> + 608(SB)/8, $0.166666666630345592e+00
+DATA ·erfcrodataL38<> + 616(SB)/8, $0.138926439368309441e-02
+DATA ·erfcrodataL38<> + 624(SB)/8, $0.833349307718286047e-02
+DATA ·erfcrodataL38<> + 632(SB)/8, $-.693147180558298714e+00	// high part of -ln(2)
+DATA ·erfcrodataL38<> + 640(SB)/8, $-.164659495826017651e-11	// low part of -ln(2)
+DATA ·erfcrodataL38<> + 648(SB)/8, $.179001151181866548E+00
+DATA ·erfcrodataL38<> + 656(SB)/8, $-.144269504088896339e+01	// -log2(e), multiplied by x*x before L11
+DATA ·erfcrodataL38<> + 664(SB)/8, $+281475245147134.9375	// added before L11 and subtracted again by the sdb in L11
+DATA ·erfcrodataL38<> + 672(SB)/8, $.163116780021877404E+00
+DATA ·erfcrodataL38<> + 680(SB)/8, $-.201574395828120710E-01
+DATA ·erfcrodataL38<> + 688(SB)/8, $-.185726336009394125E-02
+DATA ·erfcrodataL38<> + 696(SB)/8, $.199349204957273749E-02
+DATA ·erfcrodataL38<> + 704(SB)/8, $-.554902415532606242E-03
+DATA ·erfcrodataL38<> + 712(SB)/8, $-.638914789660242846E-05
+DATA ·erfcrodataL38<> + 720(SB)/8, $-.424441522653742898E-04
+DATA ·erfcrodataL38<> + 728(SB)/8, $.827967511921486190E-04
+DATA ·erfcrodataL38<> + 736(SB)/8, $.913965446284062654E-05
+DATA ·erfcrodataL38<> + 744(SB)/8, $.277344791076320853E-05
+DATA ·erfcrodataL38<> + 752(SB)/8, $-.467239678927239526E-06
+DATA ·erfcrodataL38<> + 760(SB)/8, $.344814065920419986E-07
+DATA ·erfcrodataL38<> + 768(SB)/8, $-.366013491552527132E-05
+DATA ·erfcrodataL38<> + 776(SB)/8, $.181242810023783439E-05
+DATA ·erfcrodataL38<> + 784(SB)/8, $2.999999999991234567	// ~3: reduction constant loaded at L9
+DATA ·erfcrodataL38<> + 792(SB)/8, $1.0	// used for the tiny-|x| result 1.0 - x and at L4
+GLOBL ·erfcrodataL38<> + 0(SB), RODATA, $800
+
+// Table of log correction terms.
+// Sixteen 8-byte float64 entries (128 bytes). erfcAsm loads one entry in L11
+// with "ld %f2,0(%r12,%r5)", where R5 is the table base and R12 is the byte
+// offset produced by the preceding RISBGZ (a 4-bit index scaled by 8).
+DATA ·erfctab2069<> + 0(SB)/8, $0.442737824274138381e-01
+DATA ·erfctab2069<> + 8(SB)/8, $0.263602189790660309e-01
+DATA ·erfctab2069<> + 16(SB)/8, $0.122565642281703586e-01
+DATA ·erfctab2069<> + 24(SB)/8, $0.143757052860721398e-02
+DATA ·erfctab2069<> + 32(SB)/8, $-.651375034121276075e-02
+DATA ·erfctab2069<> + 40(SB)/8, $-.119317678849450159e-01
+DATA ·erfctab2069<> + 48(SB)/8, $-.150868749549871069e-01
+DATA ·erfctab2069<> + 56(SB)/8, $-.161992609578469234e-01
+DATA ·erfctab2069<> + 64(SB)/8, $-.154492360403337917e-01
+DATA ·erfctab2069<> + 72(SB)/8, $-.129850717389178721e-01
+DATA ·erfctab2069<> + 80(SB)/8, $-.892902649276657891e-02
+DATA ·erfctab2069<> + 88(SB)/8, $-.338202636596794887e-02
+DATA ·erfctab2069<> + 96(SB)/8, $0.357266307045684762e-02
+DATA ·erfctab2069<> + 104(SB)/8, $0.118665304327406698e-01
+DATA ·erfctab2069<> + 112(SB)/8, $0.214434994118118914e-01
+DATA ·erfctab2069<> + 120(SB)/8, $0.322580645161290314e-01
+GLOBL ·erfctab2069<> + 0(SB), RODATA, $128
+
+// erfcAsm returns the complementary error function of its float64 argument.
+//
+// Special cases (as for Erfc) are:
+//      Erfc(+Inf) = 0
+//      Erfc(-Inf) = 2
+//      Erfc(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+// This assembly implementation handles inputs in the range [-2.11, +15].
+// For all other inputs (including ±Inf and NaN, whose bit patterns fail the
+// two range checks at entry) it branches to the generic Go implementation.
+
+TEXT	·erfcAsm(SB), NOSPLIT|NOFRAME, $0-16
+	// Unsigned compare of the raw bits of x against the pattern of -2.11:
+	// anything above it (x < -2.11, -Inf, NaNs with the sign bit set) is
+	// handed to the Go fallback.
+	MOVD	x+0(FP), R1
+	MOVD	$Neg2p11, R2
+	CMPUBGT	R1, R2, usego
+
+	FMOVD	x+0(FP), F0
+	MOVD	$·erfcrodataL38<>+0(SB), R9
+	FMOVD	F0, F2
+	// R1 = top 16 bits of x. R2 keeps a copy taken before the sign bit is
+	// masked out of R1; its sign is tested later to tell negative x apart.
+	SRAD	$48, R1
+	MOVH	R1, R2
+	ANDW	$0x7FFF, R1
+	// Top 16 bits of |x| above 0x402E (|x| >= 15.5, +Inf, NaN): Go fallback.
+	MOVH	$Pos15, R3
+	CMPW	R1, R3
+	BGT	usego
+	// Dispatch on the top 16 bits of |x|:
+	//   > 0x3FFF  (|x| >= 2)             -> L2
+	//   > 0x3FEF  (1 <= |x| < 2)         -> L3
+	//   > 0x2FFF  (2**-255 <= |x| < 1)   -> L4
+	//   otherwise fall through and return 1.0 - x.
+	MOVH	$0x3FFF, R3
+	MOVW	R1, R6
+	MOVW	R3, R7
+	CMPBGT	R6, R7, L2
+	MOVH	$0x3FEF, R3
+	MOVW	R3, R7
+	CMPBGT	R6, R7, L3
+	MOVH	$0x2FFF, R2
+	MOVW	R2, R7
+	CMPBGT	R6, R7, L4
+	FMOVD	792(R9), F0
+	WFSDB	V2, V0, V2
+	FMOVD	F2, ret+8(FP)
+	RET
+
+L2:
+	// |x| >= 2. Negative x (no smaller than -2.11 past the entry check) is
+	// sent to the L3 path; non-negative x continues at L9 with F1 = x.
+	LTDBR	F0, F0
+	MOVH	$0x0, R4
+	BLTU	L3
+	FMOVD	F0, F1
+L9:
+	// Top 16 bits above 0x400F (x >= 4) continue range selection at L10;
+	// otherwise this is the 2 <= x < 4 range. It leaves x*x in F0, the range
+	// polynomial value in F7 and zero in R1/R3 before falling into L11.
+	MOVH	$0x400F, R3
+	MOVW	R1, R6
+	MOVW	R3, R7
+	CMPBGT	R6, R7, L10
+	FMOVD	784(R9), F3
+	FSUB	F1, F3
+	VLEG	$0, 776(R9), V20
+	WFDDB	V1, V3, V6
+	VLEG	$0, 768(R9), V18
+	FMOVD	760(R9), F7
+	FMOVD	752(R9), F5
+	VLEG	$0, 744(R9), V16
+	FMOVD	736(R9), F3
+	FMOVD	728(R9), F2
+	FMOVD	720(R9), F4
+	WFMDB	V6, V6, V1
+	FMUL	F0, F0
+	MOVH	$0x0, R3
+	WFMADB	V1, V7, V20, V7
+	WFMADB	V1, V5, V18, V5
+	WFMADB	V1, V7, V16, V7
+	WFMADB	V1, V5, V3, V5
+	WFMADB	V1, V7, V4, V7
+	WFMADB	V1, V5, V2, V5
+	FMOVD	712(R9), F2
+	WFMADB	V1, V7, V2, V7
+	FMOVD	704(R9), F2
+	WFMADB	V1, V5, V2, V5
+	FMOVD	696(R9), F2
+	WFMADB	V1, V7, V2, V7
+	FMOVD	688(R9), F2
+	MOVH	$0x0, R1
+	WFMADB	V1, V5, V2, V5
+	FMOVD	680(R9), F2
+	WFMADB	V1, V7, V2, V7
+	FMOVD	672(R9), F2
+	WFMADB	V1, V5, V2, V1
+	FMOVD	664(R9), F3
+	WFMADB	V6, V7, V1, V7
+	FMOVD	656(R9), F5
+	FMOVD	648(R9), F2
+	WFMADB	V0, V5, V3, V5
+	WFMADB	V6, V7, V2, V7
+L11:
+	// Shared tail for the x >= 2 ranges. NOTE(review): this appears to
+	// evaluate exp(-x*x) (F0 holds x*x, scaled by -log2(e) before entry)
+	// using the erfctab2069 table, then multiply by the range polynomial
+	// in F7 — confirm against the generator's description.
+	LGDR	F5, R6
+	WFSDB	V0, V0, V2
+	WORD	$0xED509298	//sdb	%f5,.L55-.L38(%r9)
+	BYTE	$0x00
+	BYTE	$0x1B
+	FMOVD	640(R9), F6
+	FMOVD	632(R9), F4
+	WFMSDB	V5, V6, V2, V6
+	WFMSDB	V5, V4, V0, V4
+	FMOVD	624(R9), F2
+	FADD	F6, F4
+	FMOVD	616(R9), F0
+	FMOVD	608(R9), F6
+	WFMADB	V4, V0, V2, V0
+	FMOVD	600(R9), F3
+	WFMDB	V4, V4, V2
+	MOVH	R6,R6
+	ADD	R6, R3
+	WFMADB	V4, V3, V6, V3
+	FMOVD	592(R9), F6
+	WFMADB	V0, V2, V3, V0
+	FMOVD	584(R9), F3
+	WFMADB	V4, V6, V3, V6
+	RISBGZ	$57, $60, $3, R3, R12
+	WFMADB	V2, V0, V6, V0
+	MOVD	$·erfctab2069<>+0(SB), R5
+	WORD	$0x682C5000	//ld	%f2,0(%r12,%r5)
+	FMADD	F2, F4, F4
+	RISBGN	$0, $15, $48, R3, R4
+	WFMADB	V4, V0, V2, V4
+	LDGR	R4, F2
+	FMADD	F4, F2, F2
+	// Saved top bits <= 0 (sign bit of x set): finish at L20.
+	MOVW	R2, R6
+	CMPBLE	R6, $0, L20
+	// R1 != 0 requests the extra multiply of F7 by the constant at 576(R9).
+	MOVW	R1, R6
+	CMPBEQ	R6, $0, L21
+	WORD	$0xED709240	//mdb	%f7,.L66-.L38(%r9)
+	BYTE	$0x00
+	BYTE	$0x1C
+L21:
+	FMUL	F7, F2
+L1:
+	// Common exit: the result is in F2.
+	FMOVD	F2, ret+8(FP)
+	RET
+L3:
+	// Reached for 1 <= |x| < 2 and, via L2, for negative x with |x| >= 2.
+	// F0 becomes 2.0 - x for non-negative x, or x + 2.0 (at L30) for
+	// negative x, i.e. 2 - |x| in both cases.
+	LTDBR	F0, F0
+	BLTU	L30
+	FMOVD	568(R9), F2
+	WFSDB	V0, V2, V0
+L8:
+	// Polynomial in F0 using the coefficients at 408..560(R9); the result
+	// is left in F2.
+	WFMDB	V0, V0, V4
+	FMOVD	560(R9), F2
+	FMOVD	552(R9), F6
+	FMOVD	544(R9), F1
+	WFMADB	V4, V6, V2, V6
+	FMOVD	536(R9), F2
+	WFMADB	V4, V1, V2, V1
+	FMOVD	528(R9), F3
+	FMOVD	520(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	512(R9), F3
+	FMOVD	504(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	496(R9), F3
+	FMOVD	488(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	480(R9), F3
+	FMOVD	472(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	464(R9), F3
+	FMOVD	456(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	448(R9), F3
+	FMOVD	440(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	432(R9), F3
+	FMOVD	424(R9), F2
+	WFMADB	V4, V6, V3, V6
+	WFMADB	V4, V1, V2, V1
+	FMOVD	416(R9), F3
+	FMOVD	408(R9), F2
+	WFMADB	V4, V6, V3, V6
+	FMADD	F1, F4, F2
+	FMADD	F6, F0, F2
+	// Non-negative saved top bits: return F2 as is; otherwise return
+	// 2.0 - F2 for negative x.
+	MOVW	R2, R6
+	CMPBGE	R6, $0, L1
+	FMOVD	568(R9), F0
+	WFSDB	V2, V0, V2
+	BR	L1
+L10:
+	// x >= 4: top 16 bits <= 0x401F (4 <= x < 8) -> L36; > 0x402F -> L13;
+	// otherwise fall through for 8 <= x < 16.
+	// NOTE(review): the Pos15 guard at entry limits R1 to at most 0x402E,
+	// so the branch to L13 (and the L13/L17/L23/L37/L18/L24/L25 code that
+	// is only reachable through it) looks unreachable here — confirm
+	// before relying on or removing it.
+	MOVH	$0x401F, R3
+	MOVW	R1, R6
+	MOVW	R3, R7
+	CMPBLE	R6, R7, L36
+	MOVH	$0x402F, R3
+	MOVW	R3, R7
+	CMPBGT	R6, R7, L13
+	FMOVD	400(R9), F3
+	FSUB	F1, F3
+	VLEG	$0, 392(R9), V20
+	WFDDB	V1, V3, V6
+	VLEG	$0, 384(R9), V18
+	FMOVD	376(R9), F2
+	FMOVD	368(R9), F4
+	VLEG	$0, 360(R9), V16
+	FMOVD	352(R9), F7
+	FMOVD	344(R9), F3
+	FMUL	F0, F0
+	WFMDB	V6, V6, V1
+	FMOVD	656(R9), F5
+	MOVH	$0x0, R3
+	WFMADB	V1, V2, V20, V2
+	WFMADB	V1, V4, V18, V4
+	WFMADB	V1, V2, V16, V2
+	WFMADB	V1, V4, V7, V4
+	WFMADB	V1, V2, V3, V2
+	FMOVD	336(R9), F3
+	WFMADB	V1, V4, V3, V4
+	FMOVD	328(R9), F3
+	WFMADB	V1, V2, V3, V2
+	FMOVD	320(R9), F3
+	WFMADB	V1, V4, V3, V1
+	FMOVD	312(R9), F7
+	WFMADB	V6, V2, V1, V2
+	MOVH	$0x0, R1
+	FMOVD	664(R9), F3
+	FMADD	F2, F6, F7
+	WFMADB	V0, V5, V3, V5
+	BR	L11
+L35:
+	// NOTE(review): no branch in this routine targets L35.
+	WORD	$0xB3130010	//lcdbr	%f1,%f0
+	BR	L9
+L36:
+	// 4 <= x < 8: same scheme as L9, with the constant at 304(R9) and the
+	// coefficients at 200..296(R9).
+	FMOVD	304(R9), F3
+	FSUB	F1, F3
+	VLEG	$0, 296(R9), V20
+	WFDDB	V1, V3, V6
+	FMOVD	288(R9), F5
+	FMOVD	280(R9), F1
+	FMOVD	272(R9), F2
+	VLEG	$0, 264(R9), V18
+	VLEG	$0, 256(R9), V16
+	FMOVD	248(R9), F3
+	FMOVD	240(R9), F4
+	WFMDB	V6, V6, V7
+	FMUL	F0, F0
+	MOVH	$0x0, R3
+	FMADD	F5, F7, F1
+	WFMADB	V7, V2, V20, V2
+	WFMADB	V7, V1, V18, V1
+	WFMADB	V7, V2, V16, V2
+	WFMADB	V7, V1, V3, V1
+	WFMADB	V7, V2, V4, V2
+	FMOVD	232(R9), F4
+	WFMADB	V7, V1, V4, V1
+	FMOVD	224(R9), F4
+	WFMADB	V7, V2, V4, V2
+	FMOVD	216(R9), F4
+	WFMADB	V7, V1, V4, V1
+	FMOVD	208(R9), F4
+	MOVH	$0x0, R1
+	WFMADB	V7, V2, V4, V7
+	FMOVD	656(R9), F5
+	WFMADB	V6, V1, V7, V1
+	FMOVD	664(R9), F3
+	FMOVD	200(R9), F7
+	WFMADB	V0, V5, V3, V5
+	FMADD	F1, F6, F7
+	BR	L11
+L4:
+	// 2**-255 <= |x| < 1: polynomial in x*x - 0.75 (coefficients at
+	// 96..184(R9)); F2 still holds x from entry. The sign of x is not
+	// consulted on this path (R2 was overwritten before the branch here).
+	FMOVD	192(R9), F1
+	FMADD	F0, F0, F1
+	FMOVD	184(R9), F3
+	WFMDB	V1, V1, V0
+	FMOVD	176(R9), F4
+	FMOVD	168(R9), F6
+	WFMADB	V0, V4, V3, V4
+	FMOVD	160(R9), F3
+	WFMADB	V0, V6, V3, V6
+	FMOVD	152(R9), F5
+	FMOVD	144(R9), F3
+	WFMADB	V0, V4, V5, V4
+	WFMADB	V0, V6, V3, V6
+	FMOVD	136(R9), F5
+	FMOVD	128(R9), F3
+	WFMADB	V0, V4, V5, V4
+	WFMADB	V0, V6, V3, V6
+	FMOVD	120(R9), F5
+	FMOVD	112(R9), F3
+	WFMADB	V0, V4, V5, V4
+	WFMADB	V0, V6, V3, V6
+	FMOVD	104(R9), F5
+	FMOVD	96(R9), F3
+	WFMADB	V0, V4, V5, V4
+	WFMADB	V0, V6, V3, V0
+	FMOVD	F2, F6
+	FMADD	F4, F1, F0
+	WORD	$0xED609318	//sdb	%f6,.L39-.L38(%r9)
+	BYTE	$0x00
+	BYTE	$0x1B
+	WFMSDB	V2, V0, V6, V2
+	FMOVD	F2, ret+8(FP)
+	RET
+L30:
+	// Negative x on the L3 path: F0 = x + 2.0 (constant at 568(R9)).
+	WORD	$0xED009238	//adb	%f0,.L67-.L38(%r9)
+	BYTE	$0x00
+	BYTE	$0x1A
+	BR	L8
+L20:
+	// Negative-x finish for the L11 tail: F2 = -(F7*F2 + (-2.0)),
+	// i.e. 2 - F7*F2.
+	FMOVD	88(R9), F0
+	WFMADB	V7, V2, V0, V2
+	WORD	$0xB3130022	//lcdbr	%f2,%f2
+	FMOVD	F2, ret+8(FP)
+	RET
+L13:
+	// See the reachability note at L10.
+	// NOTE(review): L14 below has no referencing branch in this routine,
+	// while this compare targets L4 (the |x| < 1 path) — confirm the
+	// intended target against the code this was generated from.
+	MOVH	$0x403A, R3
+	MOVW	R1, R6
+	MOVW	R3, R7
+	CMPBLE	R6, R7, L4
+	WORD	$0xED109050	//cdb	%f1,.L128-.L38(%r9)
+	BYTE	$0x00
+	BYTE	$0x19
+	BGE	L37
+	BVS	L37
+	FMOVD	72(R9), F6
+	FSUB	F1, F6
+	MOVH	$0x1000, R3
+	FDIV	F1, F6
+	MOVH	$0x1000, R1
+L17:
+	// Polynomial using the coefficients at 0..64(R9), then the shared tail.
+	WFMDB	V6, V6, V1
+	FMOVD	64(R9), F2
+	FMOVD	56(R9), F4
+	FMOVD	48(R9), F3
+	WFMADB	V1, V3, V2, V3
+	FMOVD	40(R9), F2
+	WFMADB	V1, V2, V4, V2
+	FMOVD	32(R9), F4
+	WFMADB	V1, V3, V4, V3
+	FMOVD	24(R9), F4
+	WFMADB	V1, V2, V4, V2
+	FMOVD	16(R9), F4
+	WFMADB	V1, V3, V4, V3
+	FMOVD	8(R9), F4
+	WFMADB	V1, V2, V4, V1
+	FMUL	F0, F0
+	WFMADB	V3, V6, V1, V3
+	FMOVD	656(R9), F5
+	FMOVD	664(R9), F4
+	FMOVD	0(R9), F7
+	WFMADB	V0, V5, V4, V5
+	FMADD	F6, F3, F7
+	BR	L11
+L14:
+	// NOTE(review): no branch in this routine targets L14.
+	FMOVD	72(R9), F6
+	FSUB	F1, F6
+	MOVH	$0x403A, R3
+	FDIV	F1, F6
+	MOVW	R1, R6
+	MOVW	R3, R7
+	CMPBEQ	R6, R7, L23
+	MOVH	$0x0, R3
+	MOVH	$0x0, R1
+	BR	L17
+L37:
+	WFCEDBS	V0, V0, V0
+	BVS	L1
+	MOVW	R2, R6
+	CMPBLE	R6, $0, L18
+	MOVH	$0x7FEF, R2
+	MOVW	R1, R6
+	MOVW	R2, R7
+	CMPBGT	R6, R7, L24
+
+	WORD	$0xA5400010	//iihh	%r4,16
+	LDGR	R4, F2
+	FMUL	F2, F2
+	BR	L1
+L23:
+	MOVH	$0x1000, R3
+	MOVH	$0x1000, R1
+	BR	L17
+L24:
+	FMOVD	$0, F2
+	BR	L1
+L18:
+	MOVH	$0x7FEF, R2
+	MOVW	R1, R6
+	MOVW	R2, R7
+	CMPBGT	R6, R7, L25
+	WORD	$0xA5408010	//iihh	%r4,32784
+	FMOVD	568(R9), F2
+	LDGR	R4, F0
+	FMADD	F2, F0, F2
+	BR	L1
+L25:
+	FMOVD	568(R9), F2
+	BR	L1
+usego:
+	// Out-of-range input: tail-branch to the generic Go implementation.
+	BR	·erfc(SB)
diff --git a/src/math/erfinv.go b/src/math/erfinv.go
new file mode 100644
index 0000000..eed0feb
--- /dev/null
+++ b/src/math/erfinv.go
@@ -0,0 +1,129 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Inverse of the floating-point error function.
+*/
+
+// This implementation is based on the rational approximation
+// of percentage points of normal distribution available from
+// https://www.jstor.org/stable/2347330.
+
+const (
+	// Coefficients of the rational approximation used by Erfinv for
+	// |x| <= 0.85: a0..a7 form the numerator, b0..b7 the denominator.
+	a0 = 1.1975323115670912564578e0
+	a1 = 4.7072688112383978012285e1
+	a2 = 6.9706266534389598238465e2
+	a3 = 4.8548868893843886794648e3
+	a4 = 1.6235862515167575384252e4
+	a5 = 2.3782041382114385731252e4
+	a6 = 1.1819493347062294404278e4
+	a7 = 8.8709406962545514830200e2
+	b0 = 1.0000000000000000000e0
+	b1 = 4.2313330701600911252e1
+	b2 = 6.8718700749205790830e2
+	b3 = 5.3941960214247511077e3
+	b4 = 2.1213794301586595867e4
+	b5 = 3.9307895800092710610e4
+	b6 = 2.8729085735721942674e4
+	b7 = 5.2264952788528545610e3
+	// Coefficients used by Erfinv for 0.85 < |x| <= 1-2*exp(-25), i.e. while
+	// r = Sqrt(Ln2 - Log(1-|x|)) <= 5: c0..c7 numerator, d0..d7 denominator.
+	c0 = 1.42343711074968357734e0
+	c1 = 4.63033784615654529590e0
+	c2 = 5.76949722146069140550e0
+	c3 = 3.64784832476320460504e0
+	c4 = 1.27045825245236838258e0
+	c5 = 2.41780725177450611770e-1
+	c6 = 2.27238449892691845833e-2
+	c7 = 7.74545014278341407640e-4
+	d0 = 1.4142135623730950488016887e0
+	d1 = 2.9036514445419946173133295e0
+	d2 = 2.3707661626024532365971225e0
+	d3 = 9.7547832001787427186894837e-1
+	d4 = 2.0945065210512749128288442e-1
+	d5 = 2.1494160384252876777097297e-2
+	d6 = 7.7441459065157709165577218e-4
+	d7 = 1.4859850019840355905497876e-9
+	// Coefficients used by Erfinv for 1-2*exp(-25) < |x| < 1 (r > 5):
+	// e0..e7 numerator, f0..f7 denominator.
+	e0 = 6.65790464350110377720e0
+	e1 = 5.46378491116411436990e0
+	e2 = 1.78482653991729133580e0
+	e3 = 2.96560571828504891230e-1
+	e4 = 2.65321895265761230930e-2
+	e5 = 1.24266094738807843860e-3
+	e6 = 2.71155556874348757815e-5
+	e7 = 2.01033439929228813265e-7
+	f0 = 1.414213562373095048801689e0
+	f1 = 8.482908416595164588112026e-1
+	f2 = 1.936480946950659106176712e-1
+	f3 = 2.103693768272068968719679e-2
+	f4 = 1.112800997078859844711555e-3
+	f5 = 2.611088405080593625138020e-5
+	f6 = 2.010321207683943062279931e-7
+	f7 = 2.891024605872965461538222e-15
+)
+
+// Erfinv computes the inverse of the error function at x.
+//
+// Special cases are:
+//
+//	Erfinv(1) = +Inf
+//	Erfinv(-1) = -Inf
+//	Erfinv(x) = NaN if x < -1 or x > 1
+//	Erfinv(NaN) = NaN
+func Erfinv(x float64) float64 {
+	// Endpoints map to the infinity of matching sign; anything else outside
+	// the open interval (-1, 1), and NaN, has no inverse.
+	switch {
+	case x == 1 || x == -1:
+		return Inf(int(x))
+	case IsNaN(x) || x < -1 || x > 1:
+		return NaN()
+	}
+
+	// Work on |x| and restore the sign at the end.
+	negative := x < 0
+	if negative {
+		x = -x
+	}
+
+	var result float64
+	if x > 0.85 {
+		// Tail region: rational function of t = Sqrt(Ln2 - Log(1-x)),
+		// with a separate coefficient set once t exceeds 5.
+		var num, den float64
+		t := Sqrt(Ln2 - Log(1.0-x))
+		if t > 5.0 {
+			t -= 5.0
+			num = ((((((e7*t+e6)*t+e5)*t+e4)*t+e3)*t+e2)*t+e1)*t + e0
+			den = ((((((f7*t+f6)*t+f5)*t+f4)*t+f3)*t+f2)*t+f1)*t + f0
+		} else {
+			t -= 1.6
+			num = ((((((c7*t+c6)*t+c5)*t+c4)*t+c3)*t+c2)*t+c1)*t + c0
+			den = ((((((d7*t+d6)*t+d5)*t+d4)*t+d3)*t+d2)*t+d1)*t + d0
+		}
+		result = num / den
+	} else {
+		// Central region, |x| <= 0.85.
+		t := 0.180625 - 0.25*x*x
+		num := ((((((a7*t+a6)*t+a5)*t+a4)*t+a3)*t+a2)*t+a1)*t + a0
+		den := ((((((b7*t+b6)*t+b5)*t+b4)*t+b3)*t+b2)*t+b1)*t + b0
+		result = (x * num) / den
+	}
+
+	if negative {
+		return -result
+	}
+	return result
+}
+
+// Erfcinv returns the inverse of Erfc(x).
+//
+// Special cases are:
+//
+//	Erfcinv(0) = +Inf
+//	Erfcinv(2) = -Inf
+//	Erfcinv(x) = NaN if x < 0 or x > 2
+//	Erfcinv(NaN) = NaN
+//
+// The result is obtained as Erfinv(1 - x), so it inherits the rounding of
+// that subtraction: a positive x small enough that 1 - x rounds to 1
+// (roughly below 5.5e-17) yields +Inf.
+func Erfcinv(x float64) float64 {
+	complement := 1 - x
+	return Erfinv(complement)
+}
diff --git a/src/math/example_test.go b/src/math/example_test.go
new file mode 100644
index 0000000..a26d8cb
--- /dev/null
+++ b/src/math/example_test.go
@@ -0,0 +1,245 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math_test
+
+import (
+	"fmt"
+	"math"
+)
+
+// ExampleAcos prints math.Acos(1) with two decimal places.
+func ExampleAcos() {
+	fmt.Printf("%.2f", math.Acos(1))
+	// Output: 0.00
+}
+
+// ExampleAcosh prints math.Acosh(1) with two decimal places.
+func ExampleAcosh() {
+	fmt.Printf("%.2f", math.Acosh(1))
+	// Output: 0.00
+}
+
+// ExampleAsin prints math.Asin(0) with two decimal places.
+func ExampleAsin() {
+	fmt.Printf("%.2f", math.Asin(0))
+	// Output: 0.00
+}
+
+// ExampleAsinh prints math.Asinh(0) with two decimal places.
+func ExampleAsinh() {
+	fmt.Printf("%.2f", math.Asinh(0))
+	// Output: 0.00
+}
+
+// ExampleAtan prints math.Atan(0) with two decimal places.
+func ExampleAtan() {
+	fmt.Printf("%.2f", math.Atan(0))
+	// Output: 0.00
+}
+
+// ExampleAtan2 prints math.Atan2(0, 0) with two decimal places.
+func ExampleAtan2() {
+	fmt.Printf("%.2f", math.Atan2(0, 0))
+	// Output: 0.00
+}
+
+// ExampleAtanh prints math.Atanh(0) with two decimal places.
+func ExampleAtanh() {
+	fmt.Printf("%.2f", math.Atanh(0))
+	// Output: 0.00
+}
+
+// ExampleCopysign prints math.Copysign(3.2, -1), i.e. 3.2 carrying the sign
+// of -1, with two decimal places.
+func ExampleCopysign() {
+	fmt.Printf("%.2f", math.Copysign(3.2, -1))
+	// Output: -3.20
+}
+
+// ExampleCos prints math.Cos(math.Pi/2) with two decimal places.
+func ExampleCos() {
+	fmt.Printf("%.2f", math.Cos(math.Pi/2))
+	// Output: 0.00
+}
+
+// ExampleCosh prints math.Cosh(0) with two decimal places.
+func ExampleCosh() {
+	fmt.Printf("%.2f", math.Cosh(0))
+	// Output: 1.00
+}
+
+// ExampleSin prints math.Sin(math.Pi) with two decimal places.
+func ExampleSin() {
+	fmt.Printf("%.2f", math.Sin(math.Pi))
+	// Output: 0.00
+}
+
+// ExampleSincos prints both results of math.Sincos(0), sine first, each with
+// two decimal places.
+func ExampleSincos() {
+	sin, cos := math.Sincos(0)
+	fmt.Printf("%.2f, %.2f", sin, cos)
+	// Output: 0.00, 1.00
+}
+
+// ExampleSinh prints math.Sinh(0) with two decimal places.
+func ExampleSinh() {
+	fmt.Printf("%.2f", math.Sinh(0))
+	// Output: 0.00
+}
+
+// ExampleTan prints math.Tan(0) with two decimal places.
+func ExampleTan() {
+	fmt.Printf("%.2f", math.Tan(0))
+	// Output: 0.00
+}
+
+// ExampleTanh prints math.Tanh(0) with two decimal places.
+func ExampleTanh() {
+	fmt.Printf("%.2f", math.Tanh(0))
+	// Output: 0.00
+}
+
+// ExampleSqrt computes math.Sqrt(a*a + b*b) for the constants a = 3 and
+// b = 4 and prints the result with one decimal place.
+func ExampleSqrt() {
+	const (
+		a = 3
+		b = 4
+	)
+	c := math.Sqrt(a*a + b*b)
+	fmt.Printf("%.1f", c)
+	// Output: 5.0
+}
+
+// ExampleCeil prints math.Ceil(1.49) with one decimal place.
+func ExampleCeil() {
+	c := math.Ceil(1.49)
+	fmt.Printf("%.1f", c)
+	// Output: 2.0
+}
+
+// ExampleFloor prints math.Floor(1.51) with one decimal place.
+func ExampleFloor() {
+	c := math.Floor(1.51)
+	fmt.Printf("%.1f", c)
+	// Output: 1.0
+}
+
+// ExamplePow prints math.Pow(2, 3) with one decimal place.
+func ExamplePow() {
+	c := math.Pow(2, 3)
+	fmt.Printf("%.1f", c)
+	// Output: 8.0
+}
+
+// ExamplePow10 prints math.Pow10(2) with one decimal place.
+func ExamplePow10() {
+	c := math.Pow10(2)
+	fmt.Printf("%.1f", c)
+	// Output: 100.0
+}
+
+// ExampleRound prints math.Round for 10.5 and -10.5; as the expected output
+// shows, both halves round away from zero.
+func ExampleRound() {
+	p := math.Round(10.5)
+	fmt.Printf("%.1f\n", p)
+
+	n := math.Round(-10.5)
+	fmt.Printf("%.1f\n", n)
+	// Output:
+	// 11.0
+	// -11.0
+}
+
+// ExampleRoundToEven prints math.RoundToEven for 11.5 and 12.5; as the
+// expected output shows, both land on the even neighbour 12.
+func ExampleRoundToEven() {
+	u := math.RoundToEven(11.5)
+	fmt.Printf("%.1f\n", u)
+
+	d := math.RoundToEven(12.5)
+	fmt.Printf("%.1f\n", d)
+	// Output:
+	// 12.0
+	// 12.0
+}
+
+// ExampleLog prints math.Log(1) and math.Log(2.7183), each with one decimal
+// place.
+func ExampleLog() {
+	x := math.Log(1)
+	fmt.Printf("%.1f\n", x)
+
+	y := math.Log(2.7183)
+	fmt.Printf("%.1f\n", y)
+	// Output:
+	// 0.0
+	// 1.0
+}
+
+// ExampleLog2 prints math.Log2(256) with one decimal place.
+func ExampleLog2() {
+	fmt.Printf("%.1f", math.Log2(256))
+	// Output: 8.0
+}
+
+// ExampleLog10 prints math.Log10(100) with one decimal place.
+func ExampleLog10() {
+	fmt.Printf("%.1f", math.Log10(100))
+	// Output: 2.0
+}
+
+// ExampleRemainder prints math.Remainder(100, 30) with one decimal place.
+func ExampleRemainder() {
+	fmt.Printf("%.1f", math.Remainder(100, 30))
+	// Output: 10.0
+}
+
+// ExampleMod prints math.Mod(7, 4) with one decimal place.
+func ExampleMod() {
+	c := math.Mod(7, 4)
+	fmt.Printf("%.1f", c)
+	// Output: 3.0
+}
+
+// ExampleAbs prints math.Abs for -2 and 2, each with one decimal place.
+func ExampleAbs() {
+	x := math.Abs(-2)
+	fmt.Printf("%.1f\n", x)
+
+	y := math.Abs(2)
+	fmt.Printf("%.1f\n", y)
+	// Output:
+	// 2.0
+	// 2.0
+}
+// ExampleDim prints math.Dim(4, -2) and math.Dim(-4, 2), each with two
+// decimal places.
+func ExampleDim() {
+	fmt.Printf("%.2f\n", math.Dim(4, -2))
+	fmt.Printf("%.2f\n", math.Dim(-4, 2))
+	// Output:
+	// 6.00
+	// 0.00
+}
+
+// ExampleExp prints math.Exp for 1, 2 and -1, each with two decimal places.
+func ExampleExp() {
+	fmt.Printf("%.2f\n", math.Exp(1))
+	fmt.Printf("%.2f\n", math.Exp(2))
+	fmt.Printf("%.2f\n", math.Exp(-1))
+	// Output:
+	// 2.72
+	// 7.39
+	// 0.37
+}
+
+// ExampleExp2 prints math.Exp2 for 1 and -3, each with two decimal places.
+func ExampleExp2() {
+	fmt.Printf("%.2f\n", math.Exp2(1))
+	fmt.Printf("%.2f\n", math.Exp2(-3))
+	// Output:
+	// 2.00
+	// 0.12
+}
+
+// ExampleExpm1 prints math.Expm1 for 0.01 and -1, each with six decimal
+// places.
+func ExampleExpm1() {
+	fmt.Printf("%.6f\n", math.Expm1(0.01))
+	fmt.Printf("%.6f\n", math.Expm1(-1))
+	// Output:
+	// 0.010050
+	// -0.632121
+}
+
+// ExampleTrunc prints math.Trunc for math.Pi and -1.2345, each with two
+// decimal places.
+func ExampleTrunc() {
+	fmt.Printf("%.2f\n", math.Trunc(math.Pi))
+	fmt.Printf("%.2f\n", math.Trunc(-1.2345))
+	// Output:
+	// 3.00
+	// -1.00
+}
+
+// ExampleCbrt prints math.Cbrt for 8 and 27, each with two decimal places.
+func ExampleCbrt() {
+	fmt.Printf("%.2f\n", math.Cbrt(8))
+	fmt.Printf("%.2f\n", math.Cbrt(27))
+	// Output:
+	// 2.00
+	// 3.00
+}
+
+// ExampleModf splits 3.14 and -2.71 with math.Modf and prints the integer
+// and fractional parts of each, with two decimal places.
+func ExampleModf() {
+	// The integer part is named whole rather than int so that the example
+	// does not shadow the predeclared type.
+	whole, frac := math.Modf(3.14)
+	fmt.Printf("%.2f, %.2f\n", whole, frac)
+
+	whole, frac = math.Modf(-2.71)
+	fmt.Printf("%.2f, %.2f\n", whole, frac)
+	// Output:
+	// 3.00, 0.14
+	// -2.00, -0.71
+}
diff --git a/src/math/exp.go b/src/math/exp.go
new file mode 100644
index 0000000..760795f
--- /dev/null
+++ b/src/math/exp.go
@@ -0,0 +1,203 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Exp returns e**x, the base-e exponential of x.
+//
+// Special cases are:
+//
+//	Exp(+Inf) = +Inf
+//	Exp(NaN) = NaN
+//
+// Very large values overflow to 0 or +Inf.
+// Very small values underflow to 1.
+func Exp(x float64) float64 {
+	// Prefer the architecture-specific routine when this build has one;
+	// otherwise use the portable implementation.
+	if !haveArchExp {
+		return exp(x)
+	}
+	return archExp(x)
+}
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/e_exp.c
+// and came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
+//
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// exp(x)
+// Returns the exponential of x.
+//
+// Method
+//   1. Argument reduction:
+//      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
+//      Given x, find r and integer k such that
+//
+//               x = k*ln2 + r,  |r| <= 0.5*ln2.
+//
+//      Here r will be represented as r = hi-lo for better
+//      accuracy.
+//
+//   2. Approximation of exp(r) by a special rational function on
+//      the interval [0,0.34658]:
+//      Write
+//          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
+//      We use a special Remez algorithm on [0,0.34658] to generate
+//      a polynomial of degree 5 to approximate R. The maximum error
+//      of this polynomial approximation is bounded by 2**-59. In
+//      other words,
+//          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
+//      (where z=r*r, and the values of P1 to P5 are listed below)
+//      and
+//          |                  5          |     -59
+//          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
+//          |                             |
+//      The computation of exp(r) thus becomes
+//                             2*r
+//              exp(r) = 1 + -------
+//                            R - r
+//                                 r*R1(r)
+//                     = 1 + r + ----------- (for better accuracy)
+//                                2 - R1(r)
+//      where
+//                               2       4             10
+//              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
+//
+//   3. Scale back to obtain exp(x):
+//      From step 1, we have
+//         exp(x) = 2**k * exp(r)
+//
+// Special cases:
+//      exp(INF) is INF, exp(NaN) is NaN;
+//      exp(-INF) is 0, and
+//      for finite argument, only exp(0)=1 is exact.
+//
+// Accuracy:
+//      according to an error analysis, the error is always less than
+//      1 ulp (unit in the last place).
+//
+// Misc. info.
+//      For IEEE double
+//          if x >  7.09782712893383973096e+02 then exp(x) overflow
+//          if x < -7.45133219101941108420e+02 then exp(x) underflow
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+
+// exp is the portable implementation behind Exp: it handles the special and
+// out-of-range inputs, reduces x to k*ln2 + r, and lets expmulti finish.
+func exp(x float64) float64 {
+	const (
+		Ln2Hi = 6.93147180369123816490e-01
+		Ln2Lo = 1.90821492927058770002e-10
+		Log2e = 1.44269504088896338700e+00
+
+		Overflow  = 7.09782712893383973096e+02
+		Underflow = -7.45133219101941108420e+02
+		NearZero  = 1.0 / (1 << 28) // 2**-28
+	)
+
+	// Inputs whose result is known without any polynomial work.
+	if IsNaN(x) || IsInf(x, 1) {
+		return x
+	}
+	if IsInf(x, -1) || x < Underflow {
+		return 0
+	}
+	if x > Overflow {
+		return Inf(1)
+	}
+	if -NearZero < x && x < NearZero {
+		return 1 + x
+	}
+
+	// Choose k as Log2e*x rounded half away from zero (int truncates toward
+	// zero), then form the remainder as hi - lo for extra precision.
+	var k int
+	if x < 0 {
+		k = int(Log2e*x - 0.5)
+	} else if x > 0 {
+		k = int(Log2e*x + 0.5)
+	}
+	scale := float64(k)
+	hi := x - scale*Ln2Hi
+	lo := scale * Ln2Lo
+
+	return expmulti(hi, lo, k)
+}
+
+// Exp2 returns 2**x, the base-2 exponential of x.
+//
+// Special cases are the same as Exp.
+func Exp2(x float64) float64 {
+	// Prefer the architecture-specific routine when this build has one;
+	// otherwise use the portable implementation.
+	if !haveArchExp2 {
+		return exp2(x)
+	}
+	return archExp2(x)
+}
+
+// exp2 is the portable implementation behind Exp2: it handles the special
+// and out-of-range inputs, splits x into an integer k and a fraction t, and
+// lets expmulti evaluate e**(t*ln2) × 2**k.
+func exp2(x float64) float64 {
+	const (
+		Ln2Hi = 6.93147180369123816490e-01
+		Ln2Lo = 1.90821492927058770002e-10
+
+		Overflow  = 1.0239999999999999e+03
+		Underflow = -1.0740e+03
+	)
+
+	// Inputs whose result is known without any polynomial work.
+	if IsNaN(x) || IsInf(x, 1) {
+		return x
+	}
+	if IsInf(x, -1) || x < Underflow {
+		return 0
+	}
+	if x > Overflow {
+		return Inf(1)
+	}
+
+	// Argument reduction: x = r×lg(e) + k with |r| ≤ ln(2)/2, where k is x
+	// rounded half away from zero and r is carried as hi - lo for extra
+	// precision.
+	var k int
+	if x > 0 {
+		k = int(x + 0.5)
+	} else if x < 0 {
+		k = int(x - 0.5)
+	}
+	t := x - float64(k)
+	hi := t * Ln2Hi
+	lo := -t * Ln2Lo
+
+	return expmulti(hi, lo, k)
+}
+
+// expmulti returns e**r × 2**k where r = hi - lo and |r| ≤ ln(2)/2.
+func expmulti(hi, lo float64, k int) float64 {
+	const (
+		P1 = 1.66666666666666657415e-01  /* 0x3FC55555; 0x55555555 */
+		P2 = -2.77777777770155933842e-03 /* 0xBF66C16C; 0x16BEBD93 */
+		P3 = 6.61375632143793436117e-05  /* 0x3F11566A; 0xAF25DE2C */
+		P4 = -1.65339022054652515390e-06 /* 0xBEBBBD41; 0xC5D26BF1 */
+		P5 = 4.13813679705723846039e-08  /* 0x3E663769; 0x72BEA4D0 */
+	)
+
+	// rem is the reduced argument r; corr is r minus the degree-5 polynomial
+	// in r*r, evaluated in Horner form.
+	rem := hi - lo
+	sq := rem * rem
+	corr := rem - sq*(P1+sq*(P2+sq*(P3+sq*(P4+sq*P5))))
+	mant := 1 - ((lo - (rem*corr)/(2-corr)) - hi)
+	// TODO(rsc): make sure Ldexp can handle boundary k
+	return Ldexp(mant, k)
+}
diff --git a/src/math/exp2_asm.go b/src/math/exp2_asm.go
new file mode 100644
index 0000000..c26b2c3
--- /dev/null
+++ b/src/math/exp2_asm.go
@@ -0,0 +1,11 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64
+
+package math
+
+// haveArchExp2 is true in this file, which is constrained to arm64 by its
+// build tag; Exp2 consults it to choose archExp2 over the portable exp2.
+const haveArchExp2 = true
+
+// archExp2 is declared without a Go body, so its implementation is supplied
+// outside Go source (presumably the package's arm64 assembly — not shown
+// here).
+func archExp2(x float64) float64
diff --git a/src/math/exp2_noasm.go b/src/math/exp2_noasm.go
new file mode 100644
index 0000000..c2b4093
--- /dev/null
+++ b/src/math/exp2_noasm.go
@@ -0,0 +1,13 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !arm64
+
+package math
+
+// haveArchExp2 is false in this file, which is constrained to non-arm64
+// builds by its build tag, so Exp2 uses the portable exp2.
+const haveArchExp2 = false
+
+// archExp2 is a placeholder that keeps the package compiling on
+// architectures without an assembly Exp2; it panics if called.
+func archExp2(x float64) float64 {
+	panic("not implemented")
+}
diff --git a/src/math/exp_amd64.go b/src/math/exp_amd64.go
new file mode 100644
index 0000000..0f701b1
--- /dev/null
+++ b/src/math/exp_amd64.go
@@ -0,0 +1,11 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64
+
+package math
+
+import "internal/cpu"
+
+// useFMA is true when the running CPU reports both AVX and FMA support.
+var useFMA = cpu.X86.HasAVX && cpu.X86.HasFMA
diff --git a/src/math/exp_amd64.s b/src/math/exp_amd64.s
new file mode 100644
index 0000000..02b71c8
--- /dev/null
+++ b/src/math/exp_amd64.s
@@ -0,0 +1,159 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// The method is based on a paper by Naoki Shibata: "Efficient evaluation
+// methods of elementary functions suitable for SIMD computation", Proc.
+// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
+// (May 2010). The paper is available at
+// https://link.springer.com/article/10.1007/s00450-010-0108-2
+//
+// The original code and the constants below are from the author's
+// implementation available at http://freshmeat.net/projects/sleef.
+// The README file says, "The software is in public domain.
+// You can use the software without any obligation."
+//
+// This code is a simplified version of the original.
+
+#define LN2 0.6931471805599453094172321214581766 // log_e(2)
+#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
+#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
+#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
+#define PosInf 0x7FF0000000000000 // float64 bit pattern of +Inf
+#define NegInf 0xFFF0000000000000 // float64 bit pattern of -Inf
+#define Overflow 7.09782712893384e+02 // archExp returns +Inf for arguments above this
+
+DATA exprodata<>+0(SB)/8, $0.5 // 1/2!
+DATA exprodata<>+8(SB)/8, $1.0
+DATA exprodata<>+16(SB)/8, $2.0
+DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1 // 1/3!
+DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2 // 1/4!
+DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3 // 1/5!
+DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3 // 1/6!
+DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4 // 1/7!
+DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5 // 1/8!
+GLOBL exprodata<>+0(SB), RODATA, $72
+
+// func archExp(x float64) float64
+TEXT ·archExp(SB),NOSPLIT,$0
+	// test bits for not-finite
+	MOVQ    x+0(FP), BX
+	MOVQ    $~(1<<63), AX // sign bit mask
+	MOVQ    BX, DX
+	ANDQ    AX, DX // DX = bits of x with the sign bit cleared
+	MOVQ    $PosInf, AX
+	CMPQ    AX, DX
+	JLE     notFinite // exponent field all ones: x is ±Inf or NaN
+	// check if argument will overflow
+	MOVQ    BX, X0
+	MOVSD   $Overflow, X1
+	COMISD  X1, X0
+	JA      overflow
+	MOVSD   $LOG2E, X1
+	MULSD   X0, X1 // X1 = x * LOG2E
+	CVTSD2SL X1, BX // BX = exponent
+	CVTSL2SD BX, X1 // X1 = float64(exponent)
+	CMPB ·useFMA(SB), $1
+	JE   avxfma
+	MOVSD   $LN2U, X2
+	MULSD   X1, X2
+	SUBSD   X2, X0 // X0 = x - exponent*LN2U
+	MOVSD   $LN2L, X2
+	MULSD   X1, X2
+	SUBSD   X2, X0 // X0 -= exponent*LN2L
+	// reduce argument
+	MULSD   $0.0625, X0
+	// Taylor series evaluation
+	MOVSD   exprodata<>+64(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+56(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+48(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+40(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+32(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+24(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+0(SB), X1
+	MULSD   X0, X1
+	ADDSD   exprodata<>+8(SB), X1
+	MULSD   X1, X0 // X0 = X0 * (Horner polynomial in X0)
+	MOVSD   exprodata<>+16(SB), X1
+	ADDSD   X0, X1
+	MULSD   X1, X0 // X0 = X0*(X0+2), i.e. (1+X0)**2 - 1; done four times for the 0.0625 scaling
+	MOVSD   exprodata<>+16(SB), X1
+	ADDSD   X0, X1
+	MULSD   X1, X0
+	MOVSD   exprodata<>+16(SB), X1
+	ADDSD   X0, X1
+	MULSD   X1, X0
+	MOVSD   exprodata<>+16(SB), X1
+	ADDSD   X0, X1
+	MULSD   X1, X0
+	ADDSD exprodata<>+8(SB), X0 // X0 += 1.0
+	// return fr * 2**exponent
+ldexp:
+	ADDL    $0x3FF, BX // add bias
+	JLE     denormal
+	CMPL    BX, $0x7FF
+	JGE     overflow
+lastStep:
+	SHLQ    $52, BX // move the biased exponent into the float64 exponent field
+	MOVQ    BX, X1
+	MULSD   X1, X0
+	MOVSD   X0, ret+8(FP)
+	RET
+notFinite:
+	// test bits for -Inf
+	MOVQ    $NegInf, AX
+	CMPQ    AX, BX
+	JNE     notNegInf
+	// -Inf, return 0
+underflow: // return 0
+	MOVQ    $0, ret+8(FP)
+	RET
+overflow: // return +Inf
+	MOVQ    $PosInf, BX
+notNegInf: // NaN or +Inf, return x
+	MOVQ    BX, ret+8(FP)
+	RET
+denormal:
+	CMPL    BX, $-52
+	JL      underflow
+	ADDL    $0x3FE, BX // add bias - 1
+	SHLQ    $52, BX
+	MOVQ    BX, X1
+	MULSD   X1, X0
+	MOVQ    $1, BX // biased exponent 1; lastStep then multiplies by 2**-1022
+	JMP     lastStep
+
+avxfma:
+	MOVSD   $LN2U, X2
+	VFNMADD231SD X2, X1, X0 // X0 = x - exponent*LN2U, fused
+	MOVSD   $LN2L, X2
+	VFNMADD231SD X2, X1, X0 // X0 -= exponent*LN2L, fused
+	// reduce argument
+	MULSD   $0.0625, X0
+	// Taylor series evaluation
+	MOVSD   exprodata<>+64(SB), X1
+	VFMADD213SD exprodata<>+56(SB), X0, X1
+	VFMADD213SD exprodata<>+48(SB), X0, X1
+	VFMADD213SD exprodata<>+40(SB), X0, X1
+	VFMADD213SD exprodata<>+32(SB), X0, X1
+	VFMADD213SD exprodata<>+24(SB), X0, X1
+	VFMADD213SD exprodata<>+0(SB), X0, X1
+	VFMADD213SD exprodata<>+8(SB), X0, X1
+	MULSD   X1, X0
+	VADDSD exprodata<>+16(SB), X0, X1 // X1 = X0 + 2.0
+	MULSD   X1, X0
+	VADDSD exprodata<>+16(SB), X0, X1
+	MULSD   X1, X0
+	VADDSD exprodata<>+16(SB), X0, X1
+	MULSD   X1, X0
+	VADDSD exprodata<>+16(SB), X0, X1
+	VFMADD213SD   exprodata<>+8(SB), X1, X0 // X0 = X0*X1 + 1.0
+	JMP ldexp
diff --git a/src/math/exp_arm64.s b/src/math/exp_arm64.s
new file mode 100644
index 0000000..44673ab
--- /dev/null
+++ b/src/math/exp_arm64.s
@@ -0,0 +1,182 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define	Ln2Hi	6.93147180369123816490e-01	// multiplied by k to form hi
+#define	Ln2Lo	1.90821492927058770002e-10	// multiplied by k to form lo
+#define	Log2e	1.44269504088896338700e+00
+#define	Overflow	7.09782712893383973096e+02	// archExp returns +Inf above this
+#define	Underflow	-7.45133219101941108420e+02	// archExp returns 0 below this
+#define	Overflow2	1.0239999999999999e+03	// archExp2 returns +Inf above this
+#define	Underflow2	-1.0740e+03	// archExp2 returns 0 below this
+#define	NearZero	0x3e30000000000000	// 2**-28
+#define	PosInf	0x7ff0000000000000
+#define	FracMask	0x000fffffffffffff
+#define	C1	0x3cb0000000000000	// 2**-52
+#define	P1	1.66666666666666657415e-01	// 0x3FC55555; 0x55555555
+#define	P2	-2.77777777770155933842e-03	// 0xBF66C16C; 0x16BEBD93
+#define	P3	6.61375632143793436117e-05	// 0x3F11566A; 0xAF25DE2C
+#define	P4	-1.65339022054652515390e-06	// 0xBEBBBD41; 0xC5D26BF1
+#define	P5	4.13813679705723846039e-08	// 0x3E663769; 0x72BEA4D0
+
+// Exp returns e**x, the base-e exponential of x.
+// This is an assembly implementation of the method used for function Exp in file exp.go.
+//
+// func archExp(x float64) float64
+TEXT ·archExp(SB),$0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	FCMPD	F0, F0
+	BNE	isNaN		// x = NaN, return NaN
+	FMOVD	$Overflow, F1
+	FCMPD	F1, F0
+	BGT	overflow	// x > Overflow, return PosInf
+	FMOVD	$Underflow, F1
+	FCMPD	F1, F0
+	BLT	underflow	// x < Underflow, return 0
+	MOVD	$NearZero, R0
+	FMOVD	R0, F2
+	FABSD	F0, F3
+	FMOVD	$1.0, F1	// F1 = 1.0
+	FCMPD	F2, F3
+	BLT	nearzero	// fabs(x) < NearZero, return 1 + x
+	// argument reduction, x = k*ln2 + r,  |r| <= 0.5*ln2
+	// computed as r = hi - lo for extra precision.
+	FMOVD	$Log2e, F2
+	FMOVD	$0.5, F3
+	FNMSUBD	F0, F3, F2, F4	// Log2e*x - 0.5
+	FMADDD	F0, F3, F2, F3	// Log2e*x + 0.5
+	FCMPD	$0.0, F0
+	FCSELD	LT, F4, F3, F3	// F3 = k
+	FCVTZSD	F3, R1		// R1 = int(k)
+	SCVTFD	R1, F3		// F3 = float64(int(k))
+	FMOVD	$Ln2Hi, F4	// F4 = Ln2Hi
+	FMOVD	$Ln2Lo, F5	// F5 = Ln2Lo
+	FMSUBD	F3, F0, F4, F4	// F4 = hi = x - float64(int(k))*Ln2Hi
+	FMULD	F3, F5		// F5 = lo = float64(int(k)) * Ln2Lo
+	FSUBD	F5, F4, F6	// F6 = r = hi - lo
+	FMULD	F6, F6, F7	// F7 = t = r * r
+	// compute y
+	FMOVD	$P5, F8		// F8 = P5
+	FMOVD	$P4, F9		// F9 = P4
+	FMADDD	F7, F9, F8, F13	// P4+t*P5
+	FMOVD	$P3, F10	// F10 = P3
+	FMADDD	F7, F10, F13, F13	// P3+t*(P4+t*P5)
+	FMOVD	$P2, F11	// F11 = P2
+	FMADDD	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
+	FMOVD	$P1, F12	// F12 = P1
+	FMADDD	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
+	FMSUBD	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
+	FMOVD	$2.0, F14
+	FSUBD	F13, F14	// F14 = 2 - c
+	FMULD	F6, F13, F15	// F15 = r*c
+	FDIVD	F14, F15	// F15 = (r*c)/(2-c)
+	FSUBD	F15, F5, F15	// lo-(r*c)/(2-c)
+	FSUBD	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
+	FSUBD	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
+	// inline Ldexp(y, k), benefit:
+	// 1, no parameter pass overhead.
+	// 2, skip unnecessary checks for Inf/NaN/Zero
+	FMOVD	F16, R0
+	AND	$FracMask, R0, R2	// fraction
+	LSR	$52, R0, R5	// exponent
+	ADD	R1, R5		// R1 = int(k)
+	CMP	$1, R5
+	BGE	normal		// new exponent >= 1: keep m = F1 = 1.0
+	ADD	$52, R5		// denormal
+	MOVD	$C1, R8
+	FMOVD	R8, F1		// m = 2**-52
+normal:
+	ORR	R5<<52, R2, R0	// reassemble exponent and fraction
+	FMOVD	R0, F0
+	FMULD	F1, F0		// return m * x
+	FMOVD	F0, ret+8(FP)
+	RET
+nearzero:
+	FADDD	F1, F0		// F0 = 1 + x
+isNaN:
+	FMOVD	F0, ret+8(FP)
+	RET
+underflow:
+	MOVD	ZR, ret+8(FP)
+	RET
+overflow:
+	MOVD	$PosInf, R0
+	MOVD	R0, ret+8(FP)
+	RET
+
+
+// Exp2 returns 2**x, the base-2 exponential of x.
+// This is an assembly implementation of the method used for function Exp2 in file exp.go.
+//
+// func archExp2(x float64) float64
+TEXT ·archExp2(SB),$0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	FCMPD	F0, F0
+	BNE	isNaN		// x = NaN, return NaN
+	FMOVD	$Overflow2, F1
+	FCMPD	F1, F0
+	BGT	overflow	// x > Overflow2, return PosInf
+	FMOVD	$Underflow2, F1
+	FCMPD	F1, F0
+	BLT	underflow	// x < Underflow2, return 0
+	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
+	// computed as r = hi - lo for extra precision.
+	FMOVD	$0.5, F2
+	FSUBD	F2, F0, F3	// x - 0.5
+	FADDD	F2, F0, F4	// x + 0.5
+	FCMPD	$0.0, F0
+	FCSELD	LT, F3, F4, F3	// F3 = k
+	FCVTZSD	F3, R1		// R1 = int(k)
+	SCVTFD	R1, F3		// F3 = float64(int(k))
+	FSUBD	F3, F0, F3	// t = x - float64(int(k))
+	FMOVD	$Ln2Hi, F4	// F4 = Ln2Hi
+	FMOVD	$Ln2Lo, F5	// F5 = Ln2Lo
+	FMULD	F3, F4		// F4 = hi = t * Ln2Hi
+	FNMULD	F3, F5		// F5 = lo = -t * Ln2Lo
+	FSUBD	F5, F4, F6	// F6 = r = hi - lo
+	FMULD	F6, F6, F7	// F7 = t = r * r
+	// compute y
+	FMOVD	$P5, F8		// F8 = P5
+	FMOVD	$P4, F9		// F9 = P4
+	FMADDD	F7, F9, F8, F13	// P4+t*P5
+	FMOVD	$P3, F10	// F10 = P3
+	FMADDD	F7, F10, F13, F13	// P3+t*(P4+t*P5)
+	FMOVD	$P2, F11	// F11 = P2
+	FMADDD	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
+	FMOVD	$P1, F12	// F12 = P1
+	FMADDD	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
+	FMSUBD	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
+	FMOVD	$2.0, F14
+	FSUBD	F13, F14	// F14 = 2 - c
+	FMULD	F6, F13, F15	// F15 = r*c
+	FDIVD	F14, F15	// F15 = (r*c)/(2-c)
+	FMOVD	$1.0, F1	// F1 = 1.0
+	FSUBD	F15, F5, F15	// lo-(r*c)/(2-c)
+	FSUBD	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
+	FSUBD	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
+	// inline Ldexp(y, k), benefit:
+	// 1, no parameter pass overhead.
+	// 2, skip unnecessary checks for Inf/NaN/Zero
+	FMOVD	F16, R0
+	AND	$FracMask, R0, R2	// fraction
+	LSR	$52, R0, R5	// exponent
+	ADD	R1, R5		// R1 = int(k)
+	CMP	$1, R5
+	BGE	normal		// new exponent >= 1: keep m = F1 = 1.0
+	ADD	$52, R5		// denormal
+	MOVD	$C1, R8
+	FMOVD	R8, F1		// m = 2**-52
+normal:
+	ORR	R5<<52, R2, R0	// reassemble exponent and fraction
+	FMOVD	R0, F0
+	FMULD	F1, F0		// return m * x
+isNaN:
+	FMOVD	F0, ret+8(FP)
+	RET
+underflow:
+	MOVD	ZR, ret+8(FP)
+	RET
+overflow:
+	MOVD	$PosInf, R0
+	MOVD	R0, ret+8(FP)
+	RET
diff --git a/src/math/exp_asm.go b/src/math/exp_asm.go
new file mode 100644
index 0000000..4244428
--- /dev/null
+++ b/src/math/exp_asm.go
@@ -0,0 +1,11 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || s390x
+
+package math
+
+const haveArchExp = true // this file is built only for amd64, arm64 and s390x (see the go:build line above)
+
+func archExp(x float64) float64 // no Go body: exp_amd64.s and exp_arm64.s define TEXT ·archExp; the s390x definition is not in exp_s390x.s
diff --git a/src/math/exp_noasm.go b/src/math/exp_noasm.go
new file mode 100644
index 0000000..bd3f024
--- /dev/null
+++ b/src/math/exp_noasm.go
@@ -0,0 +1,13 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !arm64 && !s390x
+
+package math
+
+const haveArchExp = false // this file is built when GOARCH is none of amd64, arm64, s390x
+
+func archExp(x float64) float64 { // stub so the name exists where there is no assembly version
+	panic("not implemented") // NOTE(review): presumably unreachable because callers test haveArchExp first — confirm at the call site
+}
diff --git a/src/math/exp_s390x.s b/src/math/exp_s390x.s
new file mode 100644
index 0000000..e0ec823
--- /dev/null
+++ b/src/math/exp_s390x.s
@@ -0,0 +1,177 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial approximation and other constants
+DATA ·exprodataL22<> + 0(SB)/8, $800.0E+00	// second range limit compared against |x| in expAsm
+DATA ·exprodataL22<> + 8(SB)/8, $1.0000000000000022e+00
+DATA ·exprodataL22<> + 16(SB)/8, $0.500000000000004237e+00
+DATA ·exprodataL22<> + 24(SB)/8, $0.166666666630345592e+00
+DATA ·exprodataL22<> + 32(SB)/8, $0.138926439368309441e-02
+DATA ·exprodataL22<> + 40(SB)/8, $0.833349307718286047e-02
+DATA ·exprodataL22<> + 48(SB)/8, $0.416666664838056960e-01
+DATA ·exprodataL22<> + 56(SB)/8, $-.231904681384629956E-16
+DATA ·exprodataL22<> + 64(SB)/8, $-.693147180559945286E+00	// -ln(2)
+DATA ·exprodataL22<> + 72(SB)/8, $0.144269504088896339E+01	// log2(e)
+DATA ·exprodataL22<> + 80(SB)/8, $704.0E+00	// first range limit compared against |x| in expAsm
+GLOBL ·exprodataL22<> + 0(SB), RODATA, $88
+
+DATA ·expxinf<> + 0(SB)/8, $0x7ff0000000000000	// float64 bit pattern of +Inf
+GLOBL ·expxinf<> + 0(SB), RODATA, $8
+DATA ·expx4ff<> + 0(SB)/8, $0x4ff0000000000000
+GLOBL ·expx4ff<> + 0(SB), RODATA, $8
+DATA ·expx2ff<> + 0(SB)/8, $0x2ff0000000000000
+GLOBL ·expx2ff<> + 0(SB), RODATA, $8
+DATA ·expxaddexp<> + 0(SB)/8, $0xc2f0000100003fef
+GLOBL ·expxaddexp<> + 0(SB), RODATA, $8
+
+// Log multipliers table (16 entries of 8 bytes, indexed by a 4-bit field in expAsm)
+DATA ·exptexp<> + 0(SB)/8, $0.442737824274138381E-01
+DATA ·exptexp<> + 8(SB)/8, $0.263602189790660309E-01
+DATA ·exptexp<> + 16(SB)/8, $0.122565642281703586E-01
+DATA ·exptexp<> + 24(SB)/8, $0.143757052860721398E-02
+DATA ·exptexp<> + 32(SB)/8, $-.651375034121276075E-02
+DATA ·exptexp<> + 40(SB)/8, $-.119317678849450159E-01
+DATA ·exptexp<> + 48(SB)/8, $-.150868749549871069E-01
+DATA ·exptexp<> + 56(SB)/8, $-.161992609578469234E-01
+DATA ·exptexp<> + 64(SB)/8, $-.154492360403337917E-01
+DATA ·exptexp<> + 72(SB)/8, $-.129850717389178721E-01
+DATA ·exptexp<> + 80(SB)/8, $-.892902649276657891E-02
+DATA ·exptexp<> + 88(SB)/8, $-.338202636596794887E-02
+DATA ·exptexp<> + 96(SB)/8, $0.357266307045684762E-02
+DATA ·exptexp<> + 104(SB)/8, $0.118665304327406698E-01
+DATA ·exptexp<> + 112(SB)/8, $0.214434994118118914E-01
+DATA ·exptexp<> + 120(SB)/8, $0.322580645161290314E-01
+GLOBL ·exptexp<> + 0(SB), RODATA, $128
+
+// Exp returns e**x, the base-e exponential of x.
+//
+// Special cases are:
+//      Exp(+Inf) = +Inf
+//      Exp(NaN) = NaN
+// Very large values overflow to 0 or +Inf.
+// Very small values underflow to 1.
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT	·expAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	MOVD	$·exprodataL22<>+0(SB), R5	// R5 = base of the constant table
+	LTDBR	F0, F0
+	BLTU	L20
+	FMOVD	F0, F2	// F2 = x on this path
+L2:
+	WORD	$0xED205050	//cdb	%f2,.L23-.L22(%r5)  (compare F2 with 80(R5) = 704.0)
+	BYTE	$0x00
+	BYTE	$0x19
+	BGE	L16
+	BVS	L16
+	WFCEDBS	V2, V2, V2	// NOTE(review): looks like a NaN self-compare — confirm
+	BVS	LEXITTAGexp
+	MOVD	$·expxaddexp<>+0(SB), R1
+	FMOVD	72(R5), F6
+	FMOVD	0(R1), F2
+	WFMSDB	V0, V6, V2, V6
+	FMOVD	64(R5), F4
+	FADD	F6, F2
+	FMOVD	56(R5), F1
+	FMADD	F4, F2, F0
+	FMOVD	48(R5), F3
+	WFMADB	V2, V1, V0, V2
+	FMOVD	40(R5), F1
+	FMOVD	32(R5), F4
+	FMUL	F0, F0
+	WFMADB	V2, V4, V1, V4
+	LGDR	F6, R1
+	FMOVD	24(R5), F1
+	WFMADB	V2, V3, V1, V3
+	FMOVD	16(R5), F1
+	WFMADB	V0, V4, V3, V4
+	FMOVD	8(R5), F3
+	WFMADB	V2, V1, V3, V1
+	RISBGZ	$57, $60, $3, R1, R3
+	WFMADB	V0, V4, V1, V0
+	MOVD	$·exptexp<>+0(SB), R2	// R2 = base of the exptexp table
+	WORD	$0x68432000	//ld	%f4,0(%r3,%r2)
+	FMADD	F4, F2, F2
+	SLD	$48, R1, R2
+	WFMADB	V2, V0, V4, V2
+	LDGR	R2, F0
+	FMADD	F0, F2, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L16:
+	WFCEDBS	V2, V2, V4
+	BVS	LEXITTAGexp
+	WORD	$0xED205000	//cdb	%f2,.L33-.L22(%r5)  (compare F2 with 0(R5) = 800.0)
+	BYTE	$0x00
+	BYTE	$0x19
+	BLT	L6
+	WFCEDBS	V2, V0, V0
+	BVS	L13
+	MOVD	$·expxinf<>+0(SB), R1
+	FMOVD	0(R1), F0	// result = +Inf
+	FMOVD	F0, ret+8(FP)
+	RET
+L20:
+	WORD	$0xB3130020	//lcdbr	%f2,%f0  (F2 = -x)
+	BR	L2
+L6:
+	MOVD	$·expxaddexp<>+0(SB), R1
+	FMOVD	72(R5), F3
+	FMOVD	0(R1), F4
+	WFMSDB	V0, V3, V4, V3
+	FMOVD	64(R5), F6
+	FADD	F3, F4
+	FMOVD	56(R5), F5
+	WFMADB	V4, V6, V0, V6
+	FMOVD	32(R5), F1
+	WFMADB	V4, V5, V6, V4
+	FMOVD	40(R5), F5
+	FMUL	F6, F6
+	WFMADB	V4, V1, V5, V1
+	FMOVD	48(R5), F7
+	LGDR	F3, R1
+	FMOVD	24(R5), F5
+	WFMADB	V4, V7, V5, V7
+	FMOVD	16(R5), F5
+	WFMADB	V6, V1, V7, V1
+	FMOVD	8(R5), F7
+	WFMADB	V4, V5, V7, V5
+	RISBGZ	$57, $60, $3, R1, R3
+	WFMADB	V6, V1, V5, V6
+	MOVD	$·exptexp<>+0(SB), R2
+	WFCHDBS	V2, V0, V0
+	WORD	$0x68132000	//ld	%f1,0(%r3,%r2)
+	FMADD	F1, F4, F4
+	MOVD	$0x4086000000000000, R2
+	WFMADB	V4, V6, V1, V4
+	BEQ	L21
+	ADDW	$0xF000, R1
+	RISBGN	$0, $15, $48, R1, R2
+	LDGR	R2, F0
+	FMADD	F0, F4, F0
+	MOVD	$·expx4ff<>+0(SB), R3
+	FMOVD	0(R3), F2
+	FMUL	F2, F0	// final multiply by the expx4ff constant
+	FMOVD	F0, ret+8(FP)
+	RET
+L13:
+	FMOVD	$0, F0	// result = 0
+	FMOVD	F0, ret+8(FP)
+	RET
+L21:
+	ADDW	$0x1000, R1
+	RISBGN	$0, $15, $48, R1, R2
+	LDGR	R2, F0
+	FMADD	F0, F4, F0
+	MOVD	$·expx2ff<>+0(SB), R3
+	FMOVD	0(R3), F2
+	FMUL	F2, F0	// final multiply by the expx2ff constant
+	FMOVD	F0, ret+8(FP)
+	RET
+LEXITTAGexp:
+	FMOVD	F0, ret+8(FP)	// F0 has not been modified on the paths that reach here: return x itself
+	RET
diff --git a/src/math/expm1.go b/src/math/expm1.go
new file mode 100644
index 0000000..ff1c82f
--- /dev/null
+++ b/src/math/expm1.go
@@ -0,0 +1,244 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c
+// and came with this notice. The go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// expm1(x)
+// Returns exp(x)-1, the exponential of x minus 1.
+//
+// Method
+//   1. Argument reduction:
+//      Given x, find r and integer k such that
+//
+//               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
+//
+//      Here a correction term c will be computed to compensate
+//      the error in r when rounded to a floating-point number.
+//
+//   2. Approximating expm1(r) by a special rational function on
+//      the interval [0,0.34658]:
+//      Since
+//          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ...
+//      we define R1(r*r) by
+//          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r)
+//      That is,
+//          R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
+//                   = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
+//                   = 1 - r**2/60 + r**4/2520 - r**6/100800 + ...
+//      We use a special Remez algorithm on [0,0.347] to generate
+//      a polynomial of degree 5 in r*r to approximate R1. The
+//      maximum error of this polynomial approximation is bounded
+//      by 2**-61. In other words,
+//          R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
+//      where   Q1  =  -1.6666666666666567384E-2,
+//              Q2  =   3.9682539681370365873E-4,
+//              Q3  =  -9.9206344733435987357E-6,
+//              Q4  =   2.5051361420808517002E-7,
+//              Q5  =  -6.2843505682382617102E-9;
+//      (where z=r*r, and the values of Q1 to Q5 are listed below)
+//      with error bounded by
+//          |                  5           |     -61
+//          | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
+//          |                              |
+//
+//      expm1(r) = exp(r)-1 is then computed by the following
+//      specific way, which minimizes the accumulated rounding error:
+//                             2     3
+//                            r     r    [ 3 - (R1 + R1*r/2)  ]
+//            expm1(r) = r + --- + --- * [--------------------]
+//                            2     2    [ 6 - r*(3 - R1*r/2) ]
+//
+//      To compensate the error in the argument reduction, we use
+//              expm1(r+c) = expm1(r) + c + expm1(r)*c
+//                         ~ expm1(r) + c + r*c
+//      Thus c+r*c will be added in as the correction terms for
+//      expm1(r+c). Now rearrange the terms so that compiler optimization
+//      cannot spoil the result:
+//                      (      2                                    2 )
+//                      ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
+//       expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
+//                      ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
+//                      (                                             )
+//
+//                 = r - E
+//   3. Scale back to obtain expm1(x):
+//      From step 1, we have
+//         expm1(x) = either 2**k*[expm1(r)+1] - 1
+//                  = or     2**k*[expm1(r) + (1-2**-k)]
+//   4. Implementation notes:
+//      (A). To save one multiplication, we scale the coefficient Qi
+//           to Qi*2**i, and replace z by (x**2)/2.
+//      (B). To achieve maximum accuracy, we compute expm1(x) by
+//        (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
+//        (ii)  if k=0, return r-E
+//        (iii) if k=-1, return 0.5*(r-E)-0.5
+//        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
+//                     else          return  1.0+2.0*(r-E);
+//        (v)   if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1)
+//        (vi)  if k <= 20, return 2**k((1-2**-k)-(E-r)), else
+//        (vii) return 2**k(1-((E+2**-k)-r))
+//
+// Special cases:
+//      expm1(INF) is INF, expm1(NaN) is NaN;
+//      expm1(-INF) is -1, and
+//      for finite argument, only expm1(0)=0 is exact.
+//
+// Accuracy:
+//      according to an error analysis, the error is always less than
+//      1 ulp (unit in the last place).
+//
+// Misc. info.
+//      For IEEE double
+//          if x >  7.09782712893383973096e+02 then expm1(x) overflow
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+//
+
+// Expm1 returns e**x - 1, the base-e exponential of x minus 1.
+// It is more accurate than Exp(x) - 1 when x is near zero.
+//
+// Special cases are:
+//
+//	Expm1(+Inf) = +Inf
+//	Expm1(-Inf) = -1
+//	Expm1(NaN) = NaN
+//
+// Very large values overflow to -1 or +Inf.
+func Expm1(x float64) float64 {
+	if haveArchExpm1 { // haveArchExpm1 and archExpm1 are declared outside this file
+		return archExpm1(x)
+	}
+	return expm1(x) // pure-Go implementation below
+}
+
+func expm1(x float64) float64 {
+	const (
+		Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF
+		Ln2X56     = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1
+		Ln2HalfX3  = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73
+		Ln2Half    = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef
+		Ln2Hi      = 6.93147180369123816490e-01 // 0x3fe62e42fee00000
+		Ln2Lo      = 1.90821492927058770002e-10 // 0x3dea39ef35793c76
+		InvLn2     = 1.44269504088896338700e+00 // 0x3ff71547652b82fe
+		Tiny       = 1.0 / (1 << 54)            // 2**-54 = 0x3c90000000000000
+		// scaled coefficients related to expm1
+		Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4
+		Q2 = 1.58730158725481460165e-03  // 0x3F5A01A019FE5585
+		Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7
+		Q4 = 4.00821782732936239552e-06  // 0x3ED0CFCA86E65239
+		Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D
+	)
+
+	// special cases
+	switch {
+	case IsInf(x, 1) || IsNaN(x):
+		return x
+	case IsInf(x, -1):
+		return -1
+	}
+
+	absx := x // becomes |x| below
+	sign := false // true when x is negative
+	if x < 0 {
+		absx = -absx
+		sign = true
+	}
+
+	// filter out huge argument
+	if absx >= Ln2X56 { // if |x| >= 56 * ln2
+		if sign {
+			return -1 // x < -56*ln2, return -1
+		}
+		if absx >= Othreshold { // if |x| >= 709.78...
+			return Inf(1)
+		}
+	}
+
+	// argument reduction
+	var c float64 // correction term for the rounding error of the reduced argument
+	var k int // multiple of ln2 removed from x
+	if absx > Ln2Half { // if  |x| > 0.5 * ln2
+		var hi, lo float64
+		if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2
+			if !sign {
+				hi = x - Ln2Hi
+				lo = Ln2Lo
+				k = 1
+			} else {
+				hi = x + Ln2Hi
+				lo = -Ln2Lo
+				k = -1
+			}
+		} else {
+			if !sign {
+				k = int(InvLn2*x + 0.5)
+			} else {
+				k = int(InvLn2*x - 0.5)
+			}
+			t := float64(k)
+			hi = x - t*Ln2Hi // t * Ln2Hi is exact here
+			lo = t * Ln2Lo
+		}
+		x = hi - lo // x now holds the reduced argument r
+		c = (hi - x) - lo // what was lost when hi - lo was rounded into x
+	} else if absx < Tiny { // when |x| < 2**-54, return x
+		return x
+	} else {
+		k = 0
+	}
+
+	// x is now in primary range
+	hfx := 0.5 * x
+	hxs := x * hfx // x*x/2, the scaled z of implementation note (A)
+	r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))) // R1 evaluated with the scaled coefficients
+	t := 3 - r1*hfx
+	e := hxs * ((r1 - t) / (6.0 - x*t))
+	if k == 0 {
+		return x - (x*e - hxs) // c is 0
+	}
+	e = (x*(e-c) - c) // fold the correction term into e
+	e -= hxs
+	switch {
+	case k == -1: // case (iii) of the header comment
+		return 0.5*(x-e) - 0.5
+	case k == 1: // case (iv) of the header comment
+		if x < -0.25 {
+			return -2 * (e - (x + 0.5))
+		}
+		return 1 + 2*(x-e)
+	case k <= -2 || k > 56: // suffice to return exp(x)-1
+		y := 1 - (e - x)
+		y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
+		return y - 1
+	}
+	if k < 20 { // case (vi); note the header comment states this bound as k <= 20
+		t := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // t=1-2**-k
+		y := t - (e - x)
+		y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
+		return y
+	}
+	t = Float64frombits(uint64(0x3ff-k) << 52) // 2**-k
+	y := x - (e + t) // case (vii): with the y++ below this is 1 - ((e + 2**-k) - x)
+	y++
+	y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
+	return y
+}
diff --git a/src/math/expm1_s390x.s b/src/math/expm1_s390x.s
new file mode 100644
index 0000000..16c861b
--- /dev/null
+++ b/src/math/expm1_s390x.s
@@ -0,0 +1,194 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial approximation and other constants
+DATA ·expm1rodataL22<> + 0(SB)/8, $-1.0
+DATA ·expm1rodataL22<> + 8(SB)/8, $800.0E+00	// second range limit compared against |x| in expm1Asm
+DATA ·expm1rodataL22<> + 16(SB)/8, $1.0
+DATA ·expm1rodataL22<> + 24(SB)/8, $-.231904681384629956E-16
+DATA ·expm1rodataL22<> + 32(SB)/8, $0.50000000000000029671E+00
+DATA ·expm1rodataL22<> + 40(SB)/8, $0.16666666666666676570E+00
+DATA ·expm1rodataL22<> + 48(SB)/8, $0.83333333323590973444E-02
+DATA ·expm1rodataL22<> + 56(SB)/8, $0.13889096526400683566E-02
+DATA ·expm1rodataL22<> + 64(SB)/8, $0.41666666661701152924E-01
+DATA ·expm1rodataL22<> + 72(SB)/8, $0.19841562053987360264E-03
+DATA ·expm1rodataL22<> + 80(SB)/8, $-.693147180559945286E+00	// -ln(2)
+DATA ·expm1rodataL22<> + 88(SB)/8, $0.144269504088896339E+01	// log2(e)
+DATA ·expm1rodataL22<> + 96(SB)/8, $704.0E+00	// first range limit compared against |x| in expm1Asm
+GLOBL ·expm1rodataL22<> + 0(SB), RODATA, $104
+
+DATA ·expm1xmone<> + 0(SB)/8, $0xbff0000000000000	// float64 bit pattern of -1.0
+GLOBL ·expm1xmone<> + 0(SB), RODATA, $8
+DATA ·expm1xinf<> + 0(SB)/8, $0x7ff0000000000000	// float64 bit pattern of +Inf
+GLOBL ·expm1xinf<> + 0(SB), RODATA, $8
+DATA ·expm1x4ff<> + 0(SB)/8, $0x4ff0000000000000
+GLOBL ·expm1x4ff<> + 0(SB), RODATA, $8
+DATA ·expm1x2ff<> + 0(SB)/8, $0x2ff0000000000000
+GLOBL ·expm1x2ff<> + 0(SB), RODATA, $8
+DATA ·expm1xaddexp<> + 0(SB)/8, $0xc2f0000100003ff0
+GLOBL ·expm1xaddexp<> + 0(SB), RODATA, $8
+
+// Log multipliers table (16 entries of 8 bytes, indexed by a 4-bit field in expm1Asm)
+DATA ·expm1tab<> + 0(SB)/8, $0.0
+DATA ·expm1tab<> + 8(SB)/8, $-.171540871271399150E-01
+DATA ·expm1tab<> + 16(SB)/8, $-.306597931864376363E-01
+DATA ·expm1tab<> + 24(SB)/8, $-.410200970469965021E-01
+DATA ·expm1tab<> + 32(SB)/8, $-.486343079978231466E-01
+DATA ·expm1tab<> + 40(SB)/8, $-.538226193725835820E-01
+DATA ·expm1tab<> + 48(SB)/8, $-.568439602538111520E-01
+DATA ·expm1tab<> + 56(SB)/8, $-.579091847395528847E-01
+DATA ·expm1tab<> + 64(SB)/8, $-.571909584179366341E-01
+DATA ·expm1tab<> + 72(SB)/8, $-.548312665987204407E-01
+DATA ·expm1tab<> + 80(SB)/8, $-.509471843643441085E-01
+DATA ·expm1tab<> + 88(SB)/8, $-.456353588448863359E-01
+DATA ·expm1tab<> + 96(SB)/8, $-.389755254243262365E-01
+DATA ·expm1tab<> + 104(SB)/8, $-.310332908285244231E-01
+DATA ·expm1tab<> + 112(SB)/8, $-.218623539150173528E-01
+DATA ·expm1tab<> + 120(SB)/8, $-.115062908917949451E-01
+GLOBL ·expm1tab<> + 0(SB), RODATA, $128
+
+// Expm1 returns e**x - 1, the base-e exponential of x minus 1.
+// It is more accurate than Exp(x) - 1 when x is near zero.
+//
+// Special cases are:
+//      Expm1(+Inf) = +Inf
+//      Expm1(-Inf) = -1
+//      Expm1(NaN) = NaN
+// Very large values overflow to -1 or +Inf.
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT	·expm1Asm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	MOVD	$·expm1rodataL22<>+0(SB), R5	// R5 = base of the constant table
+	LTDBR	F0, F0
+	BLTU	L20
+	FMOVD	F0, F2	// F2 = x on this path
+L2:
+	WORD	$0xED205060	//cdb	%f2,.L23-.L22(%r5)  (compare F2 with 96(R5) = 704.0)
+	BYTE	$0x00
+	BYTE	$0x19
+	BGE	L16
+	BVS	L16
+	WFCEDBS	V2, V2, V2	// NOTE(review): looks like a NaN self-compare — confirm
+	BVS	LEXITTAGexpm1
+	MOVD	$·expm1xaddexp<>+0(SB), R1
+	FMOVD	88(R5), F1
+	FMOVD	0(R1), F2
+	WFMSDB	V0, V1, V2, V1
+	FMOVD	80(R5), F6
+	WFADB	V1, V2, V4
+	FMOVD	72(R5), F2
+	FMADD	F6, F4, F0
+	FMOVD	64(R5), F3
+	FMOVD	56(R5), F6
+	FMOVD	48(R5), F5
+	FMADD	F2, F0, F6
+	WFMADB	V0, V5, V3, V5
+	WFMDB	V0, V0, V2
+	LGDR	F1, R1
+	WFMADB	V6, V2, V5, V6
+	FMOVD	40(R5), F3
+	FMOVD	32(R5), F5
+	WFMADB	V0, V3, V5, V3
+	FMOVD	24(R5), F5
+	WFMADB	V2, V6, V3, V2
+	FMADD	F5, F4, F0
+	FMOVD	16(R5), F6
+	WFMADB	V0, V2, V6, V2
+	RISBGZ	$57, $60, $3, R1, R3
+	WORD	$0xB3130022	//lcdbr	%f2,%f2
+	MOVD	$·expm1tab<>+0(SB), R2	// R2 = base of the expm1tab table
+	WORD	$0x68432000	//ld	%f4,0(%r3,%r2)
+	FMADD	F4, F0, F0
+	SLD	$48, R1, R2
+	WFMSDB	V2, V0, V4, V0
+	LDGR	R2, F4
+	WORD	$0xB3130000	//lcdbr	%f0,%f0
+	FSUB	F4, F6
+	WFMSDB	V0, V4, V6, V0
+	FMOVD	F0, ret+8(FP)
+	RET
+L16:
+	WFCEDBS	V2, V2, V4
+	BVS	LEXITTAGexpm1
+	WORD	$0xED205008	//cdb	%f2,.L34-.L22(%r5)  (compare F2 with 8(R5) = 800.0)
+	BYTE	$0x00
+	BYTE	$0x19
+	BLT	L6
+	WFCEDBS	V2, V0, V0
+	BVS	L7
+	MOVD	$·expm1xinf<>+0(SB), R1
+	FMOVD	0(R1), F0	// result = +Inf
+	FMOVD	F0, ret+8(FP)
+	RET
+L20:
+	WORD	$0xB3130020	//lcdbr	%f2,%f0  (F2 = -x)
+	BR	L2
+L6:
+	MOVD	$·expm1xaddexp<>+0(SB), R1
+	FMOVD	88(R5), F5
+	FMOVD	0(R1), F4
+	WFMSDB	V0, V5, V4, V5
+	FMOVD	80(R5), F3
+	WFADB	V5, V4, V1
+	VLEG	$0, 48(R5), V16
+	WFMADB	V1, V3, V0, V3
+	FMOVD	56(R5), F4
+	FMOVD	64(R5), F7
+	FMOVD	72(R5), F6
+	WFMADB	V3, V16, V7, V16
+	WFMADB	V3, V6, V4, V6
+	WFMDB	V3, V3, V4
+	MOVD	$·expm1tab<>+0(SB), R2
+	WFMADB	V6, V4, V16, V6
+	VLEG	$0, 32(R5), V16
+	FMOVD	40(R5), F7
+	WFMADB	V3, V7, V16, V7
+	VLEG	$0, 24(R5), V16
+	WFMADB	V4, V6, V7, V4
+	WFMADB	V1, V16, V3, V1
+	FMOVD	16(R5), F6
+	FMADD	F4, F1, F6
+	LGDR	F5, R1
+	WORD	$0xB3130066	//lcdbr	%f6,%f6
+	RISBGZ	$57, $60, $3, R1, R3
+	WORD	$0x68432000	//ld	%f4,0(%r3,%r2)
+	FMADD	F4, F1, F1
+	MOVD	$0x4086000000000000, R2
+	FMSUB	F1, F6, F4
+	WORD	$0xB3130044	//lcdbr	%f4,%f4
+	WFCHDBS	V2, V0, V0
+	BEQ	L21
+	ADDW	$0xF000, R1
+	RISBGN	$0, $15, $48, R1, R2
+	LDGR	R2, F0
+	FMADD	F0, F4, F0
+	MOVD	$·expm1x4ff<>+0(SB), R3
+	FMOVD	0(R5), F4	// F4 = -1.0, the first table entry
+	FMOVD	0(R3), F2
+	WFMADB	V2, V0, V4, V0
+	FMOVD	F0, ret+8(FP)
+	RET
+L7:
+	MOVD	$·expm1xmone<>+0(SB), R1
+	FMOVD	0(R1), F0	// result = -1.0
+	FMOVD	F0, ret+8(FP)
+	RET
+L21:
+	ADDW	$0x1000, R1
+	RISBGN	$0, $15, $48, R1, R2
+	LDGR	R2, F0
+	FMADD	F0, F4, F0
+	MOVD	$·expm1x2ff<>+0(SB), R3
+	FMOVD	0(R5), F4	// F4 = -1.0, the first table entry
+	FMOVD	0(R3), F2
+	WFMADB	V2, V0, V4, V0
+	FMOVD	F0, ret+8(FP)
+	RET
+LEXITTAGexpm1:
+	FMOVD	F0, ret+8(FP)	// F0 has not been modified on the paths that reach here: return x itself
+	RET
diff --git a/src/math/export_s390x_test.go b/src/math/export_s390x_test.go
new file mode 100644
index 0000000..827bf1c
--- /dev/null
+++ b/src/math/export_s390x_test.go
@@ -0,0 +1,31 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Export the internal functions and the hasVX variable for testing.
+var Log10NoVec = log10
+var CosNoVec = cos
+var CoshNoVec = cosh
+var SinNoVec = sin
+var SinhNoVec = sinh
+var TanhNoVec = tanh
+var Log1pNovec = log1p // note: "Novec" spelling from here on, "NoVec" above
+var AtanhNovec = atanh
+var AcosNovec = acos
+var AcoshNovec = acosh
+var AsinNovec = asin
+var AsinhNovec = asinh
+var ErfNovec = erf
+var ErfcNovec = erfc
+var AtanNovec = atan
+var Atan2Novec = atan2
+var CbrtNovec = cbrt
+var LogNovec = log
+var TanNovec = tan
+var ExpNovec = exp
+var Expm1Novec = expm1
+var PowNovec = pow
+var HypotNovec = hypot
+var HasVX = hasVX
diff --git a/src/math/export_test.go b/src/math/export_test.go
new file mode 100644
index 0000000..53d9205
--- /dev/null
+++ b/src/math/export_test.go
@@ -0,0 +1,14 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Export internal functions and the reduceThreshold constant for testing.
+var ExpGo = exp
+var Exp2Go = exp2
+var HypotGo = hypot
+var SqrtGo = sqrt
+var TrigReduce = trigReduce
+
+const ReduceThreshold = reduceThreshold
diff --git a/src/math/floor.go b/src/math/floor.go
new file mode 100644
index 0000000..cb58564
--- /dev/null
+++ b/src/math/floor.go
@@ -0,0 +1,151 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Floor returns the greatest integer value less than or equal to x.
+//
+// Special cases are:
+//
+//	Floor(±0) = ±0
+//	Floor(±Inf) = ±Inf
+//	Floor(NaN) = NaN
+func Floor(x float64) float64 {
+	if haveArchFloor { // constant chosen by build tags (floor_asm.go / floor_noasm.go)
+		return archFloor(x)
+	}
+	return floor(x) // portable fallback
+}
+
+func floor(x float64) float64 { // portable implementation behind Floor
+	if x == 0 || IsNaN(x) || IsInf(x, 0) { // special cases are returned unchanged
+		return x
+	}
+	if x < 0 {
+		d, fract := Modf(-x) // split |x| into its integer and fractional parts
+		if fract != 0.0 {
+			d = d + 1 // a non-zero fraction means the floor lies one further from zero
+		}
+		return -d
+	}
+	d, _ := Modf(x) // positive x: the integer part is the floor
+	return d
+}
+
+// Ceil returns the least integer value greater than or equal to x.
+//
+// Special cases are:
+//
+//	Ceil(±0) = ±0
+//	Ceil(±Inf) = ±Inf
+//	Ceil(NaN) = NaN
+func Ceil(x float64) float64 {
+	if haveArchCeil { // constant chosen by build tags (floor_asm.go / floor_noasm.go)
+		return archCeil(x)
+	}
+	return ceil(x) // portable fallback
+}
+
+func ceil(x float64) float64 { // portable implementation behind Ceil
+	return -Floor(-x) // uses the identity Ceil(x) == -Floor(-x)
+}
+
+// Trunc returns the integer value of x.
+//
+// Special cases are:
+//
+//	Trunc(±0) = ±0
+//	Trunc(±Inf) = ±Inf
+//	Trunc(NaN) = NaN
+func Trunc(x float64) float64 {
+	if haveArchTrunc { // constant chosen by build tags (floor_asm.go / floor_noasm.go)
+		return archTrunc(x)
+	}
+	return trunc(x) // portable fallback
+}
+
+func trunc(x float64) float64 { // portable implementation behind Trunc
+	if x == 0 || IsNaN(x) || IsInf(x, 0) { // special cases are returned unchanged
+		return x
+	}
+	d, _ := Modf(x) // keep the integer part, discard the fraction
+	return d
+}
+
+// Round returns the nearest integer, rounding half away from zero.
+//
+// Special cases are:
+//
+//	Round(±0) = ±0
+//	Round(±Inf) = ±Inf
+//	Round(NaN) = NaN
+func Round(x float64) float64 {
+	// Round is a faster implementation of:
+	//
+	// func Round(x float64) float64 {
+	//   t := Trunc(x)
+	//   if Abs(x-t) >= 0.5 {
+	//     return t + Copysign(1, x)
+	//   }
+	//   return t
+	// }
+	bits := Float64bits(x)
+	e := uint(bits>>shift) & mask // biased exponent field (shift, mask, bias are package constants defined elsewhere)
+	if e < bias {
+		// Round abs(x) < 1 including denormals.
+		bits &= signMask // +-0
+		if e == bias-1 { // exponent of -1, i.e. 0.5 <= abs(x) < 1
+			bits |= uvone // +-1
+		}
+	} else if e < bias+shift {
+		// Round any abs(x) >= 1 containing a fractional component [0,1).
+		//
+		// Numbers with larger exponents are returned unchanged since they
+		// must be either an integer, infinity, or NaN.
+		const half = 1 << (shift - 1)
+		e -= bias // e is now the unbiased exponent
+		bits += half >> e // add one half at the top fractional bit; any carry propagates into the integer part
+		bits &^= fracMask >> e // clear the fractional bits
+	}
+	return Float64frombits(bits)
+}
+
+// RoundToEven returns the nearest integer, rounding ties to even.
+//
+// Special cases are:
+//
+//	RoundToEven(±0) = ±0
+//	RoundToEven(±Inf) = ±Inf
+//	RoundToEven(NaN) = NaN
+func RoundToEven(x float64) float64 {
+	// RoundToEven is a faster implementation of:
+	//
+	// func RoundToEven(x float64) float64 {
+	//   t := math.Trunc(x)
+	//   odd := math.Remainder(t, 2) != 0
+	//   if d := math.Abs(x - t); d > 0.5 || (d == 0.5 && odd) {
+	//     return t + math.Copysign(1, x)
+	//   }
+	//   return t
+	// }
+	bits := Float64bits(x)
+	e := uint(bits>>shift) & mask // biased exponent field (shift, mask, bias are package constants defined elsewhere)
+	if e >= bias {
+		// Round abs(x) >= 1.
+		// - Large numbers without fractional components, infinity, and NaN are unchanged.
+		// - Add 0.499.. or 0.5 before truncating depending on whether the truncated
+		//   number is even or odd (respectively).
+		const halfMinusULP = (1 << (shift - 1)) - 1
+		e -= bias // e is now the unbiased exponent
+		bits += (halfMinusULP + (bits>>(shift-e))&1) >> e // (bits>>(shift-e))&1 is the lowest integer bit, i.e. the odd flag
+		bits &^= fracMask >> e // clear the fractional bits
+	} else if e == bias-1 && bits&fracMask != 0 {
+		// Round 0.5 < abs(x) < 1.
+		bits = bits&signMask | uvone // +-1
+	} else {
+		// Round abs(x) <= 0.5 including denormals.
+		bits &= signMask // +-0
+	}
+	return Float64frombits(bits)
+}
diff --git a/src/math/floor_386.s b/src/math/floor_386.s
new file mode 100644
index 0000000..1990cb0
--- /dev/null
+++ b/src/math/floor_386.s
@@ -0,0 +1,46 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func archCeil(x float64) float64
+TEXT ·archCeil(SB),NOSPLIT,$0
+	FMOVD   x+0(FP), F0  // F0=x
+	FSTCW   -2(SP)       // save old Control Word
+	MOVW    -2(SP), AX   // AX = old Control Word
+	ANDW    $0xf3ff, AX  // clear the Rounding Control field (bits 10-11)
+	ORW     $0x0800, AX  // Rounding Control set to +Inf
+	MOVW    AX, -4(SP)   // store new Control Word
+	FLDCW   -4(SP)       // load new Control Word
+	FRNDINT              // F0=Ceil(x)
+	FLDCW   -2(SP)       // load old Control Word
+	FMOVDP  F0, ret+8(FP) // store the result
+	RET
+
+// func archFloor(x float64) float64
+TEXT ·archFloor(SB),NOSPLIT,$0
+	FMOVD   x+0(FP), F0  // F0=x
+	FSTCW   -2(SP)       // save old Control Word
+	MOVW    -2(SP), AX   // AX = old Control Word
+	ANDW    $0xf3ff, AX  // clear the Rounding Control field (bits 10-11)
+	ORW     $0x0400, AX  // Rounding Control set to -Inf
+	MOVW    AX, -4(SP)   // store new Control Word
+	FLDCW   -4(SP)       // load new Control Word
+	FRNDINT              // F0=Floor(x)
+	FLDCW   -2(SP)       // load old Control Word
+	FMOVDP  F0, ret+8(FP) // store the result
+	RET
+
+// func archTrunc(x float64) float64
+TEXT ·archTrunc(SB),NOSPLIT,$0
+	FMOVD   x+0(FP), F0  // F0=x
+	FSTCW   -2(SP)       // save old Control Word
+	MOVW    -2(SP), AX   // AX = old Control Word
+	ORW     $0x0c00, AX  // Rounding Control set to truncate (sets both field bits, so no clearing AND is needed)
+	MOVW    AX, -4(SP)   // store new Control Word
+	FLDCW   -4(SP)       // load new Control Word
+	FRNDINT              // F0=Trunc(x)
+	FLDCW   -2(SP)       // load old Control Word
+	FMOVDP  F0, ret+8(FP) // store the result
+	RET
diff --git a/src/math/floor_amd64.s b/src/math/floor_amd64.s
new file mode 100644
index 0000000..0880499
--- /dev/null
+++ b/src/math/floor_amd64.s
@@ -0,0 +1,76 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define Big		0x4330000000000000 // 2**52
+
+// func archFloor(x float64) float64
+TEXT ·archFloor(SB),NOSPLIT,$0
+	MOVQ	x+0(FP), AX
+	MOVQ	$~(1<<63), DX // sign bit mask
+	ANDQ	AX,DX // DX = |x|
+	SUBQ	$1,DX // DX = bits(|x|) - 1; wraps to the maximum uint64 when |x| == 0
+	MOVQ    $(Big - 1), CX // if |x| >= 2**52 or IsNaN(x) or |x| == 0, return x
+	CMPQ	DX,CX
+	JAE     isBig_floor // unsigned compare, so the wrapped |x| == 0 case also takes this branch
+	MOVQ	AX, X0 // X0 = x
+	CVTTSD2SQ	X0, AX // AX = int(x), truncated toward zero
+	CVTSQ2SD	AX, X1 // X1 = float(int(x))
+	CMPSD	X1, X0, 1 // compare LT; X0 = 0xffffffffffffffff or 0
+	MOVSD	$(-1.0), X2
+	ANDPD	X2, X0 // if x < float(int(x)) {X0 = -1} else {X0 = 0}
+	ADDSD	X1, X0 // X0 = float(int(x)) + adjustment
+	MOVSD	X0, ret+8(FP)
+	RET
+isBig_floor:
+	MOVQ    AX, ret+8(FP) // return x
+	RET
+
+// func archCeil(x float64) float64
+TEXT ·archCeil(SB),NOSPLIT,$0
+	MOVQ	x+0(FP), AX
+	MOVQ	$~(1<<63), DX // sign bit mask
+	MOVQ	AX, BX // BX = copy of x
+	ANDQ    DX, BX // BX = |x|
+	MOVQ    $Big, CX // if |x| >= 2**52 or IsNaN(x), return x
+	CMPQ    BX, CX
+	JAE     isBig_ceil
+	MOVQ	AX, X0 // X0 = x
+	MOVQ	DX, X2 // X2 = sign bit mask
+	CVTTSD2SQ	X0, AX // AX = int(x), truncated toward zero
+	ANDNPD	X0, X2 // X2 = sign
+	CVTSQ2SD	AX, X1	// X1 = float(int(x))
+	CMPSD	X1, X0, 2 // compare LE; X0 = 0xffffffffffffffff or 0
+	ORPD	X2, X1 // if X1 = 0.0, incorporate sign
+	MOVSD	$1.0, X3
+	ANDNPD	X3, X0 // X0 = 1.0 where the compare mask is clear, 0 where it is set
+	ORPD	X2, X0 // if float(int(x)) <= x {X0 = 1} else {X0 = -0}
+	ADDSD	X1, X0 // X0 = float(int(x)) + adjustment
+	MOVSD	X0, ret+8(FP)
+	RET
+isBig_ceil:
+	MOVQ	AX, ret+8(FP) // return x
+	RET
+
+// func archTrunc(x float64) float64
+TEXT ·archTrunc(SB),NOSPLIT,$0
+	MOVQ	x+0(FP), AX
+	MOVQ	$~(1<<63), DX // sign bit mask
+	MOVQ	AX, BX // BX = copy of x
+	ANDQ    DX, BX // BX = |x|
+	MOVQ    $Big, CX // if |x| >= 2**52 or IsNaN(x), return x
+	CMPQ    BX, CX
+	JAE     isBig_trunc
+	MOVQ	AX, X0 // X0 = x
+	MOVQ	DX, X2 // X2 = sign bit mask
+	CVTTSD2SQ	X0, AX // AX = int(x), truncated toward zero
+	ANDNPD	X0, X2 // X2 = sign
+	CVTSQ2SD	AX, X0 // X0 = float(int(x))
+	ORPD	X2, X0 // if X0 = 0.0, incorporate sign
+	MOVSD	X0, ret+8(FP)
+	RET
+isBig_trunc:
+	MOVQ    AX, ret+8(FP) // return x
+	RET
diff --git a/src/math/floor_arm64.s b/src/math/floor_arm64.s
new file mode 100644
index 0000000..d9c5df7
--- /dev/null
+++ b/src/math/floor_arm64.s
@@ -0,0 +1,26 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func archFloor(x float64) float64
+TEXT ·archFloor(SB),NOSPLIT,$0
+	FMOVD	x+0(FP), F0 // F0 = x
+	FRINTMD	F0, F0 // round to integral, toward minus infinity
+	FMOVD	F0, ret+8(FP)
+	RET
+
+// func archCeil(x float64) float64
+TEXT ·archCeil(SB),NOSPLIT,$0
+	FMOVD	x+0(FP), F0 // F0 = x
+	FRINTPD	F0, F0 // round to integral, toward plus infinity
+	FMOVD	F0, ret+8(FP)
+	RET
+
+// func archTrunc(x float64) float64
+TEXT ·archTrunc(SB),NOSPLIT,$0
+	FMOVD	x+0(FP), F0 // F0 = x
+	FRINTZD	F0, F0 // round to integral, toward zero
+	FMOVD	F0, ret+8(FP)
+	RET
diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go
new file mode 100644
index 0000000..fb419d6
--- /dev/null
+++ b/src/math/floor_asm.go
@@ -0,0 +1,19 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || s390x || wasm
+
+package math
+
+const haveArchFloor = true // Floor dispatches to archFloor on the architectures in the build tag
+
+func archFloor(x float64) float64 // body-less declaration; implemented in the per-architecture floor_*.s files
+
+const haveArchCeil = true // Ceil dispatches to archCeil
+
+func archCeil(x float64) float64 // implemented in assembly
+
+const haveArchTrunc = true // Trunc dispatches to archTrunc
+
+func archTrunc(x float64) float64 // implemented in assembly
diff --git a/src/math/floor_noasm.go b/src/math/floor_noasm.go
new file mode 100644
index 0000000..5641c7e
--- /dev/null
+++ b/src/math/floor_noasm.go
@@ -0,0 +1,25 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !s390x && !wasm
+
+package math
+
+const haveArchFloor = false // no assembly Floor here; Floor uses the portable floor
+
+func archFloor(x float64) float64 { // stub so the package compiles; Floor never calls it while haveArchFloor is false
+	panic("not implemented")
+}
+
+const haveArchCeil = false // no assembly Ceil here; Ceil uses the portable ceil
+
+func archCeil(x float64) float64 { // stub so the package compiles; Ceil never calls it while haveArchCeil is false
+	panic("not implemented")
+}
+
+const haveArchTrunc = false // no assembly Trunc here; Trunc uses the portable trunc
+
+func archTrunc(x float64) float64 { // stub so the package compiles; Trunc never calls it while haveArchTrunc is false
+	panic("not implemented")
+}
diff --git a/src/math/floor_ppc64x.s b/src/math/floor_ppc64x.s
new file mode 100644
index 0000000..584c27e
--- /dev/null
+++ b/src/math/floor_ppc64x.s
@@ -0,0 +1,26 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+#include "textflag.h"
+
+TEXT ·archFloor(SB),NOSPLIT,$0 // func archFloor(x float64) float64
+	FMOVD   x+0(FP), F0 // F0 = x
+	FRIM	F0, F0 // round to integral, toward minus infinity
+	FMOVD   F0, ret+8(FP)
+	RET
+
+TEXT ·archCeil(SB),NOSPLIT,$0 // func archCeil(x float64) float64
+	FMOVD   x+0(FP), F0 // F0 = x
+	FRIP    F0, F0 // round to integral, toward plus infinity
+	FMOVD	F0, ret+8(FP)
+	RET
+
+TEXT ·archTrunc(SB),NOSPLIT,$0 // func archTrunc(x float64) float64
+	FMOVD   x+0(FP), F0 // F0 = x
+	FRIZ    F0, F0 // round to integral, toward zero
+	FMOVD   F0, ret+8(FP)
+	RET
diff --git a/src/math/floor_s390x.s b/src/math/floor_s390x.s
new file mode 100644
index 0000000..b5dd462
--- /dev/null
+++ b/src/math/floor_s390x.s
@@ -0,0 +1,26 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func archFloor(x float64) float64
+TEXT ·archFloor(SB),NOSPLIT,$0
+	FMOVD	x+0(FP), F0 // F0 = x
+	FIDBR	$7, F0, F0 // load FP integer; rounding-mode operand 7 = toward minus infinity
+	FMOVD	F0, ret+8(FP)
+	RET
+
+// func archCeil(x float64) float64
+TEXT ·archCeil(SB),NOSPLIT,$0
+	FMOVD	x+0(FP), F0 // F0 = x
+	FIDBR	$6, F0, F0 // load FP integer; rounding-mode operand 6 = toward plus infinity
+	FMOVD	F0, ret+8(FP)
+	RET
+
+// func archTrunc(x float64) float64
+TEXT ·archTrunc(SB),NOSPLIT,$0
+	FMOVD	x+0(FP), F0 // F0 = x
+	FIDBR	$5, F0, F0 // load FP integer; rounding-mode operand 5 = toward zero
+	FMOVD	F0, ret+8(FP)
+	RET
diff --git a/src/math/floor_wasm.s b/src/math/floor_wasm.s
new file mode 100644
index 0000000..3751471
--- /dev/null
+++ b/src/math/floor_wasm.s
@@ -0,0 +1,26 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT ·archFloor(SB),NOSPLIT,$0 // func archFloor(x float64) float64
+	Get SP // push the address operand consumed by the F64Store below
+	F64Load x+0(FP) // push x
+	F64Floor // replace x with floor(x)
+	F64Store ret+8(FP)
+	RET
+
+TEXT ·archCeil(SB),NOSPLIT,$0 // func archCeil(x float64) float64
+	Get SP // push the address operand consumed by the F64Store below
+	F64Load x+0(FP) // push x
+	F64Ceil // replace x with ceil(x)
+	F64Store ret+8(FP)
+	RET
+
+TEXT ·archTrunc(SB),NOSPLIT,$0 // func archTrunc(x float64) float64
+	Get SP // push the address operand consumed by the F64Store below
+	F64Load x+0(FP) // push x
+	F64Trunc // replace x with trunc(x)
+	F64Store ret+8(FP)
+	RET
diff --git a/src/math/fma.go b/src/math/fma.go
new file mode 100644
index 0000000..ba03fbe
--- /dev/null
+++ b/src/math/fma.go
@@ -0,0 +1,175 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import "math/bits"
+
+func zero(x uint64) uint64 { // zero reports x == 0 as an integer: 1 if x is zero, else 0
+	if x == 0 {
+		return 1
+	}
+	return 0
+	// branchless:
+	// return ((x>>1 | x&1) - 1) >> 63
+}
+
+func nonzero(x uint64) uint64 { // nonzero reports x != 0 as an integer: 1 if x is non-zero, else 0
+	if x != 0 {
+		return 1
+	}
+	return 0
+	// branchless:
+	// return 1 - ((x>>1|x&1)-1)>>63
+}
+
+func shl(u1, u2 uint64, n uint) (r1, r2 uint64) { // shl shifts the two-word value u1:u2 left by n bits
+	r1 = u1<<n | u2>>(64-n) | u2<<(n-64) // Go shifts by >= 64 yield 0 (and 64-n or n-64 wraps to a huge count), so only the applicable terms contribute
+	r2 = u2 << n
+	return
+}
+
+func shr(u1, u2 uint64, n uint) (r1, r2 uint64) { // shr shifts the two-word value u1:u2 right by n bits
+	r2 = u2>>n | u1<<(64-n) | u1>>(n-64) // Go shifts by >= 64 yield 0 (and 64-n or n-64 wraps to a huge count), so only the applicable terms contribute
+	r1 = u1 >> n
+	return
+}
+
+// shrcompress compresses the bottom n+1 bits of the two-word
+// value into a single bit. the result is equal to the value
+// shifted to the right by n, except the result's 0th bit is
+// set to the bitwise OR of the bottom n+1 bits.
+func shrcompress(u1, u2 uint64, n uint) (r1, r2 uint64) {
+	// TODO: Performance here is really sensitive to the
+	// order/placement of these branches. n == 0 is common
+	// enough to be in the fast path. Perhaps more measurement
+	// needs to be done to find the optimal order/placement?
+	switch {
+	case n == 0: // nothing to shift
+		return u1, u2
+	case n == 64: // whole-word shift: u1 becomes the low word, u2 collapses into the sticky bit
+		return 0, u1 | nonzero(u2)
+	case n >= 128: // everything is shifted out; only the sticky bit remains
+		return 0, nonzero(u1 | u2)
+	case n < 64:
+		r1, r2 = shr(u1, u2, n)
+		r2 |= nonzero(u2 & (1<<n - 1)) // sticky bit from the n bits shifted out of u2
+	case n < 128:
+		r1, r2 = shr(u1, u2, n)
+		r2 |= nonzero(u1&(1<<(n-64)-1) | u2) // sticky bit from all of u2 plus the low n-64 bits of u1
+	}
+	return
+}
+
+func lz(u1, u2 uint64) (l int32) { // lz counts the leading zero bits of the two-word value u1:u2 (0..128)
+	l = int32(bits.LeadingZeros64(u1))
+	if l == 64 { // high word is all zero: continue counting in the low word
+		l += int32(bits.LeadingZeros64(u2))
+	}
+	return l
+}
+
+// split splits b into sign, biased exponent, and mantissa.
+// It adds the implicit 1 bit to the mantissa for normal values,
+// and normalizes subnormal values.
+func split(b uint64) (sign uint32, exp int32, mantissa uint64) {
+	sign = uint32(b >> 63) // top bit
+	exp = int32(b>>52) & mask // exponent field
+	mantissa = b & fracMask // fraction field
+
+	if exp == 0 {
+		// Normalize value if subnormal.
+		shift := uint(bits.LeadingZeros64(mantissa) - 11) // bits needed to move the top set bit up to bit 52; shadows the package-level shift
+		mantissa <<= shift
+		exp = 1 - int32(shift) // compensate in the exponent, which may become zero or negative
+	} else {
+		// Add implicit 1 bit
+		mantissa |= 1 << 52
+	}
+	return
+}
+
+// FMA returns x * y + z, computed with only one rounding.
+// (That is, FMA returns the fused multiply-add of x, y, and z.)
+func FMA(x, y, z float64) float64 {
+	bx, by, bz := Float64bits(x), Float64bits(y), Float64bits(z)
+	// Inf or NaN or zero x or y involved. At most one rounding will occur.
+	if x == 0.0 || y == 0.0 || bx&uvinf == uvinf || by&uvinf == uvinf {
+		return x*y + z
+	}
+	// Zero z: x*y is the once-rounded result. Adding z would turn an x*y that underflows to -0 into +0.
+	if z == 0.0 {
+		return x * y
+	}
+	// Handle non-finite z separately: with finite x and y, x*y+z always results in z.
+	if bz&uvinf == uvinf {
+		return z
+	}
+
+	// Inputs are (sub)normal. Split x, y, z into sign, exponent, mantissa.
+	xs, xe, xm := split(bx)
+	ys, ye, ym := split(by)
+	zs, ze, zm := split(bz)
+
+	// Compute product p = x*y as sign, exponent, two-word mantissa.
+	// Start with exponent. "is normal" bit isn't subtracted yet.
+	pe := xe + ye - bias + 1
+
+	// pm1:pm2 is the double-word mantissa for the product p. Shift left to leave
+	// top bit in product. Effectively shifts the 106-bit product to the left by 21.
+	pm1, pm2 := bits.Mul64(xm<<10, ym<<11)
+	zm1, zm2 := zm<<10, uint64(0) // z's mantissa with its top bit at bit 62 of the high word
+	ps := xs ^ ys // product sign
+
+	// normalize to 62nd bit
+	is62zero := uint((^pm1 >> 62) & 1) // 1 if bit 62 of pm1 is clear
+	pm1, pm2 = shl(pm1, pm2, is62zero)
+	pe -= int32(is62zero)
+
+	// Swap addition operands so |p| >= |z|
+	if pe < ze || pe == ze && pm1 < zm1 {
+		ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2
+	}
+
+	// Special case: if p == -z the result is always +0 since neither operand is zero.
+	if ps != zs && pe == ze && pm1 == zm1 && pm2 == zm2 {
+		return 0
+	}
+
+	// Align significands
+	zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze))
+
+	// Compute resulting significands, normalizing if necessary.
+	var m, c uint64
+	if ps == zs {
+		// Adding (pm1:pm2) + (zm1:zm2)
+		pm2, c = bits.Add64(pm2, zm2, 0)
+		pm1, _ = bits.Add64(pm1, zm1, c)
+		pe -= int32(^pm1 >> 63) // subtract 1 unless the sum carried into bit 63
+		pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63))
+	} else {
+		// Subtracting (pm1:pm2) - (zm1:zm2)
+		// TODO: should we special-case cancellation?
+		pm2, c = bits.Sub64(pm2, zm2, 0)
+		pm1, _ = bits.Sub64(pm1, zm1, c)
+		nz := lz(pm1, pm2) // leading zeros of the two-word difference
+		pe -= nz
+		m, pm2 = shl(pm1, pm2, uint(nz-1))
+		m |= nonzero(pm2) // fold the remaining low word into a sticky bit
+	}
+
+	// Round and break ties to even
+	if pe > 1022+bias || pe == 1022+bias && (m+1<<9)>>63 == 1 {
+		// rounded value overflows exponent range
+		return Float64frombits(uint64(ps)<<63 | uvinf)
+	}
+	if pe < 0 {
+		n := uint(-pe)
+		m = m>>n | nonzero(m&(1<<n-1)) // shift right by n, keeping a sticky bit for what was shifted out
+		pe = 0
+	}
+	m = ((m + 1<<9) >> 10) & ^zero((m&(1<<10-1))^1<<9)
+	pe &= -int32(nonzero(m)) // a zero mantissa forces a zero exponent field
+	return Float64frombits(uint64(ps)<<63 + uint64(pe)<<52 + m)
+}
diff --git a/src/math/frexp.go b/src/math/frexp.go
new file mode 100644
index 0000000..e194947
--- /dev/null
+++ b/src/math/frexp.go
@@ -0,0 +1,39 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Frexp breaks f into a normalized fraction
+// and an integral power of two.
+// It returns frac and exp satisfying f == frac × 2**exp,
+// with the absolute value of frac in the interval [½, 1).
+//
+// Special cases are:
+//
+//	Frexp(±0) = ±0, 0
+//	Frexp(±Inf) = ±Inf, 0
+//	Frexp(NaN) = NaN, 0
+func Frexp(f float64) (frac float64, exp int) {
+	if haveArchFrexp { // constant defined elsewhere in the package
+		return archFrexp(f)
+	}
+	return frexp(f) // portable fallback
+}
+
+func frexp(f float64) (frac float64, exp int) { // portable implementation behind Frexp
+	// special cases
+	switch {
+	case f == 0:
+		return f, 0 // correctly return -0
+	case IsInf(f, 0) || IsNaN(f):
+		return f, 0
+	}
+	f, exp = normalize(f) // normalize is defined elsewhere; presumably scales subnormals into the normal range and returns the compensating exponent (confirm)
+	x := Float64bits(f)
+	exp += int((x>>shift)&mask) - bias + 1 // unbiased exponent, plus one because frac is scaled into [½, 1)
+	x &^= mask << shift // clear the exponent field
+	x |= (-1 + bias) << shift // set the exponent to -1 so the magnitude lies in [½, 1)
+	frac = Float64frombits(x)
+	return
+}
diff --git a/src/math/gamma.go b/src/math/gamma.go
new file mode 100644
index 0000000..86c6723
--- /dev/null
+++ b/src/math/gamma.go
@@ -0,0 +1,222 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below are from http://netlib.sandia.gov/cephes/cprob/gamma.c.
+// The Go code is a simplified version of the original C.
+//
+//      tgamma.c
+//
+//      Gamma function
+//
+// SYNOPSIS:
+//
+// double x, y, tgamma();
+// extern int signgam;
+//
+// y = tgamma( x );
+//
+// DESCRIPTION:
+//
+// Returns gamma function of the argument. The result is
+// correctly signed, and the sign (+1 or -1) is also
+// returned in a global (extern) variable named signgam.
+// This variable is also filled in by the logarithmic gamma
+// function lgamma().
+//
+// Arguments |x| <= 34 are reduced by recurrence and the function
+// approximated by a rational function of degree 6/7 in the
+// interval (2,3).  Large arguments are handled by Stirling's
+// formula. Large negative arguments are made positive using
+// a reflection formula.
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC      -34, 34      10000       1.3e-16     2.5e-17
+//    IEEE    -170,-33      20000       2.3e-15     3.3e-16
+//    IEEE     -33,  33     20000       9.4e-16     2.2e-16
+//    IEEE      33, 171.6   20000       2.3e-15     3.2e-16
+//
+// Error for arguments outside the test range will be larger
+// owing to error amplification by the exponential function.
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+var _gamP = [...]float64{ // numerator coefficients of the rational approximation evaluated in Gamma, highest degree first
+	1.60119522476751861407e-04,
+	1.19135147006586384913e-03,
+	1.04213797561761569935e-02,
+	4.76367800457137231464e-02,
+	2.07448227648435975150e-01,
+	4.94214826801497100753e-01,
+	9.99999999999999996796e-01,
+}
+var _gamQ = [...]float64{ // denominator coefficients of the same rational approximation, highest degree first
+	-2.31581873324120129819e-05,
+	5.39605580493303397842e-04,
+	-4.45641913851797240494e-03,
+	1.18139785222060435552e-02,
+	3.58236398605498653373e-02,
+	-2.34591795718243348568e-01,
+	7.14304917030273074085e-02,
+	1.00000000000000000320e+00,
+}
+var _gamS = [...]float64{ // coefficients of the polynomial in 1/x used by stirling, highest degree first
+	7.87311395793093628397e-04,
+	-2.29549961613378126380e-04,
+	-2.68132617805781232825e-03,
+	3.47222221605458667310e-03,
+	8.33333333333482257126e-02,
+}
+
+// Gamma function computed by Stirling's formula.
+// The pair of results must be multiplied together to get the actual answer.
+// The multiplication is left to the caller so that, if careful, the caller can avoid
+// infinity for 172 <= x <= 180.
+// The polynomial is valid for 33 <= x <= 172; larger values are only used
+// in reciprocal and produce denormalized floats. The lower precision there
+// masks any imprecision in the polynomial.
+func stirling(x float64) (float64, float64) {
+	if x > 200 { // too large for the computation below: report +Inf as the pair (+Inf, 1)
+		return Inf(1), 1
+	}
+	const (
+		SqrtTwoPi   = 2.506628274631000502417
+		MaxStirling = 143.01608
+	)
+	w := 1 / x
+	w = 1 + w*((((_gamS[0]*w+_gamS[1])*w+_gamS[2])*w+_gamS[3])*w+_gamS[4]) // correction polynomial in 1/x, Horner form
+	y1 := Exp(x)
+	y2 := 1.0
+	if x > MaxStirling { // avoid Pow() overflow
+		v := Pow(x, 0.5*x-0.25) // half of the exponent x-0.5, so v*v == x**(x-0.5)
+		y1, y2 = v, v/y1 // the product y1*y2 is still x**(x-0.5) / e**x
+	} else {
+		y1 = Pow(x, x-0.5) / y1
+	}
+	return y1, SqrtTwoPi * w * y2
+}
+
+// Gamma returns the Gamma function of x.
+//
+// Special cases are:
+//
+//	Gamma(+Inf) = +Inf
+//	Gamma(+0) = +Inf
+//	Gamma(-0) = -Inf
+//	Gamma(x) = NaN for integer x < 0
+//	Gamma(-Inf) = NaN
+//	Gamma(NaN) = NaN
+func Gamma(x float64) float64 {
+	const Euler = 0.57721566490153286060651209008240243104215933593992 // A001620
+	// special cases
+	switch {
+	case isNegInt(x) || IsInf(x, -1) || IsNaN(x):
+		return NaN()
+	case IsInf(x, 1):
+		return Inf(1)
+	case x == 0:
+		if Signbit(x) { // -0
+			return Inf(-1)
+		}
+		return Inf(1)
+	}
+	q := Abs(x)
+	p := Floor(q)
+	if q > 33 { // large |x|: Stirling's formula, with the reflection formula for negative x
+		if x >= 0 {
+			y1, y2 := stirling(x)
+			return y1 * y2
+		}
+		// Note: x is negative but (checked above) not a negative integer,
+		// so x must be small enough to be in range for conversion to int64.
+		// If |x| were >= 2⁶³ it would have to be an integer.
+		signgam := 1
+		if ip := int64(p); ip&1 == 0 { // the result is negative when floor(|x|) is even
+			signgam = -1
+		}
+		z := q - p // fractional part of |x|
+		if z > 0.5 { // measure from the nearest integer instead, giving z in [-0.5, 0.5]
+			p = p + 1
+			z = q - p
+		}
+		z = q * Sin(Pi*z)
+		if z == 0 {
+			return Inf(signgam)
+		}
+		sq1, sq2 := stirling(q)
+		absz := Abs(z)
+		d := absz * sq1 * sq2
+		if IsInf(d, 0) { // the product overflowed: divide step by step instead
+			z = Pi / absz / sq1 / sq2
+		} else {
+			z = Pi / d
+		}
+		return float64(signgam) * z
+	}
+
+	// Reduce argument
+	z := 1.0
+	for x >= 3 { // step down using Gamma(x) = (x-1) * Gamma(x-1)
+		x = x - 1
+		z = z * x
+	}
+	for x < 0 { // step up using Gamma(x) = Gamma(x+1) / x
+		if x > -1e-09 {
+			goto small
+		}
+		z = z / x
+		x = x + 1
+	}
+	for x < 2 { // same recurrence, until x is in [2, 3)
+		if x < 1e-09 {
+			goto small
+		}
+		z = z / x
+		x = x + 1
+	}
+
+	if x == 2 { // Gamma(2) == 1, so the accumulated factor is the answer
+		return z
+	}
+
+	x = x - 2 // the rational approximation is evaluated at x-2
+	p = (((((x*_gamP[0]+_gamP[1])*x+_gamP[2])*x+_gamP[3])*x+_gamP[4])*x+_gamP[5])*x + _gamP[6]
+	q = ((((((x*_gamQ[0]+_gamQ[1])*x+_gamQ[2])*x+_gamQ[3])*x+_gamQ[4])*x+_gamQ[5])*x+_gamQ[6])*x + _gamQ[7]
+	return z * p / q
+
+small:
+	if x == 0 {
+		return Inf(1)
+	}
+	return z / ((1 + Euler*x) * x) // for tiny x, Gamma(x) is approximated by 1 / (x * (1 + Euler*x))
+}
+
+func isNegInt(x float64) bool { // isNegInt reports whether x is negative with no fractional part
+	if x < 0 {
+		_, xf := Modf(x) // xf is the fractional part of x
+		return xf == 0 // NOTE(review): for x == -Inf this depends on Modf's result; Gamma checks IsInf(x, -1) itself
+	}
+	return false
+}
diff --git a/src/math/huge_test.go b/src/math/huge_test.go
new file mode 100644
index 0000000..568b0c8
--- /dev/null
+++ b/src/math/huge_test.go
@@ -0,0 +1,131 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Disabled for s390x because it uses assembly routines that are not
+// accurate for huge arguments.
+
+//go:build !s390x
+
+package math_test
+
+import (
+	. "math"
+	"testing"
+)
+
+// Inputs to test trigReduce (huge angles, up to MaxFloat64).
+var trigHuge = []float64{
+	1 << 28,
+	1 << 29,
+	1 << 30,
+	1 << 35,
+	1 << 120,
+	1 << 240,
+	1 << 480,
+	1234567891234567 << 180,
+	1234567891234567 << 300,
+	MaxFloat64,
+}
+
+// Results for trigHuge[i] calculated with https://github.com/robpike/ivy
+// using 4096 bits of working precision. Values requiring less than
+// 102 decimal digits (1 << 120, 1 << 240, 1 << 480, 1234567891234567 << 180)
+// were confirmed via https://keisan.casio.com/
+var cosHuge = []float64{ // cosHuge[i] is the expected Cos(trigHuge[i])
+	-0.16556897949057876,
+	-0.94517382606089662,
+	0.78670712294118812,
+	-0.76466301249635305,
+	-0.92587902285483787,
+	0.93601042593353793,
+	-0.28282777640193788,
+	-0.14616431394103619,
+	-0.79456058210671406,
+	-0.99998768942655994,
+}
+
+var sinHuge = []float64{ // sinHuge[i] is the expected Sin(trigHuge[i])
+	-0.98619821183697566,
+	0.32656766301856334,
+	-0.61732641504604217,
+	-0.64443035102329113,
+	0.37782010936075202,
+	-0.35197227524865778,
+	0.95917070894368716,
+	0.98926032637023618,
+	-0.60718488235646949,
+	0.00496195478918406,
+}
+
+var tanHuge = []float64{ // tanHuge[i] is the expected Tan(trigHuge[i])
+	5.95641897939639421,
+	-0.34551069233430392,
+	-0.78469661331920043,
+	0.84276385870875983,
+	-0.40806638884180424,
+	-0.37603456702698076,
+	-3.39135965054779932,
+	-6.76813854009065030,
+	0.76417695016604922,
+	-0.00496201587444489,
+}
+
+// Check that trig values of huge angles return accurate results.
+// This confirms that argument reduction works for very large values
+// up to MaxFloat64.
+func TestHugeCos(t *testing.T) {
+	for i := 0; i < len(trigHuge); i++ {
+		f1 := cosHuge[i] // expected value
+		f2 := Cos(trigHuge[i])
+		if !close(f1, f2) { // close is a comparison helper defined elsewhere in this test package
+			t.Errorf("Cos(%g) = %g, want %g", trigHuge[i], f2, f1)
+		}
+		f3 := Cos(-trigHuge[i]) // cosine is even: same expected value for the negated angle
+		if !close(f1, f3) {
+			t.Errorf("Cos(%g) = %g, want %g", -trigHuge[i], f3, f1)
+		}
+	}
+}
+
+func TestHugeSin(t *testing.T) { // TestHugeSin checks Sin on the huge angles and on their negations
+	for i := 0; i < len(trigHuge); i++ {
+		f1 := sinHuge[i] // expected value
+		f2 := Sin(trigHuge[i])
+		if !close(f1, f2) { // close is a comparison helper defined elsewhere in this test package
+			t.Errorf("Sin(%g) = %g, want %g", trigHuge[i], f2, f1)
+		}
+		f3 := Sin(-trigHuge[i]) // sine is odd: expect the negated value
+		if !close(-f1, f3) {
+			t.Errorf("Sin(%g) = %g, want %g", -trigHuge[i], f3, -f1)
+		}
+	}
+}
+
+func TestHugeSinCos(t *testing.T) { // TestHugeSinCos checks both results of Sincos on the huge angles and their negations
+	for i := 0; i < len(trigHuge); i++ {
+		f1, g1 := sinHuge[i], cosHuge[i] // expected sine and cosine
+		f2, g2 := Sincos(trigHuge[i])
+		if !close(f1, f2) || !close(g1, g2) { // close is a comparison helper defined elsewhere in this test package
+			t.Errorf("Sincos(%g) = %g, %g, want %g, %g", trigHuge[i], f2, g2, f1, g1)
+		}
+		f3, g3 := Sincos(-trigHuge[i]) // negated angle: sine flips sign, cosine does not
+		if !close(-f1, f3) || !close(g1, g3) {
+			t.Errorf("Sincos(%g) = %g, %g, want %g, %g", -trigHuge[i], f3, g3, -f1, g1)
+		}
+	}
+}
+
+func TestHugeTan(t *testing.T) { // TestHugeTan checks Tan on the huge angles and on their negations
+	for i := 0; i < len(trigHuge); i++ {
+		f1 := tanHuge[i] // expected value
+		f2 := Tan(trigHuge[i])
+		if !close(f1, f2) { // close is a comparison helper defined elsewhere in this test package
+			t.Errorf("Tan(%g) = %g, want %g", trigHuge[i], f2, f1)
+		}
+		f3 := Tan(-trigHuge[i]) // tangent is odd: expect the negated value
+		if !close(-f1, f3) {
+			t.Errorf("Tan(%g) = %g, want %g", -trigHuge[i], f3, -f1)
+		}
+	}
+}
diff --git a/src/math/hypot.go b/src/math/hypot.go
new file mode 100644
index 0000000..6ae70c1
--- /dev/null
+++ b/src/math/hypot.go
@@ -0,0 +1,44 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Hypot -- sqrt(p*p + q*q), but overflows only if the result does.
+*/
+
+// Hypot returns Sqrt(p*p + q*q), taking care to avoid
+// unnecessary overflow and underflow.
+//
+// Special cases are:
+//
+//	Hypot(±Inf, q) = +Inf
+//	Hypot(p, ±Inf) = +Inf
+//	Hypot(NaN, q) = NaN
+//	Hypot(p, NaN) = NaN
+func Hypot(p, q float64) float64 {
+	if haveArchHypot { // constant chosen by build tags (hypot_asm.go / hypot_noasm.go)
+		return archHypot(p, q)
+	}
+	return hypot(p, q) // portable fallback
+}
+
+func hypot(p, q float64) float64 { // portable implementation behind Hypot
+	p, q = Abs(p), Abs(q) // the result depends only on magnitudes
+	// special cases
+	switch {
+	case IsInf(p, 1) || IsInf(q, 1): // checked before NaN, so Inf wins over NaN
+		return Inf(1)
+	case IsNaN(p) || IsNaN(q):
+		return NaN()
+	}
+	if p < q { // make p the larger magnitude
+		p, q = q, p
+	}
+	if p == 0 { // both are zero; also avoids dividing by zero below
+		return 0
+	}
+	q = q / p // ratio in [0, 1], so q*q cannot overflow
+	return p * Sqrt(1+q*q)
+}
diff --git a/src/math/hypot_386.s b/src/math/hypot_386.s
new file mode 100644
index 0000000..80a8fd3
--- /dev/null
+++ b/src/math/hypot_386.s
@@ -0,0 +1,59 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func archHypot(p, q float64) float64
+TEXT ·archHypot(SB),NOSPLIT,$0
+// test bits for not-finite
+	MOVL    p_hi+4(FP), AX   // high word p
+	ANDL    $0x7ff00000, AX  // isolate the exponent field
+	CMPL    AX, $0x7ff00000  // exponent all ones => Inf or NaN
+	JEQ     not_finite
+	MOVL    q_hi+12(FP), AX   // high word q
+	ANDL    $0x7ff00000, AX   // isolate the exponent field
+	CMPL    AX, $0x7ff00000   // exponent all ones => Inf or NaN
+	JEQ     not_finite
+	FMOVD   p+0(FP), F0  // F0=p
+	FABS                 // F0=|p|
+	FMOVD   q+8(FP), F0  // F0=q, F1=|p|
+	FABS                 // F0=|q|, F1=|p|
+	FUCOMI  F0, F1       // compare F0 to F1
+	JCC     2(PC)        // jump if F0 >= F1
+	FXCHD   F0, F1       // F0=|p| (larger), F1=|q| (smaller)
+	FTST                 // compare F0 to 0
+	FSTSW	AX           // AX = FPU status word
+	ANDW    $0x4000, AX  // test the C3 condition bit, set when F0 == 0
+	JNE     10(PC)       // jump if F0 = 0
+	FXCHD   F0, F1       // F0=q (smaller), F1=p (larger)
+	FDIVD   F1, F0       // F0=q(=q/p), F1=p
+	FMULD   F0, F0       // F0=q*q, F1=p
+	FLD1                 // F0=1, F1=q*q, F2=p
+	FADDDP  F0, F1       // F0=1+q*q, F1=p
+	FSQRT                // F0=sqrt(1+q*q), F1=p
+	FMULDP  F0, F1       // F0=p*sqrt(1+q*q)
+	FMOVDP  F0, ret+16(FP)
+	RET
+	FMOVDP  F0, F1       // F0=0
+	FMOVDP  F0, ret+16(FP)
+	RET
+not_finite:
+// test bits for -Inf or +Inf
+	MOVL    p_hi+4(FP), AX  // high word p
+	ORL     p_lo+0(FP), AX  // low word p
+	ANDL    $0x7fffffff, AX // drop the sign bit
+	CMPL    AX, $0x7ff00000
+	JEQ     is_inf
+	MOVL    q_hi+12(FP), AX  // high word q
+	ORL     q_lo+8(FP), AX   // low word q
+	ANDL    $0x7fffffff, AX  // drop the sign bit
+	CMPL    AX, $0x7ff00000
+	JEQ     is_inf
+	MOVL    $0x7ff80000, ret_hi+20(FP)  // return NaN = 0x7FF8000000000001
+	MOVL    $0x00000001, ret_lo+16(FP)
+	RET
+is_inf:
+	MOVL    AX, ret_hi+20(FP)  // return +Inf = 0x7FF0000000000000
+	MOVL    $0x00000000, ret_lo+16(FP)
+	RET
diff --git a/src/math/hypot_amd64.s b/src/math/hypot_amd64.s
new file mode 100644
index 0000000..fe326c9
--- /dev/null
+++ b/src/math/hypot_amd64.s
@@ -0,0 +1,52 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000
+#define NaN 0x7FF8000000000001
+
+// func archHypot(p, q float64) float64
+TEXT ·archHypot(SB),NOSPLIT,$0
+	// test bits for special cases
+	MOVQ    p+0(FP), BX
+	MOVQ    $~(1<<63), AX // sign bit mask
+	ANDQ    AX, BX // p = |p|
+	MOVQ    q+8(FP), CX
+	ANDQ    AX, CX // q = |q|
+	MOVQ    $PosInf, AX
+	CMPQ    AX, BX
+	JLE     isInfOrNaN // bits of |p| >= bits of +Inf => p is Inf or NaN
+	CMPQ    AX, CX
+	JLE     isInfOrNaN // bits of |q| >= bits of +Inf => q is Inf or NaN
+	// hypot = max * sqrt(1 + (min/max)**2)
+	MOVQ    BX, X0 // X0 = |p|
+	MOVQ    CX, X1 // X1 = |q|
+	ORQ     CX, BX
+	JEQ     isZero // both |p| and |q| are zero
+	MOVAPD  X0, X2 // X2 = copy of |p|
+	MAXSD   X1, X0 // X0 = max(|p|, |q|)
+	MINSD   X2, X1 // X1 = min(|p|, |q|)
+	DIVSD   X0, X1 // X1 = min/max
+	MULSD   X1, X1 // X1 = (min/max)**2
+	ADDSD   $1.0, X1
+	SQRTSD  X1, X1
+	MULSD   X1, X0 // X0 = max * sqrt(1 + (min/max)**2)
+	MOVSD   X0, ret+16(FP)
+	RET
+isInfOrNaN:
+	CMPQ    AX, BX
+	JEQ     isInf // |p| is exactly +Inf
+	CMPQ    AX, CX
+	JEQ     isInf // |q| is exactly +Inf
+	MOVQ    $NaN, AX
+	MOVQ    AX, ret+16(FP) // return NaN
+	RET
+isInf:
+	MOVQ    AX, ret+16(FP) // return +Inf
+	RET
+isZero:
+	MOVQ    $0, AX
+	MOVQ    AX, ret+16(FP) // return 0
+	RET
diff --git a/src/math/hypot_asm.go b/src/math/hypot_asm.go
new file mode 100644
index 0000000..8526910
--- /dev/null
+++ b/src/math/hypot_asm.go
@@ -0,0 +1,11 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64
+
+package math
+
+const haveArchHypot = true // true under the 386 || amd64 build constraint of this file
+
+func archHypot(p, q float64) float64 // no Go body: the implementation is supplied in assembly (e.g. hypot_amd64.s)
diff --git a/src/math/hypot_noasm.go b/src/math/hypot_noasm.go
new file mode 100644
index 0000000..8b64812
--- /dev/null
+++ b/src/math/hypot_noasm.go
@@ -0,0 +1,13 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64
+
+package math
+
+const haveArchHypot = false // false under the !386 && !amd64 build constraint of this file
+
+func archHypot(p, q float64) float64 { // stub with the same signature as the assembly-backed declaration
+	panic("not implemented") // always panics; presumably callers check haveArchHypot first (verify against Hypot)
+}
diff --git a/src/math/j0.go b/src/math/j0.go
new file mode 100644
index 0000000..a311e18
--- /dev/null
+++ b/src/math/j0.go
@@ -0,0 +1,429 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Bessel function of the first and second kinds of order zero.
+*/
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/e_j0.c and
+// came with this notice. The go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_j0(x), __ieee754_y0(x)
+// Bessel function of the first and second kinds of order zero.
+// Method -- j0(x):
+//      1. For tiny x, we use j0(x) = 1 - x**2/4 + x**4/64 - ...
+//      2. Reduce x to |x| since j0(x)=j0(-x),  and
+//         for x in (0,2)
+//              j0(x) = 1-z/4+ z**2*R0/S0,  where z = x*x;
+//         (precision:  |j0-1+z/4-z**2R0/S0 |<2**-63.67 )
+//         for x in (2,inf)
+//              j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
+//         where x0 = x-pi/4. It is better to compute sin(x0),cos(x0)
+//         as follows:
+//              cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+//                      = 1/sqrt(2) * (cos(x) + sin(x))
+//              sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
+//                      = 1/sqrt(2) * (sin(x) - cos(x))
+//         (To avoid cancellation, use
+//              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+//         to compute the worse one.)
+//
+//      3 Special cases
+//              j0(nan)= nan
+//              j0(0) = 1
+//              j0(inf) = 0
+//
+// Method -- y0(x):
+//      1. For x<2.
+//         Since
+//              y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x**2/4 - ...)
+//         therefore y0(x)-2/pi*j0(x)*ln(x) is an even function.
+//         We use the following function to approximate y0,
+//              y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x**2
+//         where
+//              U(z) = u00 + u01*z + ... + u06*z**6
+//              V(z) = 1  + v01*z + ... + v04*z**4
+//         with absolute approximation error bounded by 2**-72.
+//         Note: For tiny x, U/V = u0 and j0(x)~1, hence
+//              y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27)
+//      2. For x>=2.
+//              y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
+//         where x0 = x-pi/4. It is better to compute sin(x0),cos(x0)
+//         by the method mentioned above.
+//      3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0.
+//
+
+// J0 returns the order-zero Bessel function of the first kind.
+//
+// Special cases are:
+//
+//	J0(±Inf) = 0
+//	J0(0) = 1
+//	J0(NaN) = NaN
+func J0(x float64) float64 {
+	const (
+		Huge   = 1e300           // not referenced in this function
+		TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000
+		TwoM13 = 1.0 / (1 << 13) // 2**-13 0x3f20000000000000
+		Two129 = 1 << 129        // 2**129 0x4800000000000000
+		// R0/S0 on [0, 2]
+		R02 = 1.56249999999999947958e-02  // 0x3F8FFFFFFFFFFFFD
+		R03 = -1.89979294238854721751e-04 // 0xBF28E6A5B61AC6E9
+		R04 = 1.82954049532700665670e-06  // 0x3EBEB1D10C503919
+		R05 = -4.61832688532103189199e-09 // 0xBE33D5E773D63FCE
+		S01 = 1.56191029464890010492e-02  // 0x3F8FFCE882C8C2A4
+		S02 = 1.16926784663337450260e-04  // 0x3F1EA6D2DD57DBF4
+		S03 = 5.13546550207318111446e-07  // 0x3EA13B54CE84D5A9
+		S04 = 1.16614003333790000205e-09  // 0x3E1408BCF4745D8F
+	)
+	// special cases
+	switch {
+	case IsNaN(x):
+		return x
+	case IsInf(x, 0):
+		return 0
+	case x == 0:
+		return 1
+	}
+
+	x = Abs(x) // j0 is even, so only |x| matters from here on
+	if x >= 2 {
+		s, c := Sincos(x)
+		ss := s - c // sqrt(2)*sin(x0), x0 = x-pi/4
+		cc := s + c // sqrt(2)*cos(x0)
+
+		// make sure x+x does not overflow
+		if x < MaxFloat64/2 {
+			z := -Cos(x + x) // -cos(2x) = (s-c)*(s+c) = ss*cc
+			if s*c < 0 { // opposite signs: s+c cancels, so rebuild cc from ss
+				cc = z / ss
+			} else { // same sign (or zero product): s-c cancels, so rebuild ss from cc
+				ss = z / cc
+			}
+		}
+
+		// j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+		// y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+
+		var z float64
+		if x > Two129 { // |x| > ~6.8056e+38: pzero/qzero are skipped and only the cc term is used
+			z = (1 / SqrtPi) * cc / Sqrt(x)
+		} else {
+			u := pzero(x) // P(0,x)
+			v := qzero(x) // Q(0,x)
+			z = (1 / SqrtPi) * (u*cc - v*ss) / Sqrt(x)
+		}
+		return z // |x| >= 2.0
+	}
+	if x < TwoM13 { // |x| < ~1.2207e-4
+		if x < TwoM27 {
+			return 1 // |x| < ~7.4506e-9
+		}
+		return 1 - 0.25*x*x // ~7.4506e-9 <= |x| < ~1.2207e-4
+	}
+	z := x * x
+	r := z * (R02 + z*(R03+z*(R04+z*R05))) // numerator R0 of the rational approximation
+	s := 1 + z*(S01+z*(S02+z*(S03+z*S04))) // denominator S0 of the rational approximation
+	if x < 1 {
+		return 1 + z*(-0.25+(r/s)) // |x| < 1.00
+	}
+	u := 0.5 * x // (1+u)*(1-u) below equals 1 - z/4
+	return (1+u)*(1-u) + z*(r/s) // 1.0 <= |x| < 2.0
+}
+
+// Y0 returns the order-zero Bessel function of the second kind.
+//
+// Special cases are:
+//
+//	Y0(+Inf) = 0
+//	Y0(0) = -Inf
+//	Y0(x < 0) = NaN
+//	Y0(NaN) = NaN
+func Y0(x float64) float64 {
+	const (
+		TwoM27 = 1.0 / (1 << 27)             // 2**-27 0x3e40000000000000
+		Two129 = 1 << 129                    // 2**129 0x4800000000000000
+		U00    = -7.38042951086872317523e-02 // 0xBFB2E4D699CBD01F
+		U01    = 1.76666452509181115538e-01  // 0x3FC69D019DE9E3FC
+		U02    = -1.38185671945596898896e-02 // 0xBF8C4CE8B16CFA97
+		U03    = 3.47453432093683650238e-04  // 0x3F36C54D20B29B6B
+		U04    = -3.81407053724364161125e-06 // 0xBECFFEA773D25CAD
+		U05    = 1.95590137035022920206e-08  // 0x3E5500573B4EABD4
+		U06    = -3.98205194132103398453e-11 // 0xBDC5E43D693FB3C8
+		V01    = 1.27304834834123699328e-02  // 0x3F8A127091C9C71A
+		V02    = 7.60068627350353253702e-05  // 0x3F13ECBBF578C6C1
+		V03    = 2.59150851840457805467e-07  // 0x3E91642D7FF202FD
+		V04    = 4.41110311332675467403e-10  // 0x3DFE50183BD6D9EF
+	)
+	// special cases
+	switch {
+	case x < 0 || IsNaN(x):
+		return NaN()
+	case IsInf(x, 1):
+		return 0
+	case x == 0:
+		return Inf(-1)
+	}
+
+	if x >= 2 { // |x| >= 2.0
+
+		// y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
+		//     where x0 = x-pi/4
+		// Better formula:
+		//     cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+		//             =  1/sqrt(2) * (sin(x) + cos(x))
+		//     sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
+		//             =  1/sqrt(2) * (sin(x) - cos(x))
+		// To avoid cancellation, use
+		//     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+		// to compute the worse one.
+
+		s, c := Sincos(x)
+		ss := s - c // sqrt(2)*sin(x0)
+		cc := s + c // sqrt(2)*cos(x0)
+
+		// j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+		// y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+
+		// make sure x+x does not overflow
+		if x < MaxFloat64/2 {
+			z := -Cos(x + x) // -cos(2x) = (s-c)*(s+c) = ss*cc
+			if s*c < 0 { // opposite signs: s+c cancels, so rebuild cc from ss
+				cc = z / ss
+			} else { // same sign (or zero product): s-c cancels, so rebuild ss from cc
+				ss = z / cc
+			}
+		}
+		var z float64
+		if x > Two129 { // |x| > ~6.8056e+38: pzero/qzero are skipped and only the ss term is used
+			z = (1 / SqrtPi) * ss / Sqrt(x)
+		} else {
+			u := pzero(x) // P(0,x)
+			v := qzero(x) // Q(0,x)
+			z = (1 / SqrtPi) * (u*ss + v*cc) / Sqrt(x)
+		}
+		return z // |x| >= 2.0
+	}
+	if x <= TwoM27 {
+		return U00 + (2/Pi)*Log(x) // x <= 2**-27 (~7.4506e-9): only the leading U term and the log term remain
+	}
+	z := x * x
+	u := U00 + z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06))))) // U(z)
+	v := 1 + z*(V01+z*(V02+z*(V03+z*V04)))                   // V(z)
+	return u/v + (2/Pi)*J0(x)*Log(x) // ~7.4506e-9 < |x| < 2.0
+}
+
+// The asymptotic expansions of pzero is
+//      1 - 9/128 s**2 + 11025/98304 s**4 - ..., where s = 1/x.
+// For x >= 2, we approximate pzero by
+// 	pzero(x) = 1 + (R/S)
+// where  R = pR0 + pR1*s**2 + pR2*s**4 + ... + pR5*s**10
+// 	  S = 1 + pS0*s**2 + ... + pS4*s**10
+// and
+//      | pzero(x)-1-R/S | <= 2  ** ( -60.26)
+
+// for x in [inf, 8]=1/[0,0.125]
+var p0R8 = [6]float64{
+	0.00000000000000000000e+00,  // 0x0000000000000000
+	-7.03124999999900357484e-02, // 0xBFB1FFFFFFFFFD32
+	-8.08167041275349795626e+00, // 0xC02029D0B44FA779
+	-2.57063105679704847262e+02, // 0xC07011027B19E863
+	-2.48521641009428822144e+03, // 0xC0A36A6ECD4DCAFC
+	-5.25304380490729545272e+03, // 0xC0B4850B36CC643D
+}
+var p0S8 = [5]float64{
+	1.16534364619668181717e+02, // 0x405D223307A96751
+	3.83374475364121826715e+03, // 0x40ADF37D50596938
+	4.05978572648472545552e+04, // 0x40E3D2BB6EB6B05F
+	1.16752972564375915681e+05, // 0x40FC810F8F9FA9BD
+	4.76277284146730962675e+04, // 0x40E741774F2C49DC
+}
+
+// for x in [8,4.5454]=1/[0.125,0.22001]
+var p0R5 = [6]float64{
+	-1.14125464691894502584e-11, // 0xBDA918B147E495CC
+	-7.03124940873599280078e-02, // 0xBFB1FFFFE69AFBC6
+	-4.15961064470587782438e+00, // 0xC010A370F90C6BBF
+	-6.76747652265167261021e+01, // 0xC050EB2F5A7D1783
+	-3.31231299649172967747e+02, // 0xC074B3B36742CC63
+	-3.46433388365604912451e+02, // 0xC075A6EF28A38BD7
+}
+var p0S5 = [5]float64{
+	6.07539382692300335975e+01, // 0x404E60810C98C5DE
+	1.05125230595704579173e+03, // 0x40906D025C7E2864
+	5.97897094333855784498e+03, // 0x40B75AF88FBE1D60
+	9.62544514357774460223e+03, // 0x40C2CCB8FA76FA38
+	2.40605815922939109441e+03, // 0x40A2CC1DC70BE864
+}
+
+// for x in [4.547,2.8571]=1/[0.2199,0.35001]
+var p0R3 = [6]float64{
+	-2.54704601771951915620e-09, // 0xBE25E1036FE1AA86
+	-7.03119616381481654654e-02, // 0xBFB1FFF6F7C0E24B
+	-2.40903221549529611423e+00, // 0xC00345B2AEA48074
+	-2.19659774734883086467e+01, // 0xC035F74A4CB94E14
+	-5.80791704701737572236e+01, // 0xC04D0A22420A1A45
+	-3.14479470594888503854e+01, // 0xC03F72ACA892D80F
+}
+var p0S3 = [5]float64{
+	3.58560338055209726349e+01, // 0x4041ED9284077DD3
+	3.61513983050303863820e+02, // 0x40769839464A7C0E
+	1.19360783792111533330e+03, // 0x4092A66E6D1061D6
+	1.12799679856907414432e+03, // 0x40919FFCB8C39B7E
+	1.73580930813335754692e+02, // 0x4065B296FC379081
+}
+
+// for x in [2.8570,2]=1/[0.3499,0.5]
+var p0R2 = [6]float64{
+	-8.87534333032526411254e-08, // 0xBE77D316E927026D
+	-7.03030995483624743247e-02, // 0xBFB1FF62495E1E42
+	-1.45073846780952986357e+00, // 0xBFF736398A24A843
+	-7.63569613823527770791e+00, // 0xC01E8AF3EDAFA7F3
+	-1.11931668860356747786e+01, // 0xC02662E6C5246303
+	-3.23364579351335335033e+00, // 0xC009DE81AF8FE70F
+}
+var p0S2 = [5]float64{
+	2.22202997532088808441e+01, // 0x40363865908B5959
+	1.36206794218215208048e+02, // 0x4061069E0EE8878F
+	2.70470278658083486789e+02, // 0x4070E78642EA079B
+	1.53875394208320329881e+02, // 0x40633C033AB6FAFF
+	1.46576176948256193810e+01, // 0x402D50B344391809
+}
+
+func pzero(x float64) float64 { // evaluates 1 + R/S in 1/x**2; J0 and Y0 call it only with x >= 2
+	var p *[6]float64 // numerator (R) coefficients for the interval containing x
+	var q *[5]float64 // denominator (S) coefficients for the interval containing x
+	if x >= 8 {
+		p = &p0R8
+		q = &p0S8
+	} else if x >= 4.5454 {
+		p = &p0R5
+		q = &p0S5
+	} else if x >= 2.8571 {
+		p = &p0R3
+		q = &p0S3
+	} else if x >= 2 { // no branch matches x < 2 or NaN: p and q stay nil and the indexing below would panic
+		p = &p0R2
+		q = &p0S2
+	}
+	z := 1 / (x * x) // z = s**2 with s = 1/x
+	r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) // R, degree 5 in z
+	s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))))    // S, degree 5 in z with constant term 1
+	return 1 + r/s
+}
+
+// For x >= 8, the asymptotic expansions of qzero is
+//      -1/8 s + 75/1024 s**3 - ..., where s = 1/x.
+// We approximate qzero by
+//      qzero(x) = s*(-0.125 + (R/S))
+// where R = qR0 + qR1*s**2 + qR2*s**4 + ... + qR5*s**10
+//       S = 1 + qS0*s**2 + ... + qS5*s**12
+// and
+//      | qzero(x)/s +0.125-R/S | <= 2**(-61.22)
+
+// for x in [inf, 8]=1/[0,0.125]
+var q0R8 = [6]float64{
+	0.00000000000000000000e+00, // 0x0000000000000000
+	7.32421874999935051953e-02, // 0x3FB2BFFFFFFFFE2C
+	1.17682064682252693899e+01, // 0x402789525BB334D6
+	5.57673380256401856059e+02, // 0x40816D6315301825
+	8.85919720756468632317e+03, // 0x40C14D993E18F46D
+	3.70146267776887834771e+04, // 0x40E212D40E901566
+}
+var q0S8 = [6]float64{
+	1.63776026895689824414e+02,  // 0x406478D5365B39BC
+	8.09834494656449805916e+03,  // 0x40BFA2584E6B0563
+	1.42538291419120476348e+05,  // 0x4101665254D38C3F
+	8.03309257119514397345e+05,  // 0x412883DA83A52B43
+	8.40501579819060512818e+05,  // 0x4129A66B28DE0B3D
+	-3.43899293537866615225e+05, // 0xC114FD6D2C9530C5
+}
+
+// for x in [8,4.5454]=1/[0.125,0.22001]
+var q0R5 = [6]float64{
+	1.84085963594515531381e-11, // 0x3DB43D8F29CC8CD9
+	7.32421766612684765896e-02, // 0x3FB2BFFFD172B04C
+	5.83563508962056953777e+00, // 0x401757B0B9953DD3
+	1.35111577286449829671e+02, // 0x4060E3920A8788E9
+	1.02724376596164097464e+03, // 0x40900CF99DC8C481
+	1.98997785864605384631e+03, // 0x409F17E953C6E3A6
+}
+var q0S5 = [6]float64{
+	8.27766102236537761883e+01,  // 0x4054B1B3FB5E1543
+	2.07781416421392987104e+03,  // 0x40A03BA0DA21C0CE
+	1.88472887785718085070e+04,  // 0x40D267D27B591E6D
+	5.67511122894947329769e+04,  // 0x40EBB5E397E02372
+	3.59767538425114471465e+04,  // 0x40E191181F7A54A0
+	-5.35434275601944773371e+03, // 0xC0B4EA57BEDBC609
+}
+
+// for x in [4.547,2.8571]=1/[0.2199,0.35001]
+var q0R3 = [6]float64{
+	4.37741014089738620906e-09, // 0x3E32CD036ADECB82
+	7.32411180042911447163e-02, // 0x3FB2BFEE0E8D0842
+	3.34423137516170720929e+00, // 0x400AC0FC61149CF5
+	4.26218440745412650017e+01, // 0x40454F98962DAEDD
+	1.70808091340565596283e+02, // 0x406559DBE25EFD1F
+	1.66733948696651168575e+02, // 0x4064D77C81FA21E0
+}
+var q0S3 = [6]float64{
+	4.87588729724587182091e+01,  // 0x40486122BFE343A6
+	7.09689221056606015736e+02,  // 0x40862D8386544EB3
+	3.70414822620111362994e+03,  // 0x40ACF04BE44DFC63
+	6.46042516752568917582e+03,  // 0x40B93C6CD7C76A28
+	2.51633368920368957333e+03,  // 0x40A3A8AAD94FB1C0
+	-1.49247451836156386662e+02, // 0xC062A7EB201CF40F
+}
+
+// for x in [2.8570,2]=1/[0.3499,0.5]
+var q0R2 = [6]float64{
+	1.50444444886983272379e-07, // 0x3E84313B54F76BDB
+	7.32234265963079278272e-02, // 0x3FB2BEC53E883E34
+	1.99819174093815998816e+00, // 0x3FFFF897E727779C
+	1.44956029347885735348e+01, // 0x402CFDBFAAF96FE5
+	3.16662317504781540833e+01, // 0x403FAA8E29FBDC4A
+	1.62527075710929267416e+01, // 0x403040B171814BB4
+}
+var q0S2 = [6]float64{
+	3.03655848355219184498e+01,  // 0x403E5D96F7C07AED
+	2.69348118608049844624e+02,  // 0x4070D591E4D14B40
+	8.44783757595320139444e+02,  // 0x408A664522B3BF22
+	8.82935845112488550512e+02,  // 0x408B977C9C5CC214
+	2.12666388511798828631e+02,  // 0x406A95530E001365
+	-5.31095493882666946917e+00, // 0xC0153E6AF8B32931
+}
+
+func qzero(x float64) float64 { // evaluates (-0.125 + R/S)/x; J0 and Y0 call it only with x >= 2
+	var p, q *[6]float64 // p: numerator (R) coefficients, q: denominator (S) coefficients
+	if x >= 8 {
+		p = &q0R8
+		q = &q0S8
+	} else if x >= 4.5454 {
+		p = &q0R5
+		q = &q0S5
+	} else if x >= 2.8571 {
+		p = &q0R3
+		q = &q0S3
+	} else if x >= 2 { // no branch matches x < 2 or NaN: p and q stay nil and the indexing below would panic
+		p = &q0R2
+		q = &q0S2
+	}
+	z := 1 / (x * x) // z = s**2 with s = 1/x
+	r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))        // R, degree 5 in z
+	s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))) // S, degree 6 in z with constant term 1
+	return (-0.125 + r/s) / x
+}
diff --git a/src/math/j1.go b/src/math/j1.go
new file mode 100644
index 0000000..cc19e75
--- /dev/null
+++ b/src/math/j1.go
@@ -0,0 +1,424 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Bessel function of the first and second kinds of order one.
+*/
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/e_j1.c and
+// came with this notice. The go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_j1(x), __ieee754_y1(x)
+// Bessel function of the first and second kinds of order one.
+// Method -- j1(x):
+//      1. For tiny x, we use j1(x) = x/2 - x**3/16 + x**5/384 - ...
+//      2. Reduce x to |x| since j1(x)=-j1(-x),  and
+//         for x in (0,2)
+//              j1(x) = x/2 + x*z*R0/S0,  where z = x*x;
+//         (precision:  |j1/x - 1/2 - R0/S0 |<2**-61.51 )
+//         for x in (2,inf)
+//              j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1))
+//              y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
+//         where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
+//         as follows:
+//              cos(x1) =  cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
+//                      =  1/sqrt(2) * (sin(x) - cos(x))
+//              sin(x1) =  sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+//                      = -1/sqrt(2) * (sin(x) + cos(x))
+//         (To avoid cancellation, use
+//              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+//         to compute the worse one.)
+//
+//      3 Special cases
+//              j1(nan)= nan
+//              j1(0) = 0
+//              j1(inf) = 0
+//
+// Method -- y1(x):
+//      1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN
+//      2. For x<2.
+//         Since
+//              y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x**3-...)
+//         therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function.
+//         We use the following function to approximate y1,
+//              y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x**2
+//         where for x in [0,2] (abs err less than 2**-65.89)
+//              U(z) = U0[0] + U0[1]*z + ... + U0[4]*z**4
+//              V(z) = 1  + v0[0]*z + ... + v0[4]*z**5
+//         Note: For tiny x, 1/x dominates y1 and hence
+//              y1(tiny) = -2/pi/tiny, (choose tiny<2**-54)
+//      3. For x>=2.
+//               y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
+//         where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
+//         by method mentioned above.
+
+// J1 returns the order-one Bessel function of the first kind.
+//
+// Special cases are:
+//
+//	J1(±Inf) = 0
+//	J1(NaN) = NaN
+func J1(x float64) float64 {
+	const (
+		TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000
+		Two129 = 1 << 129        // 2**129 0x4800000000000000
+		// R0/S0 on [0, 2]
+		R00 = -6.25000000000000000000e-02 // 0xBFB0000000000000
+		R01 = 1.40705666955189706048e-03  // 0x3F570D9F98472C61
+		R02 = -1.59955631084035597520e-05 // 0xBEF0C5C6BA169668
+		R03 = 4.96727999609584448412e-08  // 0x3E6AAAFA46CA0BD9
+		S01 = 1.91537599538363460805e-02  // 0x3F939D0B12637E53
+		S02 = 1.85946785588630915560e-04  // 0x3F285F56B9CDF664
+		S03 = 1.17718464042623683263e-06  // 0x3EB3BFF8333F8498
+		S04 = 5.04636257076217042715e-09  // 0x3E35AC88C97DFF2C
+		S05 = 1.23542274426137913908e-11  // 0x3DAB2ACFCFB97ED8
+	)
+	// special cases
+	switch {
+	case IsNaN(x):
+		return x
+	case IsInf(x, 0) || x == 0:
+		return 0
+	}
+
+	sign := false // records x < 0 so the odd symmetry j1(-x) = -j1(x) can be applied on return
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if x >= 2 {
+		s, c := Sincos(x)
+		ss := -s - c // sqrt(2)*sin(x1), x1 = x-3*pi/4
+		cc := s - c  // sqrt(2)*cos(x1)
+
+		// make sure x+x does not overflow
+		if x < MaxFloat64/2 {
+			z := Cos(x + x) // cos(2x) = (-s-c)*(s-c) = ss*cc
+			if s*c > 0 { // same sign: s-c cancels, so rebuild cc from ss
+				cc = z / ss
+			} else { // opposite signs (or zero product): s+c cancels, so rebuild ss from cc
+				ss = z / cc
+			}
+		}
+
+		// j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
+		// y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
+
+		var z float64
+		if x > Two129 { // pone/qone are skipped and only the cc term is used
+			z = (1 / SqrtPi) * cc / Sqrt(x)
+		} else {
+			u := pone(x) // P(1,x)
+			v := qone(x) // Q(1,x)
+			z = (1 / SqrtPi) * (u*cc - v*ss) / Sqrt(x)
+		}
+		if sign {
+			return -z
+		}
+		return z
+	}
+	z := 0.5 * x // |x| < 2**-27: j1(x) ~ x/2; the sign is applied below like on every other path
+	if x >= TwoM27 {
+		z = x * x
+		r := z * (R00 + z*(R01+z*(R02+z*R03)))
+		s := 1.0 + z*(S01+z*(S02+z*(S03+z*(S04+z*S05))))
+		r *= x
+		z = 0.5*x + r/s // j1(x) = x/2 + x*z*R0/S0
+	}
+	if sign {
+		return -z // j1 is odd: j1(-x) = -j1(x)
+	}
+	return z
+}
+
+// Y1 returns the order-one Bessel function of the second kind.
+//
+// Special cases are:
+//
+//	Y1(+Inf) = 0
+//	Y1(0) = -Inf
+//	Y1(x < 0) = NaN
+//	Y1(NaN) = NaN
+func Y1(x float64) float64 {
+	const (
+		TwoM54 = 1.0 / (1 << 54)             // 2**-54 0x3c90000000000000
+		Two129 = 1 << 129                    // 2**129 0x4800000000000000
+		U00    = -1.96057090646238940668e-01 // 0xBFC91866143CBC8A
+		U01    = 5.04438716639811282616e-02  // 0x3FA9D3C776292CD1
+		U02    = -1.91256895875763547298e-03 // 0xBF5F55E54844F50F
+		U03    = 2.35252600561610495928e-05  // 0x3EF8AB038FA6B88E
+		U04    = -9.19099158039878874504e-08 // 0xBE78AC00569105B8
+		V00    = 1.99167318236649903973e-02  // 0x3F94650D3F4DA9F0
+		V01    = 2.02552581025135171496e-04  // 0x3F2A8C896C257764
+		V02    = 1.35608801097516229404e-06  // 0x3EB6C05A894E8CA6
+		V03    = 6.22741452364621501295e-09  // 0x3E3ABF1D5BA69A86
+		V04    = 1.66559246207992079114e-11  // 0x3DB25039DACA772A
+	)
+	// special cases
+	switch {
+	case x < 0 || IsNaN(x):
+		return NaN()
+	case IsInf(x, 1):
+		return 0
+	case x == 0:
+		return Inf(-1)
+	}
+
+	if x >= 2 {
+		s, c := Sincos(x)
+		ss := -s - c // sqrt(2)*sin(x0), x0 = x-3pi/4
+		cc := s - c  // sqrt(2)*cos(x0)
+
+		// make sure x+x does not overflow
+		if x < MaxFloat64/2 {
+			z := Cos(x + x) // cos(2x) = (-s-c)*(s-c) = ss*cc
+			if s*c > 0 { // same sign: s-c cancels, so rebuild cc from ss
+				cc = z / ss
+			} else { // opposite signs (or zero product): s+c cancels, so rebuild ss from cc
+				ss = z / cc
+			}
+		}
+		// y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
+		// where x0 = x-3pi/4
+		//     Better formula:
+		//         cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
+		//                 =  1/sqrt(2) * (sin(x) - cos(x))
+		//         sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+		//                 = -1/sqrt(2) * (cos(x) + sin(x))
+		// To avoid cancellation, use
+		//     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+		// to compute the worse one.
+
+		var z float64
+		if x > Two129 { // pone/qone are skipped and only the ss term is used
+			z = (1 / SqrtPi) * ss / Sqrt(x)
+		} else {
+			u := pone(x) // P(1,x)
+			v := qone(x) // Q(1,x)
+			z = (1 / SqrtPi) * (u*ss + v*cc) / Sqrt(x)
+		}
+		return z
+	}
+	if x <= TwoM54 { // x <= 2**-54
+		return -(2 / Pi) / x // only the -2/(pi*x) term is kept for tiny x
+	}
+	z := x * x
+	u := U00 + z*(U01+z*(U02+z*(U03+z*U04)))       // U(z)
+	v := 1 + z*(V00+z*(V01+z*(V02+z*(V03+z*V04)))) // V(z)
+	return x*(u/v) + (2/Pi)*(J1(x)*Log(x)-1/x)
+}
+
+// For x >= 8, the asymptotic expansions of pone is
+//      1 + 15/128 s**2 - 4725/2**15 s**4 - ..., where s = 1/x.
+// We approximate pone by
+//      pone(x) = 1 + (R/S)
+// where R = pr0 + pr1*s**2 + pr2*s**4 + ... + pr5*s**10
+//       S = 1 + ps0*s**2 + ... + ps4*s**10
+// and
+//      | pone(x)-1-R/S | <= 2**(-60.06)
+
+// for x in [inf, 8]=1/[0,0.125]
+var p1R8 = [6]float64{
+	0.00000000000000000000e+00, // 0x0000000000000000
+	1.17187499999988647970e-01, // 0x3FBDFFFFFFFFFCCE
+	1.32394806593073575129e+01, // 0x402A7A9D357F7FCE
+	4.12051854307378562225e+02, // 0x4079C0D4652EA590
+	3.87474538913960532227e+03, // 0x40AE457DA3A532CC
+	7.91447954031891731574e+03, // 0x40BEEA7AC32782DD
+}
+var p1S8 = [5]float64{
+	1.14207370375678408436e+02, // 0x405C8D458E656CAC
+	3.65093083420853463394e+03, // 0x40AC85DC964D274F
+	3.69562060269033463555e+04, // 0x40E20B8697C5BB7F
+	9.76027935934950801311e+04, // 0x40F7D42CB28F17BB
+	3.08042720627888811578e+04, // 0x40DE1511697A0B2D
+}
+
+// for x in [8,4.5454] = 1/[0.125,0.22001]
+var p1R5 = [6]float64{
+	1.31990519556243522749e-11, // 0x3DAD0667DAE1CA7D
+	1.17187493190614097638e-01, // 0x3FBDFFFFE2C10043
+	6.80275127868432871736e+00, // 0x401B36046E6315E3
+	1.08308182990189109773e+02, // 0x405B13B9452602ED
+	5.17636139533199752805e+02, // 0x40802D16D052D649
+	5.28715201363337541807e+02, // 0x408085B8BB7E0CB7
+}
+var p1S5 = [5]float64{
+	5.92805987221131331921e+01, // 0x404DA3EAA8AF633D
+	9.91401418733614377743e+02, // 0x408EFB361B066701
+	5.35326695291487976647e+03, // 0x40B4E9445706B6FB
+	7.84469031749551231769e+03, // 0x40BEA4B0B8A5BB15
+	1.50404688810361062679e+03, // 0x40978030036F5E51
+}
+
+// for x in[4.5453,2.8571] = 1/[0.2199,0.35001]
+var p1R3 = [6]float64{
+	3.02503916137373618024e-09, // 0x3E29FC21A7AD9EDD
+	1.17186865567253592491e-01, // 0x3FBDFFF55B21D17B
+	3.93297750033315640650e+00, // 0x400F76BCE85EAD8A
+	3.51194035591636932736e+01, // 0x40418F489DA6D129
+	9.10550110750781271918e+01, // 0x4056C3854D2C1837
+	4.85590685197364919645e+01, // 0x4048478F8EA83EE5
+}
+var p1S3 = [5]float64{
+	3.47913095001251519989e+01, // 0x40416549A134069C
+	3.36762458747825746741e+02, // 0x40750C3307F1A75F
+	1.04687139975775130551e+03, // 0x40905B7C5037D523
+	8.90811346398256432622e+02, // 0x408BD67DA32E31E9
+	1.03787932439639277504e+02, // 0x4059F26D7C2EED53
+}
+
+// for x in [2.8570,2] = 1/[0.3499,0.5]
+var p1R2 = [6]float64{
+	1.07710830106873743082e-07, // 0x3E7CE9D4F65544F4
+	1.17176219462683348094e-01, // 0x3FBDFF42BE760D83
+	2.36851496667608785174e+00, // 0x4002F2B7F98FAEC0
+	1.22426109148261232917e+01, // 0x40287C377F71A964
+	1.76939711271687727390e+01, // 0x4031B1A8177F8EE2
+	5.07352312588818499250e+00, // 0x40144B49A574C1FE
+}
+var p1S2 = [5]float64{
+	2.14364859363821409488e+01, // 0x40356FBD8AD5ECDC
+	1.25290227168402751090e+02, // 0x405F529314F92CD5
+	2.32276469057162813669e+02, // 0x406D08D8D5A2DBD9
+	1.17679373287147100768e+02, // 0x405D6B7ADA1884A9
+	8.36463893371618283368e+00, // 0x4020BAB1F44E5192
+}
+
+func pone(x float64) float64 { // evaluates 1 + R/S in 1/x**2; J1 and Y1 call it only with x >= 2
+	var p *[6]float64 // numerator (R) coefficients for the interval containing x
+	var q *[5]float64 // denominator (S) coefficients for the interval containing x
+	if x >= 8 {
+		p = &p1R8
+		q = &p1S8
+	} else if x >= 4.5454 {
+		p = &p1R5
+		q = &p1S5
+	} else if x >= 2.8571 {
+		p = &p1R3
+		q = &p1S3
+	} else if x >= 2 { // no branch matches x < 2 or NaN: p and q stay nil and the indexing below would panic
+		p = &p1R2
+		q = &p1S2
+	}
+	z := 1 / (x * x) // z = s**2 with s = 1/x
+	r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) // R, degree 5 in z
+	s := 1.0 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))))  // S, degree 5 in z with constant term 1
+	return 1 + r/s
+}
+
+// For x >= 8, the asymptotic expansions of qone is
+//      3/8 s - 105/1024 s**3 - ..., where s = 1/x.
+// We approximate qone by
+//      qone(x) = s*(0.375 + (R/S))
+// where R = qr1*s**2 + qr2*s**4 + ... + qr5*s**10
+//       S = 1 + qs1*s**2 + ... + qs6*s**12
+// and
+//      | qone(x)/s -0.375-R/S | <= 2**(-61.13)
+
+// for x in [inf, 8] = 1/[0,0.125]
+var q1R8 = [6]float64{
+	0.00000000000000000000e+00,  // 0x0000000000000000
+	-1.02539062499992714161e-01, // 0xBFBA3FFFFFFFFDF3
+	-1.62717534544589987888e+01, // 0xC0304591A26779F7
+	-7.59601722513950107896e+02, // 0xC087BCD053E4B576
+	-1.18498066702429587167e+04, // 0xC0C724E740F87415
+	-4.84385124285750353010e+04, // 0xC0E7A6D065D09C6A
+}
+var q1S8 = [6]float64{
+	1.61395369700722909556e+02,  // 0x40642CA6DE5BCDE5
+	7.82538599923348465381e+03,  // 0x40BE9162D0D88419
+	1.33875336287249578163e+05,  // 0x4100579AB0B75E98
+	7.19657723683240939863e+05,  // 0x4125F65372869C19
+	6.66601232617776375264e+05,  // 0x412457D27719AD5C
+	-2.94490264303834643215e+05, // 0xC111F9690EA5AA18
+}
+
+// for x in [8,4.5454] = 1/[0.125,0.22001]
+var q1R5 = [6]float64{
+	-2.08979931141764104297e-11, // 0xBDB6FA431AA1A098
+	-1.02539050241375426231e-01, // 0xBFBA3FFFCB597FEF
+	-8.05644828123936029840e+00, // 0xC0201CE6CA03AD4B
+	-1.83669607474888380239e+02, // 0xC066F56D6CA7B9B0
+	-1.37319376065508163265e+03, // 0xC09574C66931734F
+	-2.61244440453215656817e+03, // 0xC0A468E388FDA79D
+}
+var q1S5 = [6]float64{
+	8.12765501384335777857e+01,  // 0x405451B2FF5A11B2
+	1.99179873460485964642e+03,  // 0x409F1F31E77BF839
+	1.74684851924908907677e+04,  // 0x40D10F1F0D64CE29
+	4.98514270910352279316e+04,  // 0x40E8576DAABAD197
+	2.79480751638918118260e+04,  // 0x40DB4B04CF7C364B
+	-4.71918354795128470869e+03, // 0xC0B26F2EFCFFA004
+}
+
+// for x in [4.5454,2.8571] = 1/[0.2199,0.35001] ???
+var q1R3 = [6]float64{
+	-5.07831226461766561369e-09, // 0xBE35CFA9D38FC84F
+	-1.02537829820837089745e-01, // 0xBFBA3FEB51AEED54
+	-4.61011581139473403113e+00, // 0xC01270C23302D9FF
+	-5.78472216562783643212e+01, // 0xC04CEC71C25D16DA
+	-2.28244540737631695038e+02, // 0xC06C87D34718D55F
+	-2.19210128478909325622e+02, // 0xC06B66B95F5C1BF6
+}
+var q1S3 = [6]float64{
+	4.76651550323729509273e+01,  // 0x4047D523CCD367E4
+	6.73865112676699709482e+02,  // 0x40850EEBC031EE3E
+	3.38015286679526343505e+03,  // 0x40AA684E448E7C9A
+	5.54772909720722782367e+03,  // 0x40B5ABBAA61D54A6
+	1.90311919338810798763e+03,  // 0x409DBC7A0DD4DF4B
+	-1.35201191444307340817e+02, // 0xC060E670290A311F
+}
+
+// for x in [2.8570,2] = 1/[0.3499,0.5]
+var q1R2 = [6]float64{
+	-1.78381727510958865572e-07, // 0xBE87F12644C626D2
+	-1.02517042607985553460e-01, // 0xBFBA3E8E9148B010
+	-2.75220568278187460720e+00, // 0xC006048469BB4EDA
+	-1.96636162643703720221e+01, // 0xC033A9E2C168907F
+	-4.23253133372830490089e+01, // 0xC04529A3DE104AAA
+	-2.13719211703704061733e+01, // 0xC0355F3639CF6E52
+}
+var q1S2 = [6]float64{
+	2.95333629060523854548e+01,  // 0x403D888A78AE64FF
+	2.52981549982190529136e+02,  // 0x406F9F68DB821CBA
+	7.57502834868645436472e+02,  // 0x4087AC05CE49A0F7
+	7.39393205320467245656e+02,  // 0x40871B2548D4C029
+	1.55949003336666123687e+02,  // 0x40637E5E3C3ED8D4
+	-4.95949898822628210127e+00, // 0xC013D686E71BE86B
+}
+
+func qone(x float64) float64 { // evaluates (0.375 + R/S)/x; J1 and Y1 call it only with x >= 2
+	var p, q *[6]float64 // p: numerator (R) coefficients, q: denominator (S) coefficients
+	if x >= 8 {
+		p = &q1R8
+		q = &q1S8
+	} else if x >= 4.5454 {
+		p = &q1R5
+		q = &q1S5
+	} else if x >= 2.8571 {
+		p = &q1R3
+		q = &q1S3
+	} else if x >= 2 { // no branch matches x < 2 or NaN: p and q stay nil and the indexing below would panic
+		p = &q1R2
+		q = &q1S2
+	}
+	z := 1 / (x * x) // z = s**2 with s = 1/x
+	r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))        // R, degree 5 in z
+	s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))) // S, degree 6 in z with constant term 1
+	return (0.375 + r/s) / x
+}
diff --git a/src/math/jn.go b/src/math/jn.go
new file mode 100644
index 0000000..3491692
--- /dev/null
+++ b/src/math/jn.go
@@ -0,0 +1,306 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Bessel function of the first and second kinds of order n.
+*/
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/e_jn.c and
+// came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_jn(n, x), __ieee754_yn(n, x)
+// floating point Bessel's function of the 1st and 2nd kind
+// of order n
+//
+// Special cases:
+//      y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal;
+//      y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
+// Note 2. About jn(n,x), yn(n,x)
+//      For n=0, j0(x) is called,
+//      for n=1, j1(x) is called,
+//      for n<x, forward recursion is used starting
+//      from values of j0(x) and j1(x).
+//      for n>x, a continued fraction approximation to
+//      j(n,x)/j(n-1,x) is evaluated and then backward
+//      recursion is used starting from a supposed value
+//      for j(n,x). The resulting value of j(0,x) is
+//      compared with the actual value to correct the
+//      supposed value of j(n,x).
+//
+//      yn(n,x) is similar in all respects, except
+//      that forward recursion is used for all
+//      values of n>1.
+
+// Jn returns the order-n Bessel function of the first kind.
+//
+// Special cases are:
+//
+//	Jn(n, ±Inf) = 0
+//	Jn(n, NaN) = NaN
+func Jn(n int, x float64) float64 {
+	const (
+		TwoM29 = 1.0 / (1 << 29) // 2**-29 0x3e10000000000000
+		Two302 = 1 << 302        // 2**302 0x52D0000000000000
+	)
+	// special cases
+	switch {
+	case IsNaN(x):
+		return x
+	case IsInf(x, 0):
+		return 0
+	}
+	// J(-n, x) = (-1)**n * J(n, x), J(n, -x) = (-1)**n * J(n, x)
+	// Thus, J(-n, x) = J(n, -x)
+
+	if n == 0 {
+		return J0(x)
+	}
+	if x == 0 { // n != 0 here, so the result is 0
+		return 0
+	}
+	if n < 0 { // apply J(-n, x) = J(n, -x) so that n is positive from here on
+		n, x = -n, -x
+	}
+	if n == 1 {
+		return J1(x)
+	}
+	sign := false // set when the final result has to be negated
+	if x < 0 {
+		x = -x
+		if n&1 == 1 {
+			sign = true // odd n and negative x
+		}
+	}
+	var b float64 // receives J(n, x) for the now non-negative x
+	if float64(n) <= x {
+		// Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x)
+		if x >= Two302 { // x >= 2**302
+
+			// (x >> n**2)
+			//          Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi))
+			//          Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi))
+			//          Let s=sin(x), c=cos(x),
+			//              xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
+			//
+			//                 n    sin(xn)*sqt2    cos(xn)*sqt2
+			//              ----------------------------------
+			//                 0     s-c             c+s
+			//                 1    -s-c            -c+s
+			//                 2    -s+c            -c-s
+			//                 3     s+c             c-s
+
+			var temp float64 // cos(xn)*sqt2, taken from the table above
+			switch s, c := Sincos(x); n & 3 {
+			case 0:
+				temp = c + s
+			case 1:
+				temp = -c + s
+			case 2:
+				temp = -c - s
+			case 3:
+				temp = c - s
+			}
+			b = (1 / SqrtPi) * temp / Sqrt(x) // cos(xn)*sqrt(2/(pi*x))
+		} else {
+			b = J1(x)
+			for i, a := 1, J0(x); i < n; i++ { // forward recurrence from J0 and J1 up to order n
+				a, b = b, b*(float64(i+i)/x)-a // avoid underflow
+			}
+		}
+	} else {
+		if x < TwoM29 { // x < 2**-29
+			// x is tiny, return the first Taylor expansion of J(n,x)
+			// J(n,x) = 1/n!*(x/2)**n  - ...
+
+			if n > 33 { // underflow
+				b = 0
+			} else {
+				temp := x * 0.5
+				b = temp
+				a := 1.0
+				for i := 2; i <= n; i++ {
+					a *= float64(i) // a = n!
+					b *= temp       // b = (x/2)**n
+				}
+				b /= a
+			}
+		} else {
+			// use backward recurrence
+			//                      x      x**2      x**2
+			//  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
+			//                      2n  - 2(n+1) - 2(n+2)
+			//
+			//                      1      1        1
+			//  (for large x)   =  ----  ------   ------   .....
+			//                      2n   2(n+1)   2(n+2)
+			//                      -- - ------ - ------ -
+			//                       x     x         x
+			//
+			// Let w = 2n/x and h=2/x, then the above quotient
+			// is equal to the continued fraction:
+			//                  1
+			//      = -----------------------
+			//                     1
+			//         w - -----------------
+			//                        1
+			//              w+h - ---------
+			//                     w+2h - ...
+			//
+			// To determine how many terms needed, let
+			// Q(0) = w, Q(1) = w(w+h) - 1,
+			// Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+			// When Q(k) > 1e4  good for single
+			// When Q(k) > 1e9  good for double
+			// When Q(k) > 1e17 good for quadruple
+
+			// determine k
+			w := float64(n+n) / x
+			h := 2 / x
+			q0 := w
+			z := w + h
+			q1 := w*z - 1
+			k := 1
+			for q1 < 1e9 { // iterate Q(k) until it passes the double-precision threshold above
+				k++
+				z += h
+				q0, q1 = q1, z*q1-q0
+			}
+			m := n + n
+			t := 0.0
+			for i := 2 * (n + k); i >= m; i -= 2 { // evaluate the continued fraction from its innermost term outward
+				t = 1 / (float64(i)/x - t)
+			}
+			a := t // a and b start as the ratio J(n,x)/J(n-1,x) and 1
+			b = 1
+			//  estimate log((2/x)**n*n!) = n*log(2/x)+n*ln(n)
+			//  Hence, if n*(log(2n/x)) > ...
+			//  single 8.8722839355e+01
+			//  double 7.09782712893383973096e+02
+			//  long double 1.1356523406294143949491931077970765006170e+04
+			//  then recurrent value may overflow and the result is
+			//  likely underflow to zero
+
+			tmp := float64(n)
+			v := 2 / x
+			tmp = tmp * Log(Abs(v*tmp)) // n*log(2n/x), compared with the double threshold above
+			if tmp < 7.09782712893383973096e+02 {
+				for i := n - 1; i > 0; i-- {
+					di := float64(i + i)
+					a, b = b, b*di/x-a
+				}
+			} else {
+				for i := n - 1; i > 0; i-- {
+					di := float64(i + i)
+					a, b = b, b*di/x-a
+					// scale b to avoid spurious overflow
+					if b > 1e100 {
+						a /= b
+						t /= b
+						b = 1
+					}
+				}
+			}
+			b = t * J0(x) / b // rescale with the true J0(x): b holds the unnormalized value the recurrence produced for order 0
+		}
+	}
+	if sign {
+		return -b
+	}
+	return b
+}
+
+// Yn returns the order-n Bessel function of the second kind.
+//
+// Special cases are:
+//
+//	Yn(n, +Inf) = 0
+//	Yn(n ≥ 0, 0) = -Inf
+//	Yn(n < 0, 0) = +Inf if n is odd, -Inf if n is even
+//	Yn(n, x < 0) = NaN
+//	Yn(n, NaN) = NaN
+func Yn(n int, x float64) float64 {
+	const Two302 = 1 << 302 // 2**302 0x52D0000000000000
+	// special cases
+	switch {
+	case x < 0 || IsNaN(x):
+		return NaN()
+	case IsInf(x, 1):
+		return 0
+	}
+
+	if n == 0 {
+		return Y0(x)
+	}
+	if x == 0 {
+		if n < 0 && n&1 == 1 { // negative odd order
+			return Inf(1)
+		}
+		return Inf(-1)
+	}
+	sign := false // set when the final result has to be negated
+	if n < 0 {
+		n = -n
+		if n&1 == 1 {
+			sign = true // sign true if n < 0 && |n| odd
+		}
+	}
+	if n == 1 {
+		if sign {
+			return -Y1(x)
+		}
+		return Y1(x)
+	}
+	var b float64 // receives Yn for the now positive order n
+	if x >= Two302 { // x >= 2**302
+		// (x >> n**2)
+		//	    Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi))
+		//	    Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi))
+		//	    Let s=sin(x), c=cos(x),
+		//		xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
+		//
+		//		   n	sin(xn)*sqt2	cos(xn)*sqt2
+		//		----------------------------------
+		//		   0	 s-c		 c+s
+		//		   1	-s-c 		-c+s
+		//		   2	-s+c		-c-s
+		//		   3	 s+c		 c-s
+
+		var temp float64 // sin(xn)*sqt2, taken from the table above
+		switch s, c := Sincos(x); n & 3 {
+		case 0:
+			temp = s - c
+		case 1:
+			temp = -s - c
+		case 2:
+			temp = -s + c
+		case 3:
+			temp = s + c
+		}
+		b = (1 / SqrtPi) * temp / Sqrt(x) // sin(xn)*sqrt(2/(pi*x))
+	} else {
+		a := Y0(x)
+		b = Y1(x)
+		// quit if b is -inf
+		for i := 1; i < n && !IsInf(b, -1); i++ { // forward recurrence from Y0 and Y1 up to order n
+			a, b = b, (float64(i+i)/x)*b-a
+		}
+	}
+	if sign {
+		return -b
+	}
+	return b
+}
diff --git a/src/math/ldexp.go b/src/math/ldexp.go
new file mode 100644
index 0000000..df365c0
--- /dev/null
+++ b/src/math/ldexp.go
@@ -0,0 +1,51 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Ldexp computes frac × 2**exp,
+// undoing the decomposition performed by Frexp.
+//
+// Special cases are:
+//
+//	Ldexp(±0, exp) = ±0
+//	Ldexp(±Inf, exp) = ±Inf
+//	Ldexp(NaN, exp) = NaN
+func Ldexp(frac float64, exp int) float64 {
+	if !haveArchLdexp {
+		return ldexp(frac, exp)
+	}
+	return archLdexp(frac, exp)
+}
+
+// ldexp is the portable implementation behind Ldexp: it returns frac × 2**exp.
+func ldexp(frac float64, exp int) float64 {
+	// special cases
+	switch {
+	case frac == 0 || IsInf(frac, 0) || IsNaN(frac):
+		return frac // returned unchanged; in particular -0 stays -0
+	case exp > 4096: // finite nonzero |frac| >= 2**-1074, so this must overflow; deciding now keeps the exp sums below from wrapping int
+		return Copysign(Inf(1), frac)
+	case exp < -4096: // finite |frac| < 2**1024, so this must underflow; deciding now keeps the exp sums below from wrapping int
+		return Copysign(0, frac)
+	}
+	frac, e := normalize(frac)
+	exp += e
+	x := Float64bits(frac)
+	exp += int(x>>shift)&mask - bias // add frac's own exponent field, unbiased
+	if exp < -1075 {
+		return Copysign(0, frac) // underflow
+	}
+	if exp > 1023 { // overflow
+		return Copysign(Inf(1), frac)
+	}
+	var m float64 = 1
+	if exp < -1022 { // denormal
+		exp += 53
+		m = 1.0 / (1 << 53) // 2**-53
+	}
+	x &^= mask << shift            // clear the old exponent field
+	x |= uint64(exp+bias) << shift // insert the new biased exponent
+	return m * Float64frombits(x)
+}
diff --git a/src/math/lgamma.go b/src/math/lgamma.go
new file mode 100644
index 0000000..4058ad6
--- /dev/null
+++ b/src/math/lgamma.go
@@ -0,0 +1,366 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point logarithm of the Gamma function.
+*/
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/e_lgamma_r.c and
+// came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_lgamma_r(x, signgamp)
+// Reentrant version of the logarithm of the Gamma function
+// with user provided pointer for the sign of Gamma(x).
+//
+// Method:
+//   1. Argument Reduction for 0 < x <= 8
+//      Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
+//      reduce x to a number in [1.5,2.5] by
+//              lgamma(1+s) = log(s) + lgamma(s)
+//      for example,
+//              lgamma(7.3) = log(6.3) + lgamma(6.3)
+//                          = log(6.3*5.3) + lgamma(5.3)
+//                          = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
+//   2. Polynomial approximation of lgamma around its
+//      minimum (ymin=1.461632144968362245) to maintain monotonicity.
+//      On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
+//              Let z = x-ymin;
+//              lgamma(x) = -1.214862905358496078218 + z**2*poly(z)
+//              poly(z) is a 14 degree polynomial.
+//   2. Rational approximation in the primary interval [2,3]
+//      We use the following approximation:
+//              s = x-2.0;
+//              lgamma(x) = 0.5*s + s*P(s)/Q(s)
+//      with accuracy
+//              |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
+//      Our algorithms are based on the following observation
+//
+//                             zeta(2)-1    2    zeta(3)-1    3
+// lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
+//                                 2                 3
+//
+//      where Euler = 0.5772156649... is the Euler constant, which
+//      is very close to 0.5.
+//
+//   3. For x>=8, we have
+//      lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
+//      (better formula:
+//         lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
+//      Let z = 1/x, then we approximate
+//              f(z) = lgamma(x) - (x-0.5)(log(x)-1)
+//      by
+//                                  3       5             11
+//              w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
+//      where
+//              |w - f(z)| < 2**-58.74
+//
+//   4. For negative x, since (G is gamma function)
+//              -x*G(-x)*G(x) = pi/sin(pi*x),
+//      we have
+//              G(x) = pi/(sin(pi*x)*(-x)*G(-x))
+//      since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
+//      Hence, for x<0, signgam = sign(sin(pi*x)) and
+//              lgamma(x) = log(|Gamma(x)|)
+//                        = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
+//      Note: one should avoid computing pi*(-x) directly in the
+//            computation of sin(pi*(-x)).
+//
+//   5. Special Cases
+//              lgamma(2+s) ~ s*(1-Euler) for tiny s
+//              lgamma(1)=lgamma(2)=0
+//              lgamma(x) ~ -log(x) for tiny x
+//              lgamma(0) = lgamma(inf) = inf
+//              lgamma(-integer) = +-inf
+//
+//
+
+var _lgamA = [...]float64{ // polynomial coefficients Lgamma uses when y = 1-x or y = 2-x (its "case 0")
+	7.72156649015328655494e-02, // 0x3FB3C467E37DB0C8
+	3.22467033424113591611e-01, // 0x3FD4A34CC4A60FAD
+	6.73523010531292681824e-02, // 0x3FB13E001A5562A7
+	2.05808084325167332806e-02, // 0x3F951322AC92547B
+	7.38555086081402883957e-03, // 0x3F7E404FB68FEFE8
+	2.89051383673415629091e-03, // 0x3F67ADD8CCB7926B
+	1.19270763183362067845e-03, // 0x3F538A94116F3F5D
+	5.10069792153511336608e-04, // 0x3F40B6C689B99C00
+	2.20862790713908385557e-04, // 0x3F2CF2ECED10E54D
+	1.08011567247583939954e-04, // 0x3F1C5088987DFB07
+	2.52144565451257326939e-05, // 0x3EFA7074428CFA52
+	4.48640949618915160150e-05, // 0x3F07858E90A45837
+}
+var _lgamR = [...]float64{ // denominator coefficients of the rational approximation Lgamma uses for 2 <= x < 8
+	1.0,                        // placeholder: Lgamma writes the constant term as a literal 1 and never reads index 0
+	1.39200533467621045958e+00, // 0x3FF645A762C4AB74
+	7.21935547567138069525e-01, // 0x3FE71A1893D3DCDC
+	1.71933865632803078993e-01, // 0x3FC601EDCCFBDF27
+	1.86459191715652901344e-02, // 0x3F9317EA742ED475
+	7.77942496381893596434e-04, // 0x3F497DDACA41A95B
+	7.32668430744625636189e-06, // 0x3EDEBAF7A5B38140
+}
+var _lgamS = [...]float64{ // numerator coefficients of the rational approximation Lgamma uses for 2 <= x < 8
+	-7.72156649015328655494e-02, // 0xBFB3C467E37DB0C8
+	2.14982415960608852501e-01,  // 0x3FCB848B36E20878
+	3.25778796408930981787e-01,  // 0x3FD4D98F4F139F59
+	1.46350472652464452805e-01,  // 0x3FC2BB9CBEE5F2F7
+	2.66422703033638609560e-02,  // 0x3F9B481C7E939961
+	1.84028451407337715652e-03,  // 0x3F5E26B67368F239
+	3.19475326584100867617e-05,  // 0x3F00BFECDD17E945
+}
+var _lgamT = [...]float64{ // polynomial coefficients Lgamma uses when y = x-Tc or y = x-(Tc-1) (its "case 1")
+	4.83836122723810047042e-01,  // 0x3FDEF72BC8EE38A2
+	-1.47587722994593911752e-01, // 0xBFC2E4278DC6C509
+	6.46249402391333854778e-02,  // 0x3FB08B4294D5419B
+	-3.27885410759859649565e-02, // 0xBFA0C9A8DF35B713
+	1.79706750811820387126e-02,  // 0x3F9266E7970AF9EC
+	-1.03142241298341437450e-02, // 0xBF851F9FBA91EC6A
+	6.10053870246291332635e-03,  // 0x3F78FCE0E370E344
+	-3.68452016781138256760e-03, // 0xBF6E2EFFB3E914D7
+	2.25964780900612472250e-03,  // 0x3F6282D32E15C915
+	-1.40346469989232843813e-03, // 0xBF56FE8EBF2D1AF1
+	8.81081882437654011382e-04,  // 0x3F4CDF0CEF61A8E9
+	-5.38595305356740546715e-04, // 0xBF41A6109C73E0EC
+	3.15632070903625950361e-04,  // 0x3F34AF6D6C0EBBF7
+	-3.12754168375120860518e-04, // 0xBF347F24ECC38C38
+	3.35529192635519073543e-04,  // 0x3F35FD3EE8C2D3F4
+}
+var _lgamU = [...]float64{ // numerator coefficients of the rational approximation Lgamma uses when y = x or y = x-1 (its "case 2")
+	-7.72156649015328655494e-02, // 0xBFB3C467E37DB0C8
+	6.32827064025093366517e-01,  // 0x3FE4401E8B005DFF
+	1.45492250137234768737e+00,  // 0x3FF7475CD119BD6F
+	9.77717527963372745603e-01,  // 0x3FEF497644EA8450
+	2.28963728064692451092e-01,  // 0x3FCD4EAEF6010924
+	1.33810918536787660377e-02,  // 0x3F8B678BBF2BAB09
+}
+var _lgamV = [...]float64{ // denominator coefficients paired with _lgamU
+	1.0,                        // placeholder: Lgamma writes the constant term as a literal 1 and never reads index 0
+	2.45597793713041134822e+00, // 0x4003A5D7C2BD619C
+	2.12848976379893395361e+00, // 0x40010725A42B18F5
+	7.69285150456672783825e-01, // 0x3FE89DFBE45050AF
+	1.04222645593369134254e-01, // 0x3FBAAE55D6537C88
+	3.21709242282423911810e-03, // 0x3F6A5ABB57D0CF61
+}
+var _lgamW = [...]float64{ // coefficients w0..w6 of the correction series Lgamma uses for 8 <= x < 2**58
+	4.18938533204672725052e-01,  // 0x3FDACFE390C97D69
+	8.33333333333329678849e-02,  // 0x3FB555555555553B
+	-2.77777777728775536470e-03, // 0xBF66C16C16B02E5C
+	7.93650558643019558500e-04,  // 0x3F4A019F98CF38B6
+	-5.95187557450339963135e-04, // 0xBF4380CB8C0FE741
+	8.36339918996282139126e-04,  // 0x3F4B67BA4CDAD5D1
+	-1.63092934096575273989e-03, // 0xBF5AB89D0B9E43E4
+}
+
+// Lgamma returns the natural logarithm and sign (-1 or +1) of Gamma(x).
+//
+// Special cases are:
+//
+//	Lgamma(+Inf) = +Inf
+//	Lgamma(0) = +Inf
+//	Lgamma(-integer) = +Inf
+//	Lgamma(-Inf) = -Inf
+//	Lgamma(NaN) = NaN
+func Lgamma(x float64) (lgamma float64, sign int) {
+	const (
+		Ymin  = 1.461632144968362245
+		Two52 = 1 << 52                     // 0x4330000000000000 ~4.5036e+15
+		Two53 = 1 << 53                     // 0x4340000000000000 ~9.0072e+15 (not referenced in this function)
+		Two58 = 1 << 58                     // 0x4390000000000000 ~2.8823e+17
+		Tiny  = 1.0 / (1 << 70)             // 0x3b90000000000000 ~8.47033e-22
+		Tc    = 1.46163214496836224576e+00  // 0x3FF762D86356BE3F
+		Tf    = -1.21486290535849611461e-01 // 0xBFBF19B9BCC38A42
+		// Tt = -(tail of Tf)
+		Tt = -3.63867699703950536541e-18 // 0xBC50C7CAA48A971F
+	)
+	// special cases
+	sign = 1
+	switch {
+	case IsNaN(x):
+		lgamma = x
+		return
+	case IsInf(x, 0): // ±Inf is returned as is, with sign = 1
+		lgamma = x
+		return
+	case x == 0:
+		lgamma = Inf(1)
+		return
+	}
+
+	neg := false // remembers that the argument was negative; x is |argument| from here on
+	if x < 0 {
+		x = -x
+		neg = true
+	}
+
+	if x < Tiny { // if |x| < 2**-70, return -log(|x|)
+		if neg {
+			sign = -1
+		}
+		lgamma = -Log(x)
+		return
+	}
+	var nadj float64 // reflection term, only set for negative arguments
+	if neg {
+		if x >= Two52 { // |x| >= 2**52, must be -integer
+			lgamma = Inf(1)
+			return
+		}
+		t := sinPi(x)
+		if t == 0 {
+			lgamma = Inf(1) // -integer
+			return
+		}
+		nadj = Log(Pi / Abs(t*x)) // log(pi / |x*sin(pi*x)|)
+		if t < 0 {
+			sign = -1
+		}
+	}
+
+	switch {
+	case x == 1 || x == 2: // purge off 1 and 2
+		lgamma = 0
+		return
+	case x < 2: // use lgamma(x) = lgamma(x+1) - log(x)
+		var y float64 // reduced argument fed to the selected approximation
+		var i int     // which approximation to use: 0 (_lgamA), 1 (_lgamT) or 2 (_lgamU/_lgamV)
+		if x <= 0.9 {
+			lgamma = -Log(x)
+			switch {
+			case x >= (Ymin - 1 + 0.27): // 0.7316 <= x <=  0.9
+				y = 1 - x
+				i = 0
+			case x >= (Ymin - 1 - 0.27): // 0.1916 <= x < 0.7316 (NOTE(review): the header comment gives ymin-0.23, i.e. 0.2316 — confirm)
+				y = x - (Tc - 1)
+				i = 1
+			default: // 0 < x < 0.1916
+				y = x
+				i = 2
+			}
+		} else {
+			lgamma = 0
+			switch {
+			case x >= (Ymin + 0.27): // 1.7316 <= x < 2
+				y = 2 - x
+				i = 0
+			case x >= (Ymin - 0.27): // 1.1916 <= x < 1.7316 (NOTE(review): the header comment gives ymin-0.23, i.e. 1.2316 — confirm)
+				y = x - Tc
+				i = 1
+			default: // 0.9 < x < 1.1916
+				y = x - 1
+				i = 2
+			}
+		}
+		switch i {
+		case 0:
+			z := y * y
+			p1 := _lgamA[0] + z*(_lgamA[2]+z*(_lgamA[4]+z*(_lgamA[6]+z*(_lgamA[8]+z*_lgamA[10])))) // even-index coefficients
+			p2 := z * (_lgamA[1] + z*(+_lgamA[3]+z*(_lgamA[5]+z*(_lgamA[7]+z*(_lgamA[9]+z*_lgamA[11]))))) // odd-index coefficients
+			p := y*p1 + p2
+			lgamma += (p - 0.5*y)
+		case 1:
+			z := y * y
+			w := z * y
+			p1 := _lgamT[0] + w*(_lgamT[3]+w*(_lgamT[6]+w*(_lgamT[9]+w*_lgamT[12]))) // parallel comp
+			p2 := _lgamT[1] + w*(_lgamT[4]+w*(_lgamT[7]+w*(_lgamT[10]+w*_lgamT[13])))
+			p3 := _lgamT[2] + w*(_lgamT[5]+w*(_lgamT[8]+w*(_lgamT[11]+w*_lgamT[14])))
+			p := z*p1 - (Tt - w*(p2+y*p3))
+			lgamma += (Tf + p)
+		case 2:
+			p1 := y * (_lgamU[0] + y*(_lgamU[1]+y*(_lgamU[2]+y*(_lgamU[3]+y*(_lgamU[4]+y*_lgamU[5])))))
+			p2 := 1 + y*(_lgamV[1]+y*(_lgamV[2]+y*(_lgamV[3]+y*(_lgamV[4]+y*_lgamV[5]))))
+			lgamma += (-0.5*y + p1/p2)
+		}
+	case x < 8: // 2 <= x < 8
+		i := int(x)          // integer part, 2..7
+		y := x - float64(i) // fractional part
+		p := y * (_lgamS[0] + y*(_lgamS[1]+y*(_lgamS[2]+y*(_lgamS[3]+y*(_lgamS[4]+y*(_lgamS[5]+y*_lgamS[6]))))))
+		q := 1 + y*(_lgamR[1]+y*(_lgamR[2]+y*(_lgamR[3]+y*(_lgamR[4]+y*(_lgamR[5]+y*_lgamR[6])))))
+		lgamma = 0.5*y + p/q // the rational approximation evaluated at 2+y
+		z := 1.0 // Lgamma(1+s) = Log(s) + Lgamma(s)
+		switch i { // the fallthrough chain builds z = (y+2)*(y+3)*...*(y+i-1)
+		case 7:
+			z *= (y + 6)
+			fallthrough
+		case 6:
+			z *= (y + 5)
+			fallthrough
+		case 5:
+			z *= (y + 4)
+			fallthrough
+		case 4:
+			z *= (y + 3)
+			fallthrough
+		case 3:
+			z *= (y + 2)
+			lgamma += Log(z)
+		}
+	case x < Two58: // 8 <= x < 2**58
+		t := Log(x)
+		z := 1 / x
+		y := z * z
+		w := _lgamW[0] + z*(_lgamW[1]+y*(_lgamW[2]+y*(_lgamW[3]+y*(_lgamW[4]+y*(_lgamW[5]+y*_lgamW[6])))))
+		lgamma = (x-0.5)*(t-1) + w
+	default: // 2**58 <= x <= Inf
+		lgamma = x * (Log(x) - 1)
+	}
+	if neg {
+		lgamma = nadj - lgamma // log(pi/|x*sin(pi*x)|) - lgamma(|x|)
+	}
+	return
+}
+
+// sinPi returns -sin(Pi*x); Lgamma calls it with x = |argument| when the argument is negative.
+func sinPi(x float64) float64 {
+	const (
+		Two52 = 1 << 52 // 0x4330000000000000 ~4.5036e+15
+		Two53 = 1 << 53 // 0x4340000000000000 ~9.0072e+15
+	)
+	if x < 0.25 { // small enough to evaluate directly
+		return -Sin(Pi * x)
+	}
+
+	// argument reduction
+	z := Floor(x)
+	var n int // selects the reduction formula in the switch below
+	if z != x { // x is not an integer
+		x = Mod(x, 2)
+		n = int(x * 4) // 0..7
+	} else {
+		if x >= Two53 { // x must be even
+			x = 0
+			n = 0
+		} else {
+			if x < Two52 {
+				z = x + Two52 // exact
+			}
+			n = int(1 & Float64bits(z)) // lowest mantissa bit of z, i.e. the parity of the integer x
+			x = float64(n)
+			n <<= 2 // 0 for even x, 4 for odd x
+		}
+	}
+	switch n { // every case evaluates sin(Pi*x) with an argument of small magnitude
+	case 0:
+		x = Sin(Pi * x)
+	case 1, 2:
+		x = Cos(Pi * (0.5 - x))
+	case 3, 4:
+		x = Sin(Pi * (1 - x))
+	case 5, 6:
+		x = -Cos(Pi * (x - 1.5))
+	default:
+		x = Sin(Pi * (x - 2))
+	}
+	return -x
+}
diff --git a/src/math/log.go b/src/math/log.go
new file mode 100644
index 0000000..695a545
--- /dev/null
+++ b/src/math/log.go
@@ -0,0 +1,129 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point logarithm.
+*/
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/e_log.c
+// and came with this notice. The Go code is a simpler
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_log(x)
+// Return the logarithm of x
+//
+// Method :
+//   1. Argument Reduction: find k and f such that
+//			x = 2**k * (1+f),
+//	   where  sqrt(2)/2 < 1+f < sqrt(2) .
+//
+//   2. Approximation of log(1+f).
+//	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+//		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+//	     	 = 2s + s*R
+//      We use a special Remez algorithm on [0,0.1716] to generate
+//	a polynomial of degree 14 to approximate R.  The maximum error
+//	of this polynomial approximation is bounded by 2**-58.45. In
+//	other words,
+//		        2      4      6      8      10      12      14
+//	    R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s  +L6*s  +L7*s
+//	(the values of L1 to L7 are listed in the program) and
+//	    |      2          14          |     -58.45
+//	    | L1*s +...+L7*s    -  R(z) | <= 2
+//	    |                             |
+//	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+//	In order to guarantee error in log below 1ulp, we compute log by
+//		log(1+f) = f - s*(f - R)		(if f is not too large)
+//		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
+//
+//	3. Finally,  log(x) = k*Ln2 + log(1+f).
+//			    = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
+//	   Here Ln2 is split into two floating point number:
+//			Ln2_hi + Ln2_lo,
+//	   where n*Ln2_hi is always exact for |n| < 2000.
+//
+// Special cases:
+//	log(x) is NaN with signal if x < 0 (including -INF) ;
+//	log(+INF) is +INF; log(0) is -INF with signal;
+//	log(NaN) is that NaN with no signal.
+//
+// Accuracy:
+//	according to an error analysis, the error is always less than
+//	1 ulp (unit in the last place).
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+
+// Log computes the natural (base-e) logarithm of x.
+//
+// Special cases are:
+//
+//	Log(+Inf) = +Inf
+//	Log(0) = -Inf
+//	Log(x < 0) = NaN
+//	Log(NaN) = NaN
+func Log(x float64) float64 {
+	if !haveArchLog {
+		return log(x)
+	}
+	return archLog(x)
+}
+
+func log(x float64) float64 { // portable implementation behind Log
+	const (
+		Ln2Hi = 6.93147180369123816490e-01 /* 3fe62e42 fee00000 */
+		Ln2Lo = 1.90821492927058770002e-10 /* 3dea39ef 35793c76 */
+		L1    = 6.666666666666735130e-01   /* 3FE55555 55555593 */
+		L2    = 3.999999999940941908e-01   /* 3FD99999 9997FA04 */
+		L3    = 2.857142874366239149e-01   /* 3FD24924 94229359 */
+		L4    = 2.222219843214978396e-01   /* 3FCC71C5 1D8E78AF */
+		L5    = 1.818357216161805012e-01   /* 3FC74664 96CB03DE */
+		L6    = 1.531383769920937332e-01   /* 3FC39A09 D078C69F */
+		L7    = 1.479819860511658591e-01   /* 3FC2F112 DF3E5244 */
+	)
+
+	// special cases
+	switch {
+	case IsNaN(x) || IsInf(x, 1):
+		return x
+	case x < 0:
+		return NaN()
+	case x == 0:
+		return Inf(-1)
+	}
+
+	// reduce
+	f1, ki := Frexp(x) // x = f1 * 2**ki
+	if f1 < Sqrt2/2 { // renormalize so that f1 is not below sqrt(2)/2
+		f1 *= 2
+		ki--
+	}
+	f := f1 - 1 // x = 2**k * (1+f)
+	k := float64(ki)
+
+	// compute
+	s := f / (2 + f)
+	s2 := s * s
+	s4 := s2 * s2
+	t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) // odd-numbered terms L1, L3, L5, L7
+	t2 := s4 * (L2 + s4*(L4+s4*L6))         // even-numbered terms L2, L4, L6
+	R := t1 + t2
+	hfsq := 0.5 * f * f
+	return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f) // k*Ln2 + log(1+f), with Ln2 split into Ln2Hi + Ln2Lo
+}
diff --git a/src/math/log10.go b/src/math/log10.go
new file mode 100644
index 0000000..e6916a5
--- /dev/null
+++ b/src/math/log10.go
@@ -0,0 +1,37 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Log10 computes the base-10 logarithm of x.
+// Its special cases match those of Log.
+func Log10(x float64) float64 {
+	if !haveArchLog10 {
+		return log10(x)
+	}
+	return archLog10(x)
+}
+
+func log10(x float64) float64 {
+	return (1 / Ln10) * Log(x) // natural log rescaled by the constant 1/ln(10)
+}
+
+// Log2 computes the base-2 logarithm of x.
+// Its special cases match those of Log.
+func Log2(x float64) float64 {
+	if !haveArchLog2 {
+		return log2(x)
+	}
+	return archLog2(x)
+}
+
+func log2(x float64) float64 {
+	mant, e := Frexp(x)
+	// An exact power of two has mantissa 0.5; answer it exactly rather
+	// than trusting Log(0.5)*(1/Ln2)+e to come out as exactly e-1.
+	if mant != 0.5 {
+		return Log(mant)*(1/Ln2) + float64(e)
+	}
+	return float64(e - 1)
+}
diff --git a/src/math/log10_s390x.s b/src/math/log10_s390x.s
new file mode 100644
index 0000000..3638afe
--- /dev/null
+++ b/src/math/log10_s390x.s
@@ -0,0 +1,156 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+DATA log10rodataL19<>+0(SB)/8, $0.000000000000000000E+00
+DATA log10rodataL19<>+8(SB)/8, $-1.0
+DATA log10rodataL19<>+16(SB)/8, $0x7FF8000000000000   // +NaN
+DATA log10rodataL19<>+24(SB)/8, $.15375570329280596749
+DATA log10rodataL19<>+32(SB)/8, $.60171950900703668594E+04
+DATA log10rodataL19<>+40(SB)/8, $-1.9578460454940795898
+DATA log10rodataL19<>+48(SB)/8, $0.78962633073318517310E-01
+DATA log10rodataL19<>+56(SB)/8, $-.71784211884836937993E-02
+DATA log10rodataL19<>+64(SB)/8, $0.87011165920689940661E-03
+DATA log10rodataL19<>+72(SB)/8, $-.11865158981621437541E-03
+DATA log10rodataL19<>+80(SB)/8, $0.17258413403018680410E-04
+DATA log10rodataL19<>+88(SB)/8, $0.40752932047883484315E-06
+DATA log10rodataL19<>+96(SB)/8, $-.26149194688832680410E-05
+DATA log10rodataL19<>+104(SB)/8, $0.92453396963875026759E-08
+DATA log10rodataL19<>+112(SB)/8, $-.64572084905921579630E-07
+DATA log10rodataL19<>+120(SB)/8, $-5.5
+DATA log10rodataL19<>+128(SB)/8, $18446744073709551616.
+GLOBL log10rodataL19<>+0(SB), RODATA, $136
+
+// Table of log10 correction terms
+DATA log10tab2074<>+0(SB)/8, $0.254164497922885069E-01
+DATA log10tab2074<>+8(SB)/8, $0.179018857989381839E-01
+DATA log10tab2074<>+16(SB)/8, $0.118926768029048674E-01
+DATA log10tab2074<>+24(SB)/8, $0.722595568238080033E-02
+DATA log10tab2074<>+32(SB)/8, $0.376393570022739135E-02
+DATA log10tab2074<>+40(SB)/8, $0.138901135928814326E-02
+DATA log10tab2074<>+48(SB)/8, $0
+DATA log10tab2074<>+56(SB)/8, $-0.490780466387818203E-03
+DATA log10tab2074<>+64(SB)/8, $-0.159811431402137571E-03
+DATA log10tab2074<>+72(SB)/8, $0.925796337165100494E-03
+DATA log10tab2074<>+80(SB)/8, $0.270683176738357035E-02
+DATA log10tab2074<>+88(SB)/8, $0.513079030821304758E-02
+DATA log10tab2074<>+96(SB)/8, $0.815089785397996303E-02
+DATA log10tab2074<>+104(SB)/8, $0.117253060262419215E-01
+DATA log10tab2074<>+112(SB)/8, $0.158164239345343963E-01
+DATA log10tab2074<>+120(SB)/8, $0.203903595489229786E-01
+GLOBL log10tab2074<>+0(SB), RODATA, $128
+
+// Log10 returns the decimal logarithm of the argument.
+//
+// Special cases are:
+//      Log10(+Inf) = +Inf
+//      Log10(0) = -Inf
+//      Log10(x < 0) = NaN
+//      Log10(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·log10Asm(SB),NOSPLIT,$8-16
+	FMOVD   x+0(FP), F0 // F0 = x
+	MOVD    $log10rodataL19<>+0(SB), R9 // R9 = base address of the constant table
+	FMOVD   F0, x-8(SP)
+	WORD    $0xC0298006     //iilf %r2,2147909631
+	BYTE    $0x7F
+	BYTE    $0xFF
+	WORD    $0x5840F008     //l %r4, 8(%r15)
+	SUBW    R4, R2, R3
+	RISBGZ	$32, $47, $0, R3, R5
+	MOVH    $0x0, R1
+	RISBGN	$0, $31, $32, R5, R1
+	WORD    $0xC0590016     //iilf %r5,1507327
+	BYTE    $0xFF
+	BYTE    $0xFF
+	MOVW    R4, R10
+	MOVW    R5, R11
+	CMPBLE  R10, R11, L2
+	WORD    $0xC0297FEF     //iilf %r2,2146435071
+	BYTE    $0xFF
+	BYTE    $0xFF
+	MOVW    R4, R10
+	MOVW    R2, R11
+	CMPBLE  R10, R11, L16
+L3:
+L1: // common exit: on fall-through F0 still holds x unchanged; the special-case paths below also branch here
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L2:
+	LTDBR	F0, F0
+	BLEU    L13
+	WORD    $0xED009080     //mdb %f0,.L20-.L19(%r9)  (presumably multiplies x by the 2**64 constant at table offset 128 — confirm)
+	BYTE    $0x00
+	BYTE    $0x1C
+	FMOVD   F0, x-8(SP)
+	WORD    $0x5B20F008     //s %r2, 8(%r15)
+	RISBGZ	$57, $60, $51, R2, R3
+	ANDW    $0xFFFF0000, R2
+	RISBGN	$0, $31, $32, R2, R1
+	ADDW    $0x4000000, R2
+	BLEU    L17
+L8:
+	SRW     $8, R2, R2
+	ORW     $0x45000000, R2
+L4: // main path: polynomial evaluation using the coefficients in log10rodataL19
+	FMOVD   log10rodataL19<>+120(SB), F2
+	LDGR    R1, F4
+	WFMADB  V4, V0, V2, V0
+	FMOVD   log10rodataL19<>+112(SB), F4
+	FMOVD   log10rodataL19<>+104(SB), F6
+	WFMADB  V0, V6, V4, V6
+	FMOVD   log10rodataL19<>+96(SB), F4
+	FMOVD   log10rodataL19<>+88(SB), F1
+	WFMADB  V0, V1, V4, V1
+	WFMDB   V0, V0, V4
+	FMOVD   log10rodataL19<>+80(SB), F2
+	WFMADB  V6, V4, V1, V6
+	FMOVD   log10rodataL19<>+72(SB), F1
+	WFMADB  V0, V2, V1, V2
+	FMOVD   log10rodataL19<>+64(SB), F1
+	RISBGZ	$57, $60, $0, R3, R3
+	WFMADB  V4, V6, V2, V6
+	FMOVD   log10rodataL19<>+56(SB), F2
+	WFMADB  V0, V1, V2, V1
+	VLVGF   $0, R2, V2
+	WFMADB  V4, V6, V1, V4
+	LDEBR   F2, F2
+	FMOVD   log10rodataL19<>+48(SB), F6
+	WFMADB  V0, V4, V6, V4
+	FMOVD   log10rodataL19<>+40(SB), F1
+	FMOVD   log10rodataL19<>+32(SB), F6
+	MOVD    $log10tab2074<>+0(SB), R1 // R1 = base address of the correction-term table
+	WFMADB  V2, V1, V6, V2
+	WORD    $0x68331000     //ld %f3,0(%r3,%r1)
+	WFMADB  V0, V4, V3, V0
+	FMOVD   log10rodataL19<>+24(SB), F4
+	FMADD   F4, F2, F0
+	FMOVD   F0, ret+8(FP) // store the computed result
+	RET
+
+L16:
+	RISBGZ	$40, $55, $56, R3, R2
+	RISBGZ	$57, $60, $51, R3, R3
+	ORW     $0x45000000, R2
+	BR      L4
+L13:
+	BGE     L18     //jnl .L18
+	BVS     L18
+	FMOVD   log10rodataL19<>+16(SB), F0 // F0 = the NaN constant at table offset 16
+	BR      L1
+L17:
+	SRAW    $1, R2, R2
+	SUBW    $0x40000000, R2
+	BR      L8
+L18:
+	FMOVD   log10rodataL19<>+8(SB), F0 // F0 = -1.0, the constant at table offset 8
+	WORD    $0xED009000     //ddb %f0,.L36-.L19(%r9)  (presumably divides by the 0.0 at table offset 0, giving -Inf — confirm)
+	BYTE    $0x00
+	BYTE    $0x1D
+	BR      L1
diff --git a/src/math/log1p.go b/src/math/log1p.go
new file mode 100644
index 0000000..3a7b385
--- /dev/null
+++ b/src/math/log1p.go
@@ -0,0 +1,203 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
+// and came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// double log1p(double x)
+//
+// Method :
+//   1. Argument Reduction: find k and f such that
+//                      1+x = 2**k * (1+f),
+//         where  sqrt(2)/2 < 1+f < sqrt(2) .
+//
+//      Note. If k=0, then f=x is exact. However, if k!=0, then f
+//      may not be representable exactly. In that case, a correction
+//      term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
+//      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
+//      and add back the correction term c/u.
+//      (Note: when x > 2**53, one can simply return log(x))
+//
+//   2. Approximation of log1p(f).
+//      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+//               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+//               = 2s + s*R
+//      We use a special Remez algorithm on [0,0.1716] to generate
+//      a polynomial of degree 14 to approximate R The maximum error
+//      of this polynomial approximation is bounded by 2**-58.45. In
+//      other words,
+//                      2      4      6      8      10      12      14
+//          R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
+//      (the values of Lp1 to Lp7 are listed in the program)
+//      and
+//          |      2          14          |     -58.45
+//          | Lp1*s +...+Lp7*s    -  R(z) | <= 2
+//          |                             |
+//      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+//      In order to guarantee error in log below 1ulp, we compute log
+//      by
+//              log1p(f) = f - (hfsq - s*(hfsq+R)).
+//
+//   3. Finally, log1p(x) = k*ln2 + log1p(f).
+//                        = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
+//      Here ln2 is split into two floating point number:
+//                   ln2_hi + ln2_lo,
+//      where n*ln2_hi is always exact for |n| < 2000.
+//
+// Special cases:
+//      log1p(x) is NaN with signal if x < -1 (including -INF) ;
+//      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
+//      log1p(NaN) is that NaN with no signal.
+//
+// Accuracy:
+//      according to an error analysis, the error is always less than
+//      1 ulp (unit in the last place).
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+//
+// Note: Assuming log() return accurate answer, the following
+//       algorithm can be used to compute log1p(x) to within a few ULP:
+//
+//              u = 1+x;
+//              if(u==1.0) return x ; else
+//                         return log(u)*(x/(u-1.0));
+//
+//       See HP-15C Advanced Functions Handbook, p.193.
+
+// Log1p returns the natural logarithm of 1 plus its argument x.
+// It is more accurate than Log(1 + x) when x is near zero.
+//
+// Special cases are:
+//
+//	Log1p(+Inf) = +Inf
+//	Log1p(±0) = ±0
+//	Log1p(-1) = -Inf
+//	Log1p(x < -1) = NaN
+//	Log1p(NaN) = NaN
+func Log1p(x float64) float64 {
+	if haveArchLog1p { // dispatch to archLog1p whenever haveArchLog1p is set
+		return archLog1p(x)
+	}
+	return log1p(x) // otherwise use the pure Go implementation below
+}
+
+func log1p(x float64) float64 { // pure Go implementation; Log1p calls it when haveArchLog1p is false
+	const (
+		Sqrt2M1     = 4.142135623730950488017e-01  // Sqrt(2)-1 = 0x3fda827999fcef34
+		Sqrt2HalfM1 = -2.928932188134524755992e-01 // Sqrt(2)/2-1 = 0xbfd2bec333018866
+		Small       = 1.0 / (1 << 29)              // 2**-29 = 0x3e20000000000000
+		Tiny        = 1.0 / (1 << 54)              // 2**-54
+		Two53       = 1 << 53                      // 2**53
+		Ln2Hi       = 6.93147180369123816490e-01   // 3fe62e42fee00000
+		Ln2Lo       = 1.90821492927058770002e-10   // 3dea39ef35793c76
+		Lp1         = 6.666666666666735130e-01     // 3FE5555555555593
+		Lp2         = 3.999999999940941908e-01     // 3FD999999997FA04
+		Lp3         = 2.857142874366239149e-01     // 3FD2492494229359
+		Lp4         = 2.222219843214978396e-01     // 3FCC71C51D8E78AF
+		Lp5         = 1.818357216161805012e-01     // 3FC7466496CB03DE
+		Lp6         = 1.531383769920937332e-01     // 3FC39A09D078C69F
+		Lp7         = 1.479819860511658591e-01     // 3FC2F112DF3E5244
+	)
+
+	// special cases
+	switch {
+	case x < -1 || IsNaN(x): // includes -Inf
+		return NaN()
+	case x == -1:
+		return Inf(-1)
+	case IsInf(x, 1):
+		return Inf(1)
+	}
+
+	absx := Abs(x)
+
+	var f float64
+	var iu uint64
+	k := 1 // stays nonzero unless the k = 0 fast path below applies
+	if absx < Sqrt2M1 { //  |x| < Sqrt(2)-1
+		if absx < Small { // |x| < 2**-29
+			if absx < Tiny { // |x| < 2**-54
+				return x
+			}
+			return x - x*x*0.5
+		}
+		if x > Sqrt2HalfM1 { // Sqrt(2)/2-1 < x
+			// (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
+			k = 0
+			f = x
+			iu = 1 // nonzero, so the iu == 0 shortcut further down is skipped
+		}
+	}
+	var c float64
+	if k != 0 {
+		var u float64
+		if absx < Two53 { // 1<<53
+			u = 1.0 + x
+			iu = Float64bits(u)
+			k = int((iu >> 52) - 1023) // unbiased binary exponent of u
+			// correction term
+			if k > 0 {
+				c = 1.0 - (u - x)
+			} else {
+				c = x - (u - 1.0)
+			}
+			c /= u
+		} else {
+			u = x
+			iu = Float64bits(u)
+			k = int((iu >> 52) - 1023) // unbiased binary exponent of u
+			c = 0
+		}
+		iu &= 0x000fffffffffffff // keep only the 52 mantissa bits
+		if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2)
+			u = Float64frombits(iu | 0x3ff0000000000000) // normalize u
+		} else {
+			k++
+			u = Float64frombits(iu | 0x3fe0000000000000) // normalize u/2
+			iu = (0x0010000000000000 - iu) >> 2 // zero only when the mantissa is within 3 of 1<<52
+		}
+		f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2)
+	}
+	hfsq := 0.5 * f * f
+	var s, R, z float64
+	if iu == 0 { // |f| < 2**-20
+		if f == 0 {
+			if k == 0 {
+				return 0
+			}
+			c += float64(k) * Ln2Lo
+			return float64(k)*Ln2Hi + c
+		}
+		R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
+		if k == 0 {
+			return f - R
+		}
+		return float64(k)*Ln2Hi - ((R - (float64(k)*Ln2Lo + c)) - f)
+	}
+	s = f / (2.0 + f)
+	z = s * s
+	R = z * (Lp1 + z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))) // Horner evaluation of the degree-14 polynomial in s
+	if k == 0 {
+		return f - (hfsq - s*(hfsq+R))
+	}
+	return float64(k)*Ln2Hi - ((hfsq - (s*(hfsq+R) + (float64(k)*Ln2Lo + c))) - f)
+}
diff --git a/src/math/log1p_s390x.s b/src/math/log1p_s390x.s
new file mode 100644
index 0000000..00eb374
--- /dev/null
+++ b/src/math/log1p_s390x.s
@@ -0,0 +1,180 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Constants
+DATA ·log1pxlim<> + 0(SB)/4, $0xfff00000
+GLOBL ·log1pxlim<> + 0(SB), RODATA, $4
+DATA ·log1pxzero<> + 0(SB)/8, $0.0
+GLOBL ·log1pxzero<> + 0(SB), RODATA, $8
+DATA ·log1pxminf<> + 0(SB)/8, $0xfff0000000000000
+GLOBL ·log1pxminf<> + 0(SB), RODATA, $8
+DATA ·log1pxnan<> + 0(SB)/8, $0x7ff8000000000000
+GLOBL ·log1pxnan<> + 0(SB), RODATA, $8
+DATA ·log1pyout<> + 0(SB)/8, $0x40fce621e71da000
+GLOBL ·log1pyout<> + 0(SB), RODATA, $8
+DATA ·log1pxout<> + 0(SB)/8, $0x40f1000000000000
+GLOBL ·log1pxout<> + 0(SB), RODATA, $8
+DATA ·log1pxl2<> + 0(SB)/8, $0xbfda7aecbeba4e46
+GLOBL ·log1pxl2<> + 0(SB), RODATA, $8
+DATA ·log1pxl1<> + 0(SB)/8, $0x3ffacde700000000
+GLOBL ·log1pxl1<> + 0(SB), RODATA, $8
+DATA ·log1pxa<> + 0(SB)/8, $5.5
+GLOBL ·log1pxa<> + 0(SB), RODATA, $8
+DATA ·log1pxmone<> + 0(SB)/8, $-1.0
+GLOBL ·log1pxmone<> + 0(SB), RODATA, $8
+
+// Minimax polynomial approximations
+DATA ·log1pc8<> + 0(SB)/8, $0.212881813645679599E-07
+GLOBL ·log1pc8<> + 0(SB), RODATA, $8
+DATA ·log1pc7<> + 0(SB)/8, $-.148682720127920854E-06
+GLOBL ·log1pc7<> + 0(SB), RODATA, $8
+DATA ·log1pc6<> + 0(SB)/8, $0.938370938292558173E-06
+GLOBL ·log1pc6<> + 0(SB), RODATA, $8
+DATA ·log1pc5<> + 0(SB)/8, $-.602107458843052029E-05
+GLOBL ·log1pc5<> + 0(SB), RODATA, $8
+DATA ·log1pc4<> + 0(SB)/8, $0.397389654305194527E-04
+GLOBL ·log1pc4<> + 0(SB), RODATA, $8
+DATA ·log1pc3<> + 0(SB)/8, $-.273205381970859341E-03
+GLOBL ·log1pc3<> + 0(SB), RODATA, $8
+DATA ·log1pc2<> + 0(SB)/8, $0.200350613573012186E-02
+GLOBL ·log1pc2<> + 0(SB), RODATA, $8
+DATA ·log1pc1<> + 0(SB)/8, $-.165289256198351540E-01
+GLOBL ·log1pc1<> + 0(SB), RODATA, $8
+DATA ·log1pc0<> + 0(SB)/8, $0.181818181818181826E+00
+GLOBL ·log1pc0<> + 0(SB), RODATA, $8
+
+
+// Table of correction terms used by log1pAsm
+DATA ·log1ptab<> + 0(SB)/8, $0.585235384085551248E-01
+DATA ·log1ptab<> + 8(SB)/8, $0.412206153771168640E-01
+DATA ·log1ptab<> + 16(SB)/8, $0.273839003221648339E-01
+DATA ·log1ptab<> + 24(SB)/8, $0.166383778368856480E-01
+DATA ·log1ptab<> + 32(SB)/8, $0.866678223433169637E-02
+DATA ·log1ptab<> + 40(SB)/8, $0.319831684989627514E-02
+DATA ·log1ptab<> + 48(SB)/8, $-.000000000000000000E+00
+DATA ·log1ptab<> + 56(SB)/8, $-.113006378583725549E-02
+DATA ·log1ptab<> + 64(SB)/8, $-.367979419636602491E-03
+DATA ·log1ptab<> + 72(SB)/8, $0.213172484510484979E-02
+DATA ·log1ptab<> + 80(SB)/8, $0.623271047682013536E-02
+DATA ·log1ptab<> + 88(SB)/8, $0.118140812789696885E-01
+DATA ·log1ptab<> + 96(SB)/8, $0.187681358930914206E-01
+DATA ·log1ptab<> + 104(SB)/8, $0.269985148668178992E-01
+DATA ·log1ptab<> + 112(SB)/8, $0.364186619761331328E-01
+DATA ·log1ptab<> + 120(SB)/8, $0.469505379381388441E-01
+GLOBL ·log1ptab<> + 0(SB), RODATA, $128
+
+// Log1p returns the natural logarithm of 1 plus its argument x.
+// It is more accurate than Log(1 + x) when x is near zero.
+//
+// Special cases are:
+//      Log1p(+Inf) = +Inf
+//      Log1p(±0) = ±0
+//      Log1p(-1) = -Inf
+//      Log1p(x < -1) = NaN
+//      Log1p(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT	·log1pAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	MOVD	$·log1pxmone<>+0(SB), R1
+	MOVD	·log1pxout<>+0(SB), R2
+	FMOVD	0(R1), F3
+	MOVD	$·log1pxa<>+0(SB), R1
+	MOVWZ	·log1pxlim<>+0(SB), R0
+	FMOVD	0(R1), F1
+	MOVD	$·log1pc8<>+0(SB), R1
+	FMOVD	0(R1), F5
+	MOVD	$·log1pc7<>+0(SB), R1
+	VLEG	$0, 0(R1), V20
+	MOVD	$·log1pc6<>+0(SB), R1
+	WFSDB	V0, V3, V4
+	VLEG	$0, 0(R1), V18
+	MOVD	$·log1pc5<>+0(SB), R1
+	VLEG	$0, 0(R1), V16
+	MOVD	R2, R5
+	LGDR	F4, R3
+	WORD	$0xC0190006	//iilf	%r1,425983
+	BYTE	$0x7F
+	BYTE	$0xFF
+	SRAD	$32, R3, R3
+	SUBW	R3, R1
+	SRW	$16, R1, R1
+	BYTE	$0x18	//lr	%r4,%r1
+	BYTE	$0x41
+	RISBGN	$0, $15, $48, R4, R2
+	RISBGN	$16, $31, $32, R4, R5
+	MOVW	R0, R6
+	MOVW	R3, R7
+	CMPBGT	R6, R7, L8
+	WFCEDBS	V4, V4, V6
+	MOVD	$·log1pxzero<>+0(SB), R1
+	FMOVD	0(R1), F2
+	BVS	LEXITTAGlog1p
+	WORD	$0xB3130044	// lcdbr %f4,%f4
+	WFCEDBS	V2, V4, V6
+	BEQ	L9
+	WFCHDBS	V4, V2, V2
+	BEQ	LEXITTAGlog1p
+	MOVD	$·log1pxnan<>+0(SB), R1
+	FMOVD	0(R1), F0	// F0 = NaN (log1pxnan)
+	FMOVD	F0, ret+8(FP)
+	RET
+
+L8:
+	LDGR	R2, F2
+	FSUB	F4, F3
+	FMADD	F2, F4, F1
+	MOVD	$·log1pc4<>+0(SB), R2
+	WORD	$0xB3130041	// lcdbr %f4,%f1
+	FMOVD	0(R2), F7
+	FSUB	F3, F0
+	MOVD	$·log1pc3<>+0(SB), R2
+	FMOVD	0(R2), F3
+	MOVD	$·log1pc2<>+0(SB), R2
+	WFMDB	V1, V1, V6
+	FMADD	F7, F4, F3
+	WFMSDB	V0, V2, V1, V0
+	FMOVD	0(R2), F7
+	WFMADB	V4, V5, V20, V5
+	MOVD	$·log1pc1<>+0(SB), R2
+	FMOVD	0(R2), F2
+	FMADD	F7, F4, F2
+	WFMADB	V4, V18, V16, V4
+	FMADD	F3, F6, F2
+	WFMADB	V5, V6, V4, V5
+	FMUL	F6, F6
+	MOVD	$·log1pc0<>+0(SB), R2
+	WFMADB	V6, V5, V2, V6
+	FMOVD	0(R2), F4
+	WFMADB	V0, V6, V4, V6
+	RISBGZ	$57, $60, $3, R1, R1
+	MOVD	$·log1ptab<>+0(SB), R2
+	MOVD	$·log1pxl1<>+0(SB), R3
+	WORD	$0x68112000	//ld	%f1,0(%r1,%r2)
+	FMOVD	0(R3), F2
+	WFMADB	V0, V6, V1, V0
+	MOVD	$·log1pyout<>+0(SB), R1
+	LDGR	R5, F6
+	FMOVD	0(R1), F4
+	WFMSDB	V2, V6, V4, V2
+	MOVD	$·log1pxl2<>+0(SB), R1
+	FMOVD	0(R1), F4
+	FMADD	F4, F2, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+
+L9:
+	MOVD	$·log1pxminf<>+0(SB), R1
+	FMOVD	0(R1), F0	// F0 = -Inf (log1pxminf)
+	FMOVD	F0, ret+8(FP)
+	RET
+
+
+LEXITTAGlog1p:
+	FMOVD	F0, ret+8(FP)	// return the value currently in F0
+	RET
+
diff --git a/src/math/log_amd64.s b/src/math/log_amd64.s
new file mode 100644
index 0000000..508df68
--- /dev/null
+++ b/src/math/log_amd64.s
@@ -0,0 +1,112 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define HSqrt2 7.07106781186547524401e-01 // sqrt(2)/2
+#define Ln2Hi  6.93147180369123816490e-01 // 0x3fe62e42fee00000
+#define Ln2Lo  1.90821492927058770002e-10 // 0x3dea39ef35793c76
+#define L1     6.666666666666735130e-01   // 0x3FE5555555555593
+#define L2     3.999999999940941908e-01   // 0x3FD999999997FA04
+#define L3     2.857142874366239149e-01   // 0x3FD2492494229359
+#define L4     2.222219843214978396e-01   // 0x3FCC71C51D8E78AF
+#define L5     1.818357216161805012e-01   // 0x3FC7466496CB03DE
+#define L6     1.531383769920937332e-01   // 0x3FC39A09D078C69F
+#define L7     1.479819860511658591e-01   // 0x3FC2F112DF3E5244
+#define NaN    0x7FF8000000000001
+#define NegInf 0xFFF0000000000000
+#define PosInf 0x7FF0000000000000
+
+// func archLog(x float64) float64
+TEXT ·archLog(SB),NOSPLIT,$0
+	// test bits for special cases
+	MOVQ    x+0(FP), BX
+	MOVQ    $~(1<<63), AX // sign bit mask
+	ANDQ    BX, AX
+	JEQ     isZero // every bit other than the sign is clear: x is ±0
+	MOVQ    $0, AX
+	CMPQ    AX, BX
+	JGT     isNegative // 0 > BX as a signed integer, i.e. the sign bit of x is set
+	MOVQ    $PosInf, AX
+	CMPQ    AX, BX
+	JLE     isInfOrNaN // PosInf bits <= BX: x is +Inf or NaN
+	// f1, ki := math.Frexp(x); k := float64(ki)
+	MOVQ    BX, X0
+	MOVQ    $0x000FFFFFFFFFFFFF, AX
+	MOVQ    AX, X2
+	ANDPD   X0, X2
+	MOVSD   $0.5, X0 // 0x3FE0000000000000
+	ORPD    X0, X2 // X2= f1
+	SHRQ    $52, BX
+	ANDL    $0x7FF, BX
+	SUBL    $0x3FE, BX
+	XORPS   X1, X1 // break dependency for CVTSL2SD
+	CVTSL2SD BX, X1 // x1= k, x2= f1
+	// if f1 < math.Sqrt2/2 { k -= 1; f1 *= 2 }
+	MOVSD   $HSqrt2, X0 // x0= 0.7071, x1= k, x2= f1
+	CMPSD   X2, X0, 5 // cmpnlt; x0= 0 or ^0, x1= k, x2 = f1
+	MOVSD   $1.0, X3 // x0= 0 or ^0, x1= k, x2 = f1, x3= 1
+	ANDPD   X0, X3 // x0= 0 or ^0, x1= k, x2 = f1, x3= 0 or 1
+	SUBSD   X3, X1 // x0= 0 or ^0, x1= k, x2 = f1, x3= 0 or 1
+	MOVSD   $1.0, X0 // x0= 1, x1= k, x2= f1, x3= 0 or 1
+	ADDSD   X0, X3 // x0= 1, x1= k, x2= f1, x3= 1 or 2
+	MULSD   X3, X2 // x0= 1, x1= k, x2= f1
+	// f := f1 - 1
+	SUBSD   X0, X2 // x1= k, x2= f
+	// s := f / (2 + f)
+	MOVSD   $2.0, X0
+	ADDSD   X2, X0
+	MOVAPD  X2, X3
+	DIVSD   X0, X3 // x1=k, x2= f, x3= s
+	// s2 := s * s
+	MOVAPD  X3, X4 // x1= k, x2= f, x3= s
+	MULSD   X4, X4 // x1= k, x2= f, x3= s, x4= s2
+	// s4 := s2 * s2
+	MOVAPD  X4, X5 // x1= k, x2= f, x3= s, x4= s2
+	MULSD   X5, X5 // x1= k, x2= f, x3= s, x4= s2, x5= s4
+	// t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7)))
+	MOVSD   $L7, X6
+	MULSD   X5, X6
+	ADDSD   $L5, X6
+	MULSD   X5, X6
+	ADDSD   $L3, X6
+	MULSD   X5, X6
+	ADDSD   $L1, X6
+	MULSD   X6, X4 // x1= k, x2= f, x3= s, x4= t1, x5= s4
+	// t2 := s4 * (L2 + s4*(L4+s4*L6))
+	MOVSD   $L6, X6
+	MULSD   X5, X6
+	ADDSD   $L4, X6
+	MULSD   X5, X6
+	ADDSD   $L2, X6
+	MULSD   X6, X5 // x1= k, x2= f, x3= s, x4= t1, x5= t2
+	// R := t1 + t2
+	ADDSD   X5, X4 // x1= k, x2= f, x3= s, x4= R
+	// hfsq := 0.5 * f * f
+	MOVSD   $0.5, X0
+	MULSD   X2, X0
+	MULSD   X2, X0 // x0= hfsq, x1= k, x2= f, x3= s, x4= R
+	// return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f)
+	ADDSD   X0, X4 // x0= hfsq, x1= k, x2= f, x3= s, x4= hfsq+R
+	MULSD   X4, X3 // x0= hfsq, x1= k, x2= f, x3= s*(hfsq+R)
+	MOVSD   $Ln2Lo, X4
+	MULSD   X1, X4 // x4= k*Ln2Lo
+	ADDSD   X4, X3 // x0= hfsq, x1= k, x2= f, x3= s*(hfsq+R)+k*Ln2Lo
+	SUBSD   X3, X0 // x0= hfsq-(s*(hfsq+R)+k*Ln2Lo), x1= k, x2= f
+	SUBSD   X2, X0 // x0= (hfsq-(s*(hfsq+R)+k*Ln2Lo))-f, x1= k
+	MULSD   $Ln2Hi, X1 // x0= (hfsq-(s*(hfsq+R)+k*Ln2Lo))-f, x1= k*Ln2Hi
+	SUBSD   X0, X1 // x1= k*Ln2Hi-((hfsq-(s*(hfsq+R)+k*Ln2Lo))-f)
+	MOVSD   X1, ret+8(FP)
+	RET
+isInfOrNaN:
+	MOVQ    BX, ret+8(FP) // +Inf or NaN, return x
+	RET
+isNegative:
+	MOVQ    $NaN, AX
+	MOVQ    AX, ret+8(FP) // return NaN
+	RET
+isZero:
+	MOVQ    $NegInf, AX
+	MOVQ    AX, ret+8(FP) // return -Inf
+	RET
diff --git a/src/math/log_asm.go b/src/math/log_asm.go
new file mode 100644
index 0000000..848cce1
--- /dev/null
+++ b/src/math/log_asm.go
@@ -0,0 +1,11 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || s390x
+
+package math
+
+const haveArchLog = true // this file is built only for amd64 and s390x (see the build constraint)
+
+func archLog(x float64) float64 // declaration only; the body is supplied outside this file
diff --git a/src/math/log_s390x.s b/src/math/log_s390x.s
new file mode 100644
index 0000000..4b514f3
--- /dev/null
+++ b/src/math/log_s390x.s
@@ -0,0 +1,168 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial approximations
+DATA ·logrodataL21<> + 0(SB)/8, $-.499999999999999778E+00
+DATA ·logrodataL21<> + 8(SB)/8, $0.333333333333343751E+00
+DATA ·logrodataL21<> + 16(SB)/8, $-.250000000001606881E+00
+DATA ·logrodataL21<> + 24(SB)/8, $0.199999999971603032E+00
+DATA ·logrodataL21<> + 32(SB)/8, $-.166666663114122038E+00
+DATA ·logrodataL21<> + 40(SB)/8, $-.125002923782692399E+00
+DATA ·logrodataL21<> + 48(SB)/8, $0.111142014580396256E+00
+DATA ·logrodataL21<> + 56(SB)/8, $0.759438932618934220E-01
+DATA ·logrodataL21<> + 64(SB)/8, $0.142857144267212549E+00
+DATA ·logrodataL21<> + 72(SB)/8, $-.993038938793590759E-01
+DATA ·logrodataL21<> + 80(SB)/8, $-1.0
+GLOBL ·logrodataL21<> + 0(SB), RODATA, $88
+
+// Constants
+DATA ·logxminf<> + 0(SB)/8, $0xfff0000000000000
+GLOBL ·logxminf<> + 0(SB), RODATA, $8
+DATA ·logxnan<> + 0(SB)/8, $0x7ff8000000000000
+GLOBL ·logxnan<> + 0(SB), RODATA, $8
+DATA ·logx43f<> + 0(SB)/8, $0x43f0000000000000
+GLOBL ·logx43f<> + 0(SB), RODATA, $8
+DATA ·logxl2<> + 0(SB)/8, $0x3fda7aecbeba4e46
+GLOBL ·logxl2<> + 0(SB), RODATA, $8
+DATA ·logxl1<> + 0(SB)/8, $0x3ffacde700000000
+GLOBL ·logxl1<> + 0(SB), RODATA, $8
+
+/* Input transform scale and add constants */
+DATA ·logxm<> + 0(SB)/8, $0x3fc77604e63c84b1
+DATA ·logxm<> + 8(SB)/8, $0x40fb39456ab53250
+DATA ·logxm<> + 16(SB)/8, $0x3fc9ee358b945f3f
+DATA ·logxm<> + 24(SB)/8, $0x40fb39418bf3b137
+DATA ·logxm<> + 32(SB)/8, $0x3fccfb2e1304f4b6
+DATA ·logxm<> + 40(SB)/8, $0x40fb393d3eda3022
+DATA ·logxm<> + 48(SB)/8, $0x3fd0000000000000
+DATA ·logxm<> + 56(SB)/8, $0x40fb393969e70000
+DATA ·logxm<> + 64(SB)/8, $0x3fd11117aafbfe04
+DATA ·logxm<> + 72(SB)/8, $0x40fb3936eaefafcf
+DATA ·logxm<> + 80(SB)/8, $0x3fd2492af5e658b2
+DATA ·logxm<> + 88(SB)/8, $0x40fb39343ff01715
+DATA ·logxm<> + 96(SB)/8, $0x3fd3b50c622a43dd
+DATA ·logxm<> + 104(SB)/8, $0x40fb39315adae2f3
+DATA ·logxm<> + 112(SB)/8, $0x3fd56bbeea918777
+DATA ·logxm<> + 120(SB)/8, $0x40fb392e21698552
+GLOBL ·logxm<> + 0(SB), RODATA, $128
+
+// Log returns the natural logarithm of the argument.
+//
+// Special cases are:
+//      Log(+Inf) = +Inf
+//      Log(0) = -Inf
+//      Log(x < 0) = NaN
+//      Log(NaN) = NaN
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT	·logAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0	// F0 = x
+	MOVD	$·logrodataL21<>+0(SB), R9
+	MOVH	$0x8006, R4
+	LGDR	F0, R1
+	MOVD	$0x3FF0000000000000, R6
+	SRAD	$48, R1, R1
+	MOVD	$0x40F03E8000000000, R8
+	SUBW	R1, R4
+	RISBGZ	$32, $59, $0, R4, R2
+	RISBGN	$0, $15, $48, R2, R6
+	RISBGN	$16, $31, $32, R2, R8
+	MOVW	R1, R7
+	CMPBGT	R7, $22, L17
+	LTDBR	F0, F0
+	MOVD	$·logx43f<>+0(SB), R1
+	FMOVD	0(R1), F2
+	BLEU	L3
+	MOVH	$0x8005, R12
+	MOVH	$0x8405, R0
+	BR	L15
+L7:
+	LTDBR	F0, F0
+	BLEU	L3
+L15:
+	FMUL	F2, F0
+	LGDR	F0, R1
+	SRAD	$48, R1, R1
+	SUBW	R1, R0, R2
+	SUBW	R1, R12, R3
+	BYTE	$0x18	//lr	%r4,%r2
+	BYTE	$0x42
+	ANDW	$0xFFFFFFF0, R3
+	ANDW	$0xFFFFFFF0, R2
+	BYTE	$0x18	//lr	%r5,%r1
+	BYTE	$0x51
+	MOVW	R1, R7
+	CMPBLE	R7, $22, L7
+	RISBGN	$0, $15, $48, R3, R6
+	RISBGN	$16, $31, $32, R2, R8
+L2:
+	MOVH	R5, R5
+	MOVH	$0x7FEF, R1
+	CMPW	R5, R1
+	BGT	L1
+	LDGR	R6, F2
+	FMUL	F2, F0
+	RISBGZ	$57, $59, $3, R4, R4
+	FMOVD	80(R9), F2
+	MOVD	$·logxm<>+0(SB), R7
+	ADD	R7, R4
+	FMOVD	72(R9), F4
+	WORD	$0xED004000	//madb	%f2,%f0,0(%r4)
+	BYTE	$0x20
+	BYTE	$0x1E
+	FMOVD	64(R9), F1
+	FMOVD	F2, F0
+	FMOVD	56(R9), F2
+	WFMADB	V0, V2, V4, V2
+	WFMDB	V0, V0, V6
+	FMOVD	48(R9), F4
+	WFMADB	V0, V2, V4, V2
+	FMOVD	40(R9), F4
+	WFMADB	V2, V6, V1, V2
+	FMOVD	32(R9), F1
+	WFMADB	V6, V4, V1, V4
+	FMOVD	24(R9), F1
+	WFMADB	V6, V2, V1, V2
+	FMOVD	16(R9), F1
+	WFMADB	V6, V4, V1, V4
+	MOVD	$·logxl1<>+0(SB), R1
+	FMOVD	8(R9), F1
+	WFMADB	V6, V2, V1, V2
+	FMOVD	0(R9), F1
+	WFMADB	V6, V4, V1, V4
+	FMOVD	8(R4), F1
+	WFMADB	V0, V2, V4, V2
+	LDGR	R8, F4
+	WFMADB	V6, V2, V0, V2
+	WORD	$0xED401000	//msdb	%f1,%f4,0(%r1)
+	BYTE	$0x10
+	BYTE	$0x1F
+	MOVD	·logxl2<>+0(SB), R1
+	WORD	$0xB3130001	//lcdbr	%f0,%f1
+	LDGR	R1, F4
+	WFMADB	V0, V4, V2, V0
+L1:
+	FMOVD	F0, ret+8(FP)
+	RET
+L3:
+	LTDBR	F0, F0
+	BEQ	L20	// x == 0: return -Inf
+	BGE	L1
+	BVS	L1
+
+	MOVD	$·logxnan<>+0(SB), R1
+	FMOVD	0(R1), F0	// F0 = NaN (logxnan)
+	BR	L1
+L20:
+	MOVD	$·logxminf<>+0(SB), R1
+	FMOVD	0(R1), F0	// F0 = -Inf (logxminf)
+	FMOVD	F0, ret+8(FP)
+	RET
+L17:
+	BYTE	$0x18	//lr	%r5,%r1
+	BYTE	$0x51
+	BR	L2
diff --git a/src/math/log_stub.go b/src/math/log_stub.go
new file mode 100644
index 0000000..d35992b
--- /dev/null
+++ b/src/math/log_stub.go
@@ -0,0 +1,13 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !s390x
+
+package math
+
+const haveArchLog = false // this file is built when the target is neither amd64 nor s390x
+
+func archLog(x float64) float64 {
+	panic("not implemented") // NOTE(review): presumably unreachable because callers test haveArchLog first — confirm
+}
diff --git a/src/math/logb.go b/src/math/logb.go
new file mode 100644
index 0000000..1a46464
--- /dev/null
+++ b/src/math/logb.go
@@ -0,0 +1,52 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Logb returns the binary exponent of x.
+//
+// Special cases are:
+//
+//	Logb(±Inf) = +Inf
+//	Logb(0) = -Inf
+//	Logb(NaN) = NaN
+func Logb(x float64) float64 {
+	// special cases
+	switch {
+	case x == 0: // matches both +0 and -0
+		return Inf(-1)
+	case IsInf(x, 0): // either sign of infinity
+		return Inf(1)
+	case IsNaN(x):
+		return x // hand the NaN argument back unchanged
+	}
+	return float64(ilogb(x)) // x is finite and non-zero here
+}
+
+// Ilogb returns the binary exponent of x as an integer.
+//
+// Special cases are:
+//
+//	Ilogb(±Inf) = MaxInt32
+//	Ilogb(0) = MinInt32
+//	Ilogb(NaN) = MaxInt32
+func Ilogb(x float64) int {
+	// special cases
+	switch {
+	case x == 0: // matches both +0 and -0
+		return MinInt32
+	case IsNaN(x):
+		return MaxInt32
+	case IsInf(x, 0): // either sign of infinity
+		return MaxInt32
+	}
+	return ilogb(x) // x is finite and non-zero here
+}
+
+// ilogb returns the binary exponent of x. It assumes x is finite and
+// non-zero.
+func ilogb(x float64) int {
+	x, exp := normalize(x) // NOTE(review): normalize is defined elsewhere; presumably it rescales subnormals and reports the exponent adjustment — confirm
+	return int((Float64bits(x)>>shift)&mask) - bias + exp // exponent field minus bias, plus the adjustment from normalize
+}
diff --git a/src/math/mod.go b/src/math/mod.go
new file mode 100644
index 0000000..6f24250
--- /dev/null
+++ b/src/math/mod.go
@@ -0,0 +1,52 @@
+// Copyright 2009-2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point mod function.
+*/
+
+// Mod returns the floating-point remainder of x/y.
+// The magnitude of the result is less than the magnitude of y
+// and its sign agrees with that of x.
+//
+// Special cases are:
+//
+//	Mod(±Inf, y) = NaN
+//	Mod(NaN, y) = NaN
+//	Mod(x, 0) = NaN
+//	Mod(x, ±Inf) = x
+//	Mod(x, NaN) = NaN
+func Mod(x, y float64) float64 {
+	if haveArchMod { // dispatch to archMod whenever haveArchMod is set
+		return archMod(x, y)
+	}
+	return mod(x, y) // otherwise use the pure Go implementation below
+}
+
+func mod(x, y float64) float64 { // pure Go implementation behind Mod
+	if y == 0 || IsInf(x, 0) || IsNaN(x) || IsNaN(y) {
+		return NaN()
+	}
+	y = Abs(y) // only the magnitude of y is used from here on
+
+	yfr, yexp := Frexp(y)
+	r := x
+	if x < 0 {
+		r = -x // work on |x|; the sign is restored before returning
+	}
+
+	for r >= y { // each pass subtracts the largest y*2**n that does not exceed r
+		rfr, rexp := Frexp(r)
+		if rfr < yfr {
+			rexp = rexp - 1 // y scaled to r's exponent would exceed r, so use one power of two less
+		}
+		r = r - Ldexp(y, rexp-yexp)
+	}
+	if x < 0 {
+		r = -r
+	}
+	return r
+}
diff --git a/src/math/modf.go b/src/math/modf.go
new file mode 100644
index 0000000..613a75f
--- /dev/null
+++ b/src/math/modf.go
@@ -0,0 +1,43 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Modf returns integer and fractional floating-point numbers
+// that sum to f. Both values have the same sign as f.
+//
+// Special cases are:
+//
+//	Modf(±Inf) = ±Inf, NaN
+//	Modf(NaN) = NaN, NaN
+func Modf(f float64) (int float64, frac float64) {
+	if haveArchModf { // dispatch to archModf whenever haveArchModf is set
+		return archModf(f)
+	}
+	return modf(f) // otherwise use the pure Go implementation below
+}
+
+func modf(f float64) (int float64, frac float64) { // pure Go implementation behind Modf
+	if f < 1 {
+		switch {
+		case f < 0:
+			int, frac = Modf(-f) // split |f|, then negate both parts
+			return -int, -frac
+		case f == 0:
+			return f, f // Return -0, -0 when f == -0
+		}
+		return 0, f // 0 < f < 1: f is entirely fractional
+	}
+
+	x := Float64bits(f)
+	e := uint(x>>shift)&mask - bias // exponent field minus bias (f is >= 1, +Inf, or NaN here)
+
+	// Keep the top 12+e bits, the integer part; clear the rest.
+	if e < 64-12 {
+		x &^= 1<<(64-12-e) - 1
+	}
+	int = Float64frombits(x)
+	frac = f - int
+	return
+}
diff --git a/src/math/modf_arm64.s b/src/math/modf_arm64.s
new file mode 100644
index 0000000..1e4a329
--- /dev/null
+++ b/src/math/modf_arm64.s
@@ -0,0 +1,18 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func archModf(f float64) (int float64, frac float64)
+TEXT ·archModf(SB),NOSPLIT,$0
+	MOVD	f+0(FP), R0	// R0 = raw bits of f
+	FMOVD	R0, F0
+	FRINTZD	F0, F1	// F1 = f rounded toward zero
+	FMOVD	F1, int+8(FP)
+	FSUBD	F1, F0	// F0 = f - F1
+	FMOVD	F0, R1
+	AND	$(1<<63), R0	// keep only the sign bit of f
+	ORR	R0, R1 // must have same sign
+	MOVD	R1, frac+16(FP)
+	RET
diff --git a/src/math/modf_asm.go b/src/math/modf_asm.go
new file mode 100644
index 0000000..c63be6c
--- /dev/null
+++ b/src/math/modf_asm.go
@@ -0,0 +1,11 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64 || ppc64 || ppc64le
+
+package math
+
+const haveArchModf = true // this file is built only for arm64, ppc64 and ppc64le (see the build constraint)
+
+func archModf(f float64) (int float64, frac float64) // declaration only; the body is supplied outside this file
diff --git a/src/math/modf_noasm.go b/src/math/modf_noasm.go
new file mode 100644
index 0000000..55c6a7f
--- /dev/null
+++ b/src/math/modf_noasm.go
@@ -0,0 +1,13 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !arm64 && !ppc64 && !ppc64le
+
+package math
+
+const haveArchModf = false // this file is built when the target is none of arm64, ppc64, ppc64le
+
+func archModf(f float64) (int float64, frac float64) {
+	panic("not implemented") // Modf only calls archModf when haveArchModf is true
+}
diff --git a/src/math/modf_ppc64x.s b/src/math/modf_ppc64x.s
new file mode 100644
index 0000000..1303067
--- /dev/null
+++ b/src/math/modf_ppc64x.s
@@ -0,0 +1,18 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+#include "textflag.h"
+
+// func archModf(f float64) (int float64, frac float64)
+TEXT ·archModf(SB),NOSPLIT,$0
+	FMOVD	f+0(FP), F0	// F0 = f
+	FRIZ	F0, F1	// F1 = f rounded to an integer toward zero
+	FMOVD	F1, int+8(FP)
+	FSUB	F1, F0, F2	// F2 = F0 - F1
+	FCPSGN	F2, F0, F2	// NOTE(review): copy-sign step; presumably gives frac the sign of f — confirm operand order
+	FMOVD	F2, frac+16(FP)
+	RET
diff --git a/src/math/nextafter.go b/src/math/nextafter.go
new file mode 100644
index 0000000..ec18d54
--- /dev/null
+++ b/src/math/nextafter.go
@@ -0,0 +1,51 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Nextafter32 returns the next representable float32 value after x towards y.
+//
+// Special cases are:
+//
+//	Nextafter32(x, x)   = x
+//	Nextafter32(NaN, y) = NaN
+//	Nextafter32(x, NaN) = NaN
+func Nextafter32(x, y float32) (r float32) {
+	switch {
+	case IsNaN(float64(x)) || IsNaN(float64(y)): // special case
+		r = float32(NaN())
+	case x == y:
+		r = x
+	case x == 0: // step off zero: bit pattern 1 with the sign of y
+		r = float32(Copysign(float64(Float32frombits(1)), float64(y)))
+	case (y > x) == (x > 0): // moving away from zero: increment the bit pattern
+		r = Float32frombits(Float32bits(x) + 1)
+	default: // moving toward zero: decrement the bit pattern
+		r = Float32frombits(Float32bits(x) - 1)
+	}
+	return
+}
+
+// Nextafter returns the next representable float64 value after x towards y.
+//
+// Special cases are:
+//
+//	Nextafter(x, x)   = x
+//	Nextafter(NaN, y) = NaN
+//	Nextafter(x, NaN) = NaN
+func Nextafter(x, y float64) (r float64) {
+	switch {
+	case IsNaN(x) || IsNaN(y): // special case
+		r = NaN()
+	case x == y:
+		r = x
+	case x == 0: // step off zero: bit pattern 1 with the sign of y
+		r = Copysign(Float64frombits(1), y)
+	case (y > x) == (x > 0): // moving away from zero: increment the bit pattern
+		r = Float64frombits(Float64bits(x) + 1)
+	default: // moving toward zero: decrement the bit pattern
+		r = Float64frombits(Float64bits(x) - 1)
+	}
+	return
+}
diff --git a/src/math/pow.go b/src/math/pow.go
new file mode 100644
index 0000000..3f42945
--- /dev/null
+++ b/src/math/pow.go
@@ -0,0 +1,166 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+func isOddInt(x float64) bool { // reports whether x is an integer with its lowest bit set
+	if Abs(x) >= (1 << 53) {
+		// 1 << 53 is the point beyond which float64 can no longer represent every integer exactly.
+		// Any number outside this range will be truncated before the decimal point and therefore will always be
+		// an even integer.
+		// Without this check and if x overflows int64 the int64(xi) conversion below may produce incorrect results
+		// on some architectures (and does so on arm64). See issue #57465.
+		return false
+	}
+
+	xi, xf := Modf(x)
+	return xf == 0 && int64(xi)&1 == 1 // no fractional part and the low bit of the integer part is set
+}
+
+// Special cases taken from FreeBSD's /usr/src/lib/msun/src/e_pow.c
+// updated by IEEE Std. 754-2008 "Section 9.2.1 Special values".
+
+// Pow returns x**y, the base-x exponential of y.
+//
+// Special cases are (in order):
+//
+//	Pow(x, ±0) = 1 for any x
+//	Pow(1, y) = 1 for any y
+//	Pow(x, 1) = x for any x
+//	Pow(NaN, y) = NaN
+//	Pow(x, NaN) = NaN
+//	Pow(±0, y) = ±Inf for y an odd integer < 0
+//	Pow(±0, -Inf) = +Inf
+//	Pow(±0, +Inf) = +0
+//	Pow(±0, y) = +Inf for finite y < 0 and not an odd integer
+//	Pow(±0, y) = ±0 for y an odd integer > 0
+//	Pow(±0, y) = +0 for finite y > 0 and not an odd integer
+//	Pow(-1, ±Inf) = 1
+//	Pow(x, +Inf) = +Inf for |x| > 1
+//	Pow(x, -Inf) = +0 for |x| > 1
+//	Pow(x, +Inf) = +0 for |x| < 1
+//	Pow(x, -Inf) = +Inf for |x| < 1
+//	Pow(+Inf, y) = +Inf for y > 0
+//	Pow(+Inf, y) = +0 for y < 0
+//	Pow(-Inf, y) = Pow(-0, -y)
+//	Pow(x, y) = NaN for finite x < 0 and finite non-integer y
+func Pow(x, y float64) float64 {
+	if haveArchPow { // dispatch to archPow whenever haveArchPow is set
+		return archPow(x, y)
+	}
+	return pow(x, y) // otherwise use the pure Go implementation below
+}
+
+func pow(x, y float64) float64 { // pure Go implementation behind Pow
+	switch {
+	case y == 0 || x == 1:
+		return 1
+	case y == 1:
+		return x
+	case IsNaN(x) || IsNaN(y):
+		return NaN()
+	case x == 0:
+		switch {
+		case y < 0:
+			if Signbit(x) && isOddInt(y) {
+				return Inf(-1)
+			}
+			return Inf(1)
+		case y > 0:
+			if Signbit(x) && isOddInt(y) {
+				return x
+			}
+			return 0
+		}
+	case IsInf(y, 0):
+		switch {
+		case x == -1:
+			return 1
+		case (Abs(x) < 1) == IsInf(y, 1):
+			return 0
+		default:
+			return Inf(1)
+		}
+	case IsInf(x, 0):
+		if IsInf(x, -1) {
+			return Pow(1/x, -y) // Pow(-0, -y)
+		}
+		switch {
+		case y < 0:
+			return 0
+		case y > 0:
+			return Inf(1)
+		}
+	case y == 0.5:
+		return Sqrt(x)
+	case y == -0.5:
+		return 1 / Sqrt(x)
+	}
+
+	yi, yf := Modf(Abs(y)) // integer and fractional parts of |y|
+	if yf != 0 && x < 0 {
+		return NaN()
+	}
+	if yi >= 1<<63 {
+		// yi is a large even int that will lead to overflow (or underflow to 0)
+		// for all x except -1 (x == 1 was handled earlier)
+		switch {
+		case x == -1:
+			return 1
+		case (Abs(x) < 1) == (y > 0):
+			return 0
+		default:
+			return Inf(1)
+		}
+	}
+
+	// ans = a1 * 2**ae (= 1 for now).
+	a1 := 1.0
+	ae := 0
+
+	// ans *= x**yf
+	if yf != 0 {
+		if yf > 0.5 {
+			yf-- // keep |yf| <= 0.5 by moving one unit into yi
+			yi++
+		}
+		a1 = Exp(yf * Log(x))
+	}
+
+	// ans *= x**yi
+	// by multiplying in successive squarings
+	// of x according to bits of yi.
+	// accumulate powers of two into ae.
+	x1, xe := Frexp(x)
+	for i := int64(yi); i != 0; i >>= 1 {
+		if xe < -1<<12 || 1<<12 < xe {
+			// catch xe before it overflows the left shift below
+			// Since i !=0 it has at least one bit still set, so ae will accumulate xe
+			// on at least one more iteration, ae += xe is a lower bound on ae
+			// the lower bound on ae exceeds the size of a float64 exp
+			// so the final call to Ldexp will produce under/overflow (0/Inf)
+			ae += xe
+			break
+		}
+		if i&1 == 1 {
+			a1 *= x1
+			ae += xe
+		}
+		x1 *= x1
+		xe <<= 1
+		if x1 < .5 { // renormalize the squared fraction back to at least 0.5
+			x1 += x1
+			xe--
+		}
+	}
+
+	// ans = a1*2**ae
+	// if y < 0 { ans = 1 / ans }
+	// but in the opposite order
+	if y < 0 {
+		a1 = 1 / a1
+		ae = -ae
+	}
+	return Ldexp(a1, ae)
+}
diff --git a/src/math/pow10.go b/src/math/pow10.go
new file mode 100644
index 0000000..c31ad8d
--- /dev/null
+++ b/src/math/pow10.go
@@ -0,0 +1,47 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// pow10tab stores the pre-computed values 10**i for 0 <= i < 32.
+var pow10tab = [...]float64{
+	1e00, 1e01, 1e02, 1e03, 1e04, 1e05, 1e06, 1e07, 1e08, 1e09,
+	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+	1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
+	1e30, 1e31,
+}
+
+// pow10postab32 stores the pre-computed value for 10**(i*32) at index i (0 <= i <= 9).
+var pow10postab32 = [...]float64{
+	1e00, 1e32, 1e64, 1e96, 1e128, 1e160, 1e192, 1e224, 1e256, 1e288,
+}
+
+// pow10negtab32 stores the pre-computed value for 10**(-i*32) at index i (0 <= i <= 10).
+var pow10negtab32 = [...]float64{
+	1e-00, 1e-32, 1e-64, 1e-96, 1e-128, 1e-160, 1e-192, 1e-224, 1e-256, 1e-288, 1e-320,
+}
+
+// Pow10 computes 10**n, the base-10 exponential of n.
+//
+// Results outside the float64 range saturate:
+//
+//	Pow10(n) =    0 for n < -323
+//	Pow10(n) = +Inf for n > 308
+func Pow10(n int) float64 {
+	switch {
+	case 0 <= n && n <= 308:
+		// Split n into a multiple of 32 and a remainder below 32.
+		hi, lo := uint(n)/32, uint(n)%32
+		return pow10postab32[hi] * pow10tab[lo]
+	case -323 <= n && n <= 0:
+		hi, lo := uint(-n)/32, uint(-n)%32
+		return pow10negtab32[hi] / pow10tab[lo]
+	case n > 0:
+		// 308 < n
+		return Inf(1)
+	default:
+		// n < -323
+		return 0
+	}
+}
diff --git a/src/math/pow_s390x.s b/src/math/pow_s390x.s
new file mode 100644
index 0000000..c8758fc
--- /dev/null
+++ b/src/math/pow_s390x.s
@@ -0,0 +1,634 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define PosInf   0x7FF0000000000000
+#define NaN      0x7FF8000000000001
+#define NegInf   0xFFF0000000000000
+#define PosOne   0x3FF0000000000000
+#define NegOne   0xBFF0000000000000
+#define NegZero  0x8000000000000000
+
+// Minimax polynomial approximation: 21 8-byte entries that powAsm addresses through R9
+DATA ·powrodataL51<> + 0(SB)/8, $-1.0
+DATA ·powrodataL51<> + 8(SB)/8, $1.0
+DATA ·powrodataL51<> + 16(SB)/8, $0.24022650695910110361E+00
+DATA ·powrodataL51<> + 24(SB)/8, $0.69314718055994686185E+00
+DATA ·powrodataL51<> + 32(SB)/8, $0.96181291057109484809E-02
+DATA ·powrodataL51<> + 40(SB)/8, $0.15403814778342868389E-03
+DATA ·powrodataL51<> + 48(SB)/8, $0.55504108652095235601E-01
+DATA ·powrodataL51<> + 56(SB)/8, $0.13333818813168698658E-02
+DATA ·powrodataL51<> + 64(SB)/8, $0.68205322933914439200E-12
+DATA ·powrodataL51<> + 72(SB)/8, $-.18466496523378731640E-01
+DATA ·powrodataL51<> + 80(SB)/8, $0.19697596291603973706E-02
+DATA ·powrodataL51<> + 88(SB)/8, $0.23083120654155209200E+00
+DATA ·powrodataL51<> + 96(SB)/8, $0.55324356012093416771E-06
+DATA ·powrodataL51<> + 104(SB)/8, $-.40340677224649339048E-05
+DATA ·powrodataL51<> + 112(SB)/8, $0.30255507904062541562E-04
+DATA ·powrodataL51<> + 120(SB)/8, $-.77453979912413008787E-07
+DATA ·powrodataL51<> + 128(SB)/8, $-.23637115549923464737E-03
+DATA ·powrodataL51<> + 136(SB)/8, $0.11016119077267717198E-07
+DATA ·powrodataL51<> + 144(SB)/8, $0.22608272174486123035E-09
+DATA ·powrodataL51<> + 152(SB)/8, $-.15895808101370190382E-08
+DATA ·powrodataL51<> + 160(SB)/8, $0x4540190000000000
+GLOBL ·powrodataL51<> + 0(SB), RODATA, $168
+
+// Constants (each is a single 8-byte value; hex entries are raw IEEE 754 bit patterns)
+DATA ·pow_x001a<> + 0(SB)/8, $0x1a000000000000
+GLOBL ·pow_x001a<> + 0(SB), RODATA, $8
+DATA ·pow_xinf<> + 0(SB)/8, $0x7ff0000000000000      //+Inf
+GLOBL ·pow_xinf<> + 0(SB), RODATA, $8
+DATA ·pow_xnan<> + 0(SB)/8, $0x7ff8000000000000      //NaN
+GLOBL ·pow_xnan<> + 0(SB), RODATA, $8
+DATA ·pow_x434<> + 0(SB)/8, $0x4340000000000000      //2**53 as a float64
+GLOBL ·pow_x434<> + 0(SB), RODATA, $8
+DATA ·pow_x433<> + 0(SB)/8, $0x4330000000000000      //2**52 as a float64
+GLOBL ·pow_x433<> + 0(SB), RODATA, $8
+DATA ·pow_x43f<> + 0(SB)/8, $0x43f0000000000000      //2**64 as a float64
+GLOBL ·pow_x43f<> + 0(SB), RODATA, $8
+DATA ·pow_xadd<> + 0(SB)/8, $0xc2f0000100003fef
+GLOBL ·pow_xadd<> + 0(SB), RODATA, $8
+DATA ·pow_xa<> + 0(SB)/8, $0x4019000000000000      //6.25 as a float64
+GLOBL ·pow_xa<> + 0(SB), RODATA, $8
+
+// Scale correction tables (two 8-byte entries each; powAsm indexes both with the same offset)
+DATA powiadd<> + 0(SB)/8, $0xf000000000000000
+DATA powiadd<> + 8(SB)/8, $0x1000000000000000
+GLOBL powiadd<> + 0(SB), RODATA, $16
+DATA powxscale<> + 0(SB)/8, $0x4ff0000000000000	// 2**256 as a float64
+DATA powxscale<> + 8(SB)/8, $0x2ff0000000000000	// 2**-256 as a float64
+GLOBL powxscale<> + 0(SB), RODATA, $16
+
+// Fractional powers of 2 table (16 float64 entries, 128 bytes)
+DATA ·powtexp<> + 0(SB)/8, $0.442737824274138381E-01
+DATA ·powtexp<> + 8(SB)/8, $0.263602189790660309E-01
+DATA ·powtexp<> + 16(SB)/8, $0.122565642281703586E-01
+DATA ·powtexp<> + 24(SB)/8, $0.143757052860721398E-02
+DATA ·powtexp<> + 32(SB)/8, $-.651375034121276075E-02
+DATA ·powtexp<> + 40(SB)/8, $-.119317678849450159E-01
+DATA ·powtexp<> + 48(SB)/8, $-.150868749549871069E-01
+DATA ·powtexp<> + 56(SB)/8, $-.161992609578469234E-01
+DATA ·powtexp<> + 64(SB)/8, $-.154492360403337917E-01
+DATA ·powtexp<> + 72(SB)/8, $-.129850717389178721E-01
+DATA ·powtexp<> + 80(SB)/8, $-.892902649276657891E-02
+DATA ·powtexp<> + 88(SB)/8, $-.338202636596794887E-02
+DATA ·powtexp<> + 96(SB)/8, $0.357266307045684762E-02
+DATA ·powtexp<> + 104(SB)/8, $0.118665304327406698E-01
+DATA ·powtexp<> + 112(SB)/8, $0.214434994118118914E-01
+DATA ·powtexp<> + 120(SB)/8, $0.322580645161290314E-01
+GLOBL ·powtexp<> + 0(SB), RODATA, $128
+
+// Log multiplier tables (powtl: 24 8-byte entries given as raw bit patterns, 192 bytes)
+DATA ·powtl<> + 0(SB)/8, $0xbdf9723a80db6a05
+DATA ·powtl<> + 8(SB)/8, $0x3e0cfe4a0babe862
+DATA ·powtl<> + 16(SB)/8, $0xbe163b42dd33dada
+DATA ·powtl<> + 24(SB)/8, $0xbe0cdf9de2a8429c
+DATA ·powtl<> + 32(SB)/8, $0xbde9723a80db6a05
+DATA ·powtl<> + 40(SB)/8, $0xbdb37fcae081745e
+DATA ·powtl<> + 48(SB)/8, $0xbdd8b2f901ac662c
+DATA ·powtl<> + 56(SB)/8, $0xbde867dc68c36cc9
+DATA ·powtl<> + 64(SB)/8, $0xbdd23e36b47256b7
+DATA ·powtl<> + 72(SB)/8, $0xbde4c9b89fcc7933
+DATA ·powtl<> + 80(SB)/8, $0xbdd16905cad7cf66
+DATA ·powtl<> + 88(SB)/8, $0x3ddb417414aa5529
+DATA ·powtl<> + 96(SB)/8, $0xbdce046f2889983c
+DATA ·powtl<> + 104(SB)/8, $0x3dc2c3865d072897
+DATA ·powtl<> + 112(SB)/8, $0x8000000000000000
+DATA ·powtl<> + 120(SB)/8, $0x3dc1ca48817f8afe
+DATA ·powtl<> + 128(SB)/8, $0xbdd703518a88bfb7
+DATA ·powtl<> + 136(SB)/8, $0x3dc64afcc46942ce
+DATA ·powtl<> + 144(SB)/8, $0xbd9d79191389891a
+DATA ·powtl<> + 152(SB)/8, $0x3ddd563044da4fa0
+DATA ·powtl<> + 160(SB)/8, $0x3e0f42b5e5f8f4b6
+DATA ·powtl<> + 168(SB)/8, $0x3e0dfa2c2cbf6ead
+DATA ·powtl<> + 176(SB)/8, $0x3e14e25e91661293
+DATA ·powtl<> + 184(SB)/8, $0x3e0aac461509e20c
+GLOBL ·powtl<> + 0(SB), RODATA, $192
+
+DATA ·powtm<> + 0(SB)/8, $0x3da69e13	// powtm: first of 24 8-byte entries
+DATA ·powtm<> + 8(SB)/8, $0x100003d66fcb6
+DATA ·powtm<> + 16(SB)/8, $0x200003d1538df
+DATA ·powtm<> + 24(SB)/8, $0x300003cab729e
+DATA ·powtm<> + 32(SB)/8, $0x400003c1a784c
+DATA ·powtm<> + 40(SB)/8, $0x500003ac9b074
+DATA ·powtm<> + 48(SB)/8, $0x60000bb498d22
+DATA ·powtm<> + 56(SB)/8, $0x68000bb8b29a2
+DATA ·powtm<> + 64(SB)/8, $0x70000bb9a32d4
+DATA ·powtm<> + 72(SB)/8, $0x74000bb9946bb
+DATA ·powtm<> + 80(SB)/8, $0x78000bb92e34b
+DATA ·powtm<> + 88(SB)/8, $0x80000bb6c57dc
+DATA ·powtm<> + 96(SB)/8, $0x84000bb4020f7
+DATA ·powtm<> + 104(SB)/8, $0x8c000ba93832d
+DATA ·powtm<> + 112(SB)/8, $0x9000080000000
+DATA ·powtm<> + 120(SB)/8, $0x940003aa66c4c
+DATA ·powtm<> + 128(SB)/8, $0x980003b2fb12a
+DATA ·powtm<> + 136(SB)/8, $0xa00003bc1def6
+DATA ·powtm<> + 144(SB)/8, $0xa80003c1eb0eb
+DATA ·powtm<> + 152(SB)/8, $0xb00003c64dcec
+DATA ·powtm<> + 160(SB)/8, $0xc00003cc49e4e
+DATA ·powtm<> + 168(SB)/8, $0xd00003d12f1de
+DATA ·powtm<> + 176(SB)/8, $0xe00003d4a9c6f
+DATA ·powtm<> + 184(SB)/8, $0xf00003d846c66
+GLOBL ·powtm<> + 0(SB), RODATA, $192	// 24 entries * 8 bytes
+
+// Table of indices into multiplier tables (64 bytes; powAsm reads single bytes from it with llgc)
+// Adjusted from asm to remove offset and convert
+DATA ·powtabi<> + 0(SB)/8, $0x1010101
+DATA ·powtabi<> + 8(SB)/8, $0x101020202020203
+DATA ·powtabi<> + 16(SB)/8, $0x303030404040405
+DATA ·powtabi<> + 24(SB)/8, $0x505050606060708
+DATA ·powtabi<> + 32(SB)/8, $0x90a0b0c0d0e0f10
+DATA ·powtabi<> + 40(SB)/8, $0x1011111212121313
+DATA ·powtabi<> + 48(SB)/8, $0x1314141414151515
+DATA ·powtabi<> + 56(SB)/8, $0x1516161617171717
+GLOBL ·powtabi<> + 0(SB), RODATA, $64
+
+// powAsm implements Pow in s390x assembly: it returns x**y, the base-x exponential of y.
+//
+// Special cases are (in order):
+//      Pow(x, ±0) = 1 for any x
+//      Pow(1, y) = 1 for any y
+//      Pow(x, 1) = x for any x
+//      Pow(NaN, y) = NaN
+//      Pow(x, NaN) = NaN
+//      Pow(±0, y) = ±Inf for y an odd integer < 0
+//      Pow(±0, -Inf) = +Inf
+//      Pow(±0, +Inf) = +0
+//      Pow(±0, y) = +Inf for finite y < 0 and not an odd integer
+//      Pow(±0, y) = ±0 for y an odd integer > 0
+//      Pow(±0, y) = +0 for finite y > 0 and not an odd integer
+//      Pow(-1, ±Inf) = 1
+//      Pow(x, +Inf) = +Inf for |x| > 1
+//      Pow(x, -Inf) = +0 for |x| > 1
+//      Pow(x, +Inf) = +0 for |x| < 1
+//      Pow(x, -Inf) = +Inf for |x| < 1
+//      Pow(+Inf, y) = +Inf for y > 0
+//      Pow(+Inf, y) = +0 for y < 0
+//      Pow(-Inf, y) = Pow(-0, -y)
+//      Pow(x, y) = NaN for finite x < 0 and finite non-integer y
+
+TEXT	·powAsm(SB), NOSPLIT, $0-24
+	// load the raw bit patterns of x (R1) and y (R2) for the special-case checks
+	MOVD	x+0(FP), R1
+	MOVD	y+8(FP), R2
+
+	// special case Pow(1, y) = 1 for any y
+	MOVD	$PosOne, R3
+	CMPUBEQ	R1, R3, xIsOne
+
+	// special case Pow(x, 1) = x for any x
+	MOVD	$PosOne, R4
+	CMPUBEQ	R2, R4, yIsOne
+
+	// special case Pow(x, NaN) = NaN for any x
+	MOVD	$~(1<<63), R5
+	AND	R2, R5    // R5 = bits of |y| (sign bit cleared)
+	MOVD	$PosInf, R4
+	CMPUBLT R4, R5, yIsNan	// bits of |y| above the +Inf pattern: y is NaN
+
+	MOVD	$NegInf, R3
+	CMPUBEQ	R1, R3, xIsNegInf
+
+	MOVD	$NegOne, R3
+	CMPUBEQ	R1, R3, xIsNegOne
+
+	MOVD	$PosInf, R3
+	CMPUBEQ	R1, R3, xIsPosInf
+
+	MOVD	$NegZero, R3
+	CMPUBEQ	R1, R3, xIsNegZero
+
+	MOVD	$PosInf, R4
+	CMPUBEQ	R2, R4, yIsPosInf
+
+	MOVD	$0x0, R3
+	CMPUBEQ	R1, R3, xIsPosZero
+	CMPBLT	R1, R3, xLtZero	// signed compare of the bits: sign bit of x is set
+	BR	Normal
+xIsPosInf:
+	// special case Pow(+Inf, y) = +Inf for y > 0
+	MOVD	$0x0, R4
+	CMPBGT	R2, R4, posInfGeZero
+	BR	Normal
+xIsNegInf:
+	//Pow(-Inf, y) = Pow(-0, -y)
+	FMOVD y+8(FP), F2
+	FNEG F2, F2			// y = -y
+	BR negZeroNegY		// call Pow(-0, -y)
+xIsNegOne:
+	// special case Pow(-1, ±Inf) = 1
+	MOVD	$PosInf, R4
+	CMPUBEQ	R2, R4, negOnePosInf
+	MOVD	$NegInf, R4
+	CMPUBEQ	R2, R4, negOneNegInf
+	BR	Normal
+xIsPosZero:
+	// special case Pow(+0, -Inf) = +Inf
+	MOVD	$NegInf, R4
+	CMPUBEQ	R2, R4, zeroNegInf
+
+	// special case Pow(+0, y < 0) = +Inf
+	FMOVD	y+8(FP), F2
+	FMOVD	$(0.0), F4
+	FCMPU	F2, F4
+	BLT	posZeroLtZero				//y < 0.0
+	BR	Normal
+xIsNegZero:
+	// special case Pow(-0, -Inf) = +Inf
+	MOVD	$NegInf, R4
+	CMPUBEQ	R2, R4, zeroNegInf
+	FMOVD	y+8(FP), F2
+negZeroNegY:
+	// special case Pow(x, ±0) = 1 for any x
+	FMOVD	$(0.0), F4
+	FCMPU	F4, F2
+	BLT	negZeroGtZero		// y > 0.0
+	BEQ yIsZero				// y = 0.0
+
+	FMOVD $(-0.0), F4
+	FCMPU F4, F2
+	BLT negZeroGtZero				// y > -0.0
+	BEQ yIsZero				// y = -0.0
+
+	// special case Pow(-0, y) = -Inf for y an odd integer < 0
+	// special case Pow(-0, y) = +Inf for finite y < 0 and not an odd integer
+	FIDBR	$5, F2, F4		//F2 translate to integer F4
+	FCMPU	F2, F4
+	BNE	zeroNotOdd			// y is not an (odd) integer and y < 0
+	FMOVD	$(2.0), F4
+	FDIV	F4, F2			// F2 = F2 / 2.0
+	FIDBR	$5, F2, F4		//F2 translate to integer F4
+	FCMPU	F2, F4
+	BNE	negZeroOddInt		// y is an odd integer and y < 0
+	BR	zeroNotOdd			// y is not an (odd) integer and y < 0
+
+negZeroGtZero:
+	// special case Pow(-0, y) = -0 for y an odd integer > 0
+	// special case Pow(±0, y) = +0 for finite y > 0 and not an odd integer
+	FIDBR	$5, F2, F4      //F2 translate to integer F4
+	FCMPU	F2, F4
+	BNE	zeroNotOddGtZero    // y is not an (odd) integer and y > 0
+	FMOVD	$(2.0), F4
+	FDIV	F4, F2          // F2 = F2 / 2.0
+	FIDBR	$5, F2, F4      //F2 translate to integer F4
+	FCMPU	F2, F4
+	BNE	negZeroOddIntGtZero       // y is an odd integer and y > 0
+	BR	zeroNotOddGtZero          // y is not an (odd) integer
+
+xLtZero:
+	// special case Pow(x, y) = NaN for finite x < 0 and finite non-integer y
+	FMOVD	y+8(FP), F2
+	FIDBR	$5, F2, F4
+	FCMPU	F2, F4
+	BNE	ltZeroInt	// y differs from its integer rounding: return NaN
+	BR	Normal
+yIsPosInf:
+	// special case Pow(x, +Inf) = +Inf for |x| > 1
+	FMOVD	x+0(FP), F1
+	FMOVD	$(1.0), F3
+	FCMPU	F1, F3
+	BGT	gtOnePosInf
+	FMOVD	$(-1.0), F3
+	FCMPU	F1, F3
+	BLT	ltNegOnePosInf
+Normal:
+	FMOVD	x+0(FP), F0
+	FMOVD	y+8(FP), F2
+	MOVD	$·powrodataL51<>+0(SB), R9
+	LGDR	F0, R3
+	WORD	$0xC0298009	//iilf	%r2,2148095317
+	BYTE	$0x55
+	BYTE	$0x55
+	RISBGNZ	$32, $63, $32, R3, R1
+	SUBW	R1, R2
+	RISBGNZ	$58, $63, $50, R2, R3
+	BYTE	$0x18	//lr	%r5,%r1
+	BYTE	$0x51
+	MOVD	$·powtabi<>+0(SB), R12
+	WORD	$0xE303C000	//llgc	%r0,0(%r3,%r12)
+	BYTE	$0x00
+	BYTE	$0x90
+	SUBW	$0x1A0000, R5
+	SLD	$3, R0, R3
+	MOVD	$·powtm<>+0(SB), R4
+	MOVH	$0x0, R8
+	ANDW	$0x7FF00000, R2
+	ORW	R5, R1
+	WORD	$0x5A234000	//a	%r2,0(%r3,%r4)
+	MOVD	$0x3FF0000000000000, R5
+	RISBGZ	$40, $63, $56, R2, R3
+	RISBGN	$0, $31, $32, R2, R8
+	ORW	$0x45000000, R3
+	MOVW	R1, R6
+	CMPBLT	R6, $0, L42
+	FMOVD	F0, F4
+L2:
+	VLVGF	$0, R3, V1
+	MOVD	$·pow_xa<>+0(SB), R2
+	WORD	$0xED3090A0	//lde	%f3,.L52-.L51(%r9)
+	BYTE	$0x00
+	BYTE	$0x24
+	FMOVD	0(R2), F6
+	FSUBS	F1, F3
+	LDGR	R8, F1
+	WFMSDB	V4, V1, V6, V4
+	FMOVD	152(R9), F6
+	WFMDB	V4, V4, V7
+	FMOVD	144(R9), F1
+	FMOVD	136(R9), F5
+	WFMADB	V4, V1, V6, V1
+	VLEG	$0, 128(R9), V16
+	FMOVD	120(R9), F6
+	WFMADB	V4, V5, V6, V5
+	FMOVD	112(R9), F6
+	WFMADB	V1, V7, V5, V1
+	WFMADB	V4, V6, V16, V16
+	SLD	$3, R0, R2
+	FMOVD	104(R9), F5
+	WORD	$0xED824004	//ldeb	%f8,4(%r2,%r4)
+	BYTE	$0x00
+	BYTE	$0x04
+	LDEBR	F3, F3
+	FMOVD	96(R9), F6
+	WFMADB	V4, V6, V5, V6
+	FADD	F8, F3
+	WFMADB	V7, V6, V16, V6
+	FMUL	F7, F7
+	FMOVD	88(R9), F5
+	FMADD	F7, F1, F6
+	WFMADB	V4, V5, V3, V16
+	FMOVD	80(R9), F1
+	WFSDB	V16, V3, V3
+	MOVD	$·powtl<>+0(SB), R3
+	WFMADB	V4, V6, V1, V6
+	FMADD	F5, F4, F3
+	FMOVD	72(R9), F1
+	WFMADB	V4, V6, V1, V6
+	WORD	$0xED323000	//adb	%f3,0(%r2,%r3)
+	BYTE	$0x00
+	BYTE	$0x1A
+	FMOVD	64(R9), F1
+	WFMADB	V4, V6, V1, V6
+	MOVD	$·pow_xadd<>+0(SB), R2
+	WFMADB	V4, V6, V3, V4
+	FMOVD	0(R2), F5
+	WFADB	V4, V16, V3
+	VLEG	$0, 56(R9), V20
+	WFMSDB	V2, V3, V5, V3
+	VLEG	$0, 48(R9), V18
+	WFADB	V3, V5, V6
+	LGDR	F3, R2
+	WFMSDB	V2, V16, V6, V16
+	FMOVD	40(R9), F1
+	WFMADB	V2, V4, V16, V4
+	FMOVD	32(R9), F7
+	WFMDB	V4, V4, V3
+	WFMADB	V4, V1, V20, V1
+	WFMADB	V4, V7, V18, V7
+	VLEG	$0, 24(R9), V16
+	WFMADB	V1, V3, V7, V1
+	FMOVD	16(R9), F5
+	WFMADB	V4, V5, V16, V5
+	RISBGZ	$57, $60, $3, R2, R4
+	WFMADB	V3, V1, V5, V1
+	MOVD	$·powtexp<>+0(SB), R3
+	WORD	$0x68343000	//ld	%f3,0(%r4,%r3)
+	FMADD	F3, F4, F4
+	RISBGN	$0, $15, $48, R2, R5
+	WFMADB	V4, V1, V3, V4
+	LGDR	F6, R2
+	LDGR	R5, F1
+	SRAD	$48, R2, R2
+	FMADD	F1, F4, F1
+	RLL	$16, R2, R2
+	ANDW	$0x7FFF0000, R2
+	WORD	$0xC22B3F71	//alfi	%r2,1064370176
+	BYTE	$0x00
+	BYTE	$0x00
+	ORW	R2, R1, R3
+	MOVW	R3, R6
+	CMPBLT	R6, $0, L43
+L1:
+	FMOVD	F1, ret+16(FP)	// common exit: the result is in F1
+	RET
+L43:
+	LTDBR	F0, F0
+	BLTU	L44
+	FMOVD	F0, F3
+L7:
+	MOVD	$·pow_xinf<>+0(SB), R3
+	FMOVD	0(R3), F5
+	WFCEDBS	V3, V5, V7
+	BVS	L8
+	WFMDB	V3, V2, V6
+L8:
+	WFCEDBS	V2, V2, V3
+	BVS	L9
+	LTDBR	F2, F2
+	BEQ	L26
+	MOVW	R1, R6
+	CMPBLT	R6, $0, L45
+L11:
+	WORD	$0xC0190003	//iilf	%r1,262143
+	BYTE	$0xFF
+	BYTE	$0xFF
+	MOVW	R2, R7
+	MOVW	R1, R6
+	CMPBLE	R7, R6, L34
+	RISBGNZ	$32, $63, $32, R5, R1
+	LGDR	F6, R2
+	MOVD	$powiadd<>+0(SB), R3
+	RISBGZ	$60, $60, $4, R2, R2
+	WORD	$0x5A123000	//a	%r1,0(%r2,%r3)
+	RISBGN	$0, $31, $32, R1, R5
+	LDGR	R5, F1
+	FMADD	F1, F4, F1
+	MOVD	$powxscale<>+0(SB), R1
+	WORD	$0xED121000	//mdb	%f1,0(%r2,%r1)
+	BYTE	$0x00
+	BYTE	$0x1C
+	BR	L1
+L42:
+	LTDBR	F0, F0
+	BLTU	L46
+	FMOVD	F0, F4
+L3:
+	MOVD	$·pow_x001a<>+0(SB), R2
+	WORD	$0xED402000	//cdb	%f4,0(%r2)
+	BYTE	$0x00
+	BYTE	$0x19
+	BGE	L2
+	BVS	L2
+	MOVD	$·pow_x43f<>+0(SB), R2
+	WORD	$0xED402000	//mdb	%f4,0(%r2)
+	BYTE	$0x00
+	BYTE	$0x1C
+	WORD	$0xC0298009	//iilf	%r2,2148095317
+	BYTE	$0x55
+	BYTE	$0x55
+	LGDR	F4, R3
+	RISBGNZ	$32, $63, $32, R3, R3
+	SUBW	R3, R2, R3
+	RISBGZ	$33, $43, $0, R3, R2
+	RISBGNZ	$58, $63, $50, R3, R3
+	WORD	$0xE303C000	//llgc	%r0,0(%r3,%r12)
+	BYTE	$0x00
+	BYTE	$0x90
+	SLD	$3, R0, R3
+	WORD	$0x5A234000	//a	%r2,0(%r3,%r4)
+	BYTE	$0x18	//lr	%r3,%r2
+	BYTE	$0x32
+	RISBGN	$0, $31, $32, R3, R8
+	ADDW	$0x4000000, R3
+	BLEU	L5
+	RISBGZ	$40, $63, $56, R3, R3
+	ORW	$0x45000000, R3
+	BR	L2
+L9:
+	WFCEDBS	V0, V0, V4
+	BVS	L35
+	FMOVD	F2, F1
+	BR	L1
+L46:
+	WORD	$0xB3130040	//lcdbr	%f4,%f0
+	BR	L3
+L44:
+	WORD	$0xB3130030	//lcdbr	%f3,%f0
+	BR	L7
+L35:
+	FMOVD	F0, F1
+	BR	L1
+L26:
+	FMOVD	8(R9), F1
+	BR	L1
+L34:
+	FMOVD	8(R9), F4
+L19:
+	LTDBR	F6, F6
+	BLEU	L47
+L18:
+	WFMDB	V4, V5, V1
+	BR	L1
+L5:
+	RISBGZ	$33, $50, $63, R3, R3
+	WORD	$0xC23B4000	//alfi	%r3,1073741824
+	BYTE	$0x00
+	BYTE	$0x00
+	RLL	$24, R3, R3
+	ORW	$0x45000000, R3
+	BR	L2
+L45:
+	WFCEDBS	V0, V0, V4
+	BVS	L35
+	LTDBR	F0, F0
+	BLEU	L48
+	FMOVD	8(R9), F4
+L12:
+	MOVW	R2, R6
+	CMPBLT	R6, $0, L19
+	FMUL	F4, F1
+	BR	L1
+L47:
+	BLT	L40
+	WFCEDBS	V0, V0, V2
+	BVS	L49
+L16:
+	MOVD	·pow_xnan<>+0(SB), R1
+	LDGR	R1, F0
+	WFMDB	V4, V0, V1
+	BR	L1
+L48:
+	LGDR	F0, R3
+	RISBGNZ	$32, $63, $32, R3, R1
+	MOVW	R1, R6
+	CMPBEQ	R6, $0, L29
+	LTDBR	F2, F2
+	BLTU	L50
+	FMOVD	F2, F4
+L14:
+	MOVD	$·pow_x433<>+0(SB), R1
+	FMOVD	0(R1), F7
+	WFCHDBS	V4, V7, V3
+	BEQ	L15
+	WFADB	V7, V4, V3
+	FSUB	F7, F3
+	WFCEDBS	V4, V3, V3
+	BEQ	L15
+	LTDBR	F0, F0
+	FMOVD	8(R9), F4
+	BNE	L16
+L13:
+	LTDBR	F2, F2
+	BLT	L18
+L40:
+	FMOVD	$0, F0
+	WFMDB	V4, V0, V1
+	BR	L1
+L49:
+	WFMDB	V0, V4, V1
+	BR	L1
+L29:
+	FMOVD	8(R9), F4
+	BR	L13
+L15:
+	MOVD	$·pow_x434<>+0(SB), R1
+	FMOVD	0(R1), F7
+	WFCHDBS	V4, V7, V3
+	BEQ	L32
+	WFADB	V7, V4, V3
+	FSUB	F7, F3
+	WFCEDBS	V4, V3, V4
+	BEQ	L32
+	FMOVD	0(R9), F4
+L17:
+	LTDBR	F0, F0
+	BNE	L12
+	BR	L13
+L32:
+	FMOVD	8(R9), F4
+	BR	L17
+L50:
+	WORD	$0xB3130042	//lcdbr	%f4,%f2
+	BR	L14
+xIsOne:			// Pow(1, y) = 1 for any y
+yIsOne:			// Pow(x, 1) = x for any x
+posInfGeZero:	// Pow(+Inf, y) = +Inf for y > 0
+	MOVD	R1, ret+16(FP)	// R1 still holds the bits of x
+	RET
+yIsNan:			// Pow(x, NaN) = NaN
+ltZeroInt:		// Pow(x, y) = NaN for finite x < 0 and finite non-integer y
+	MOVD	$NaN, R2
+	MOVD	R2, ret+16(FP)
+	RET
+negOnePosInf:	// Pow(-1, ±Inf) = 1
+negOneNegInf:
+	MOVD	$PosOne, R3
+	MOVD	R3, ret+16(FP)
+	RET
+negZeroOddInt:	// Pow(-0, y) = -Inf for y an odd integer < 0
+	MOVD	$NegInf, R3
+	MOVD	R3, ret+16(FP)
+	RET
+zeroNotOdd:		// Pow(±0, y) = +Inf for finite y < 0 and not an odd integer
+posZeroLtZero:	// special case Pow(+0, y < 0) = +Inf
+zeroNegInf:		// Pow(±0, -Inf) = +Inf
+	MOVD	$PosInf, R3
+	MOVD	R3, ret+16(FP)
+	RET
+gtOnePosInf:	//Pow(x, +Inf) = +Inf for |x| > 1
+ltNegOnePosInf:
+	MOVD	R2, ret+16(FP)	// R2 holds the bits of y, which is +Inf here
+	RET
+yIsZero:		//Pow(x, ±0) = 1 for any x
+	MOVD	$PosOne, R4
+	MOVD	R4, ret+16(FP)
+	RET
+negZeroOddIntGtZero:        // Pow(-0, y) = -0 for y an odd integer > 0
+	MOVD	$NegZero, R3
+	MOVD	R3, ret+16(FP)
+	RET
+zeroNotOddGtZero:        // Pow(±0, y) = +0 for finite y > 0 and not an odd integer
+	MOVD	$0, ret+16(FP)
+	RET
diff --git a/src/math/rand/auto_test.go b/src/math/rand/auto_test.go
new file mode 100644
index 0000000..b057370
--- /dev/null
+++ b/src/math/rand/auto_test.go
@@ -0,0 +1,40 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand_test
+
+import (
+	. "math/rand"
+	"testing"
+)
+
+// This test is first, in its own file with an alphabetically early name,
+// to try to make sure that it runs early. It has the best chance of
+// detecting deterministic seeding if it's the first test that runs.
+
+func TestAuto(t *testing.T) {
+	// Draw 10 int64s from the global source first, then
+	// verify that they do not show up, in that order,
+	// in the deterministic Seed(1) result.
+	want := make([]int64, 10)
+	for i := range want {
+		want[i] = Int63()
+	}
+
+	// Search Seed(1)'s output for want.
+	// Strictly speaking, we should look for them in order,
+	// but this is good enough and not significantly more
+	// likely to have a false positive.
+	Seed(1)
+	matched := 0
+	for n := 0; n < 1000; n++ {
+		if Int63() != want[matched] {
+			continue
+		}
+		matched++
+		if matched == len(want) {
+			t.Fatalf("found unseeded output in Seed(1) output")
+		}
+	}
+}
diff --git a/src/math/rand/default_test.go b/src/math/rand/default_test.go
new file mode 100644
index 0000000..19fd75d
--- /dev/null
+++ b/src/math/rand/default_test.go
@@ -0,0 +1,148 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand_test
+
+import (
+	"fmt"
+	"internal/race"
+	"internal/testenv"
+	. "math/rand"
+	"os"
+	"runtime"
+	"strconv"
+	"sync"
+	"testing"
+)
+
+// TestDefaultRace checks that racy access to the default functions behaves reasonably.
+func TestDefaultRace(t *testing.T) {
+	// Short mode skips this test unless the race detector is
+	// in use: part of the purpose is to see whether the race
+	// detector reports any problems.
+	if testing.Short() && !race.Enabled {
+		t.Skip("skipping starting another executable in short mode")
+	}
+
+	const env = "GO_RAND_TEST_HELPER_CODE"
+	if v := os.Getenv(env); v != "" {
+		doDefaultTest(t, v)
+		return
+	}
+
+	t.Parallel()
+
+	for i := 0; i < 6; i++ {
+		code, autoSeedOff := i/2, i%2 != 0
+		t.Run(strconv.Itoa(i), func(t *testing.T) {
+			t.Parallel()
+			exe, err := os.Executable()
+			if err != nil {
+				exe = os.Args[0]
+			}
+			cmd := testenv.CleanCmdEnv(testenv.Command(t, exe, "-test.run=TestDefaultRace"))
+			helper := fmt.Sprintf("GO_RAND_TEST_HELPER_CODE=%d", code)
+			cmd.Env = append(cmd.Env, helper)
+			if autoSeedOff {
+				cmd.Env = append(cmd.Env, "GODEBUG=randautoseed=0")
+			}
+			out, err := cmd.CombinedOutput()
+			if len(out) > 0 {
+				t.Logf("%s", out)
+			}
+			if err != nil {
+				t.Error(err)
+			}
+		})
+	}
+}
+
+// doDefaultTest should be run before there have been any calls to the
+// top-level math/rand functions. It makes concurrent calls to top-level
+// functions and to Seed and reports an error for any duplicate value.
+// This will also give the race detector a chance to report any problems.
+func doDefaultTest(t *testing.T, v string) {
+	code, err := strconv.Atoi(v)
+	if err != nil {
+		t.Fatalf("internal error: unrecognized code %q", v)
+	}
+
+	// Use one worker per GOMAXPROCS, but never fewer than 4.
+	goroutines := runtime.GOMAXPROCS(0)
+	if goroutines < 4 {
+		goroutines = 4
+	}
+
+	// ch is buffered for more values than any case below sends,
+	// so the senders never block on it.
+	ch := make(chan uint64, goroutines*3)
+	var wg sync.WaitGroup
+
+	// seeders starts one goroutine per worker, each calling Seed
+	// with its own seed value (100, 101, ...).
+	seeders := func() {
+		wg.Add(goroutines)
+		for i := 0; i < goroutines; i++ {
+			go func(s int64) {
+				defer wg.Done()
+				Seed(s)
+			}(int64(i) + 100)
+		}
+	}
+
+	// readers starts one goroutine per worker, each sending a
+	// single Uint64 result on ch.
+	readers := func() {
+		wg.Add(goroutines)
+		for i := 0; i < goroutines; i++ {
+			go func() {
+				defer wg.Done()
+				ch <- Uint64()
+			}()
+		}
+	}
+
+	// The various tests below should not cause race detector reports
+	// and should not produce duplicate results.
+	//
+	// Note: these tests can theoretically fail when using fastrand64
+	// in that it is possible to coincidentally get the same random
+	// number twice. That could happen something like 1 / 2**64 times,
+	// which is rare enough that it may never happen. We don't worry
+	// about that case.
+
+	switch code {
+	case 0:
+		// Call Seed and Uint64 concurrently.
+		seeders()
+		readers()
+	case 1:
+		// Call Uint64 concurrently with no Seed.
+		readers()
+	case 2:
+		// Start with Uint64 to pick the fast source, then call
+		// Seed and Uint64 concurrently.
+		ch <- Uint64()
+		seeders()
+		readers()
+	default:
+		t.Fatalf("internal error: unrecognized code %d", code)
+	}
+
+	// Close ch once every worker has finished, so that the
+	// range loop below terminates.
+	go func() {
+		wg.Wait()
+		close(ch)
+	}()
+
+	// Every value received on ch must be distinct.
+	seen := make(map[uint64]bool)
+	for x := range ch {
+		if seen[x] {
+			t.Errorf("saw %d twice", x)
+		}
+		seen[x] = true
+	}
+}
diff --git a/src/math/rand/example_test.go b/src/math/rand/example_test.go
new file mode 100644
index 0000000..d656f47
--- /dev/null
+++ b/src/math/rand/example_test.go
@@ -0,0 +1,133 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand_test
+
+import (
+	"fmt"
+	"math/rand"
+	"os"
+	"strings"
+	"text/tabwriter"
+)
+
+// These tests serve as an example but also make sure we don't change
+// the output of the random number generator when given a fixed seed.
+
+func Example() {
+	answers := []string{
+		"It is certain",
+		"It is decidedly so",
+		"Without a doubt",
+		"Yes definitely",
+		"You may rely on it",
+		"As I see it yes",
+		"Most likely",
+		"Outlook good",
+		"Yes",
+		"Signs point to yes",
+		"Reply hazy try again",
+		"Ask again later",
+		"Better not tell you now",
+		"Cannot predict now",
+		"Concentrate and ask again",
+		"Don't count on it",
+		"My reply is no",
+		"My sources say no",
+		"Outlook not so good",
+		"Very doubtful",
+	}
+	fmt.Println("Magic 8-Ball says:", answers[rand.Intn(len(answers))]) // Intn picks an index below len(answers)
+}
+
+// This example demonstrates each of the methods on a *Rand.
+// The global functions are used the same way, without the receiver.
+func Example_rand() {
+	// Create and seed the generator.
+	// A non-fixed seed such as time.Now().UnixNano() is the typical choice;
+	// a fixed seed, as used here, produces the same output on every run.
+	rng := rand.New(rand.NewSource(99))
+
+	// A tabwriter keeps the columns of the output aligned.
+	tw := tabwriter.NewWriter(os.Stdout, 1, 1, 1, ' ', 0)
+	defer tw.Flush()
+	row := func(label string, a, b, c any) {
+		fmt.Fprintf(tw, "%s\t%v\t%v\t%v\n", label, a, b, c)
+	}
+
+	// Float32 and Float64 produce values in [0, 1).
+	row("Float32", rng.Float32(), rng.Float32(), rng.Float32())
+	row("Float64", rng.Float64(), rng.Float64(), rng.Float64())
+
+	// ExpFloat64 values average 1 but decay exponentially.
+	row("ExpFloat64", rng.ExpFloat64(), rng.ExpFloat64(), rng.ExpFloat64())
+
+	// NormFloat64 values average 0 with a standard deviation of 1.
+	row("NormFloat64", rng.NormFloat64(), rng.NormFloat64(), rng.NormFloat64())
+
+	// Int31, Int63, and Uint32 produce values of the given width.
+	// The Int method (not shown) behaves like Int31 or Int63,
+	// depending on the size of 'int'.
+	row("Int31", rng.Int31(), rng.Int31(), rng.Int31())
+	row("Int63", rng.Int63(), rng.Int63(), rng.Int63())
+	row("Uint32", rng.Uint32(), rng.Uint32(), rng.Uint32())
+
+	// Intn, Int31n, and Int63n keep their output below n.
+	// They do so more carefully than using rng.Int()%n.
+	row("Intn(10)", rng.Intn(10), rng.Intn(10), rng.Intn(10))
+	row("Int31n(10)", rng.Int31n(10), rng.Int31n(10), rng.Int31n(10))
+	row("Int63n(10)", rng.Int63n(10), rng.Int63n(10), rng.Int63n(10))
+
+	// Perm produces a random permutation of the numbers [0, n).
+	row("Perm", rng.Perm(5), rng.Perm(5), rng.Perm(5))
+	// Output:
+	// Float32     0.2635776           0.6358173           0.6718283
+	// Float64     0.628605430454327   0.4504798828572669  0.9562755949377957
+	// ExpFloat64  0.3362240648200941  1.4256072328483647  0.24354758816173044
+	// NormFloat64 0.17233959114940064 1.577014951434847   0.04259129641113857
+	// Int31       1501292890          1486668269          182840835
+	// Int63       3546343826724305832 5724354148158589552 5239846799706671610
+	// Uint32      2760229429          296659907           1922395059
+	// Intn(10)    1                   2                   5
+	// Int31n(10)  4                   7                   8
+	// Int63n(10)  7                   6                   3
+	// Perm        [1 4 2 3 0]         [4 2 1 3 0]         [1 2 4 0 3]
+}
+
+func ExamplePerm() {
+	perm := rand.Perm(3)
+	for i := range perm {
+		fmt.Println(perm[i])
+	}
+	// Unordered output: 1
+	// 2
+	// 0
+}
+
+func ExampleShuffle() {
+	tokens := strings.Fields("ink runs from the corners of my mouth")
+	swap := func(a, b int) { tokens[a], tokens[b] = tokens[b], tokens[a] }
+	// Shuffle reorders tokens in place through the swap callback.
+	rand.Shuffle(len(tokens), swap)
+	fmt.Println(tokens)
+}
+
+func ExampleShuffle_slicesInUnison() {
+	digits, alpha := []byte("12345"), []byte("ABCDE")
+	// Every swap is applied to both slices, so each letter
+	// stays paired with the digit it started with.
+	rand.Shuffle(len(digits), func(a, b int) {
+		digits[a], digits[b] = digits[b], digits[a]
+		alpha[a], alpha[b] = alpha[b], alpha[a]
+	})
+	for k, d := range digits {
+		fmt.Printf("%c: %c\n", alpha[k], d)
+	}
+}
+
+func ExampleIntn() {
+	for i := 0; i < 3; i++ {
+		fmt.Println(rand.Intn(100))
+	}
+}
diff --git a/src/math/rand/exp.go b/src/math/rand/exp.go
new file mode 100644
index 0000000..c1162c1
--- /dev/null
+++ b/src/math/rand/exp.go
@@ -0,0 +1,221 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand
+
+import (
+	"math"
+)
+
+/*
+ * Exponential distribution
+ *
+ * See "The Ziggurat Method for Generating Random Variables"
+ * (Marsaglia & Tsang, 2000)
+ * https://www.jstatsoft.org/v05/i08/paper [pdf]
+ */
+
+const (
+	re = 7.69711747013104972 // base of the tail sample ExpFloat64 returns when table entry 0 is selected
+)
+
+// ExpFloat64 returns an exponentially distributed float64 in the range
+// (0, +math.MaxFloat64] with an exponential distribution whose rate parameter
+// (lambda) is 1 and whose mean is 1/lambda (1).
+// To produce a distribution with a different rate parameter,
+// callers can adjust the output using:
+//
+//	sample = ExpFloat64() / desiredRateParameter
+func (r *Rand) ExpFloat64() float64 {
+	for {
+		j := r.Uint32()
+		i := j & 0xFF // low 8 bits pick one of the 256 table entries
+		x := float64(j) * float64(we[i])
+		if j < ke[i] { // accept x without any further draws
+			return x
+		}
+		if i == 0 { // entry 0: return a value offset from re using one fresh Float64
+			return re - math.Log(r.Float64())
+		}
+		if fe[i]+float32(r.Float64())*(fe[i-1]-fe[i]) < float32(math.Exp(-x)) { // accept if a random height between fe[i] and fe[i-1] lies under exp(-x); otherwise redraw
+			return x
+		}
+	}
+}
+
+var ke = [256]uint32{
+	0xe290a139, 0x0, 0x9beadebc, 0xc377ac71, 0xd4ddb990,
+	0xde893fb8, 0xe4a8e87c, 0xe8dff16a, 0xebf2deab, 0xee49a6e8,
+	0xf0204efd, 0xf19bdb8e, 0xf2d458bb, 0xf3da104b, 0xf4b86d78,
+	0xf577ad8a, 0xf61de83d, 0xf6afb784, 0xf730a573, 0xf7a37651,
+	0xf80a5bb6, 0xf867189d, 0xf8bb1b4f, 0xf9079062, 0xf94d70ca,
+	0xf98d8c7d, 0xf9c8928a, 0xf9ff175b, 0xfa319996, 0xfa6085f8,
+	0xfa8c3a62, 0xfab5084e, 0xfadb36c8, 0xfaff0410, 0xfb20a6ea,
+	0xfb404fb4, 0xfb5e2951, 0xfb7a59e9, 0xfb95038c, 0xfbae44ba,
+	0xfbc638d8, 0xfbdcf892, 0xfbf29a30, 0xfc0731df, 0xfc1ad1ed,
+	0xfc2d8b02, 0xfc3f6c4d, 0xfc5083ac, 0xfc60ddd1, 0xfc708662,
+	0xfc7f8810, 0xfc8decb4, 0xfc9bbd62, 0xfca9027c, 0xfcb5c3c3,
+	0xfcc20864, 0xfccdd70a, 0xfcd935e3, 0xfce42ab0, 0xfceebace,
+	0xfcf8eb3b, 0xfd02c0a0, 0xfd0c3f59, 0xfd156b7b, 0xfd1e48d6,
+	0xfd26daff, 0xfd2f2552, 0xfd372af7, 0xfd3eeee5, 0xfd4673e7,
+	0xfd4dbc9e, 0xfd54cb85, 0xfd5ba2f2, 0xfd62451b, 0xfd68b415,
+	0xfd6ef1da, 0xfd750047, 0xfd7ae120, 0xfd809612, 0xfd8620b4,
+	0xfd8b8285, 0xfd90bcf5, 0xfd95d15e, 0xfd9ac10b, 0xfd9f8d36,
+	0xfda43708, 0xfda8bf9e, 0xfdad2806, 0xfdb17141, 0xfdb59c46,
+	0xfdb9a9fd, 0xfdbd9b46, 0xfdc170f6, 0xfdc52bd8, 0xfdc8ccac,
+	0xfdcc542d, 0xfdcfc30b, 0xfdd319ef, 0xfdd6597a, 0xfdd98245,
+	0xfddc94e5, 0xfddf91e6, 0xfde279ce, 0xfde54d1f, 0xfde80c52,
+	0xfdeab7de, 0xfded5034, 0xfdefd5be, 0xfdf248e3, 0xfdf4aa06,
+	0xfdf6f984, 0xfdf937b6, 0xfdfb64f4, 0xfdfd818d, 0xfdff8dd0,
+	0xfe018a08, 0xfe03767a, 0xfe05536c, 0xfe07211c, 0xfe08dfc9,
+	0xfe0a8fab, 0xfe0c30fb, 0xfe0dc3ec, 0xfe0f48b1, 0xfe10bf76,
+	0xfe122869, 0xfe1383b4, 0xfe14d17c, 0xfe1611e7, 0xfe174516,
+	0xfe186b2a, 0xfe19843e, 0xfe1a9070, 0xfe1b8fd6, 0xfe1c8289,
+	0xfe1d689b, 0xfe1e4220, 0xfe1f0f26, 0xfe1fcfbc, 0xfe2083ed,
+	0xfe212bc3, 0xfe21c745, 0xfe225678, 0xfe22d95f, 0xfe234ffb,
+	0xfe23ba4a, 0xfe241849, 0xfe2469f2, 0xfe24af3c, 0xfe24e81e,
+	0xfe25148b, 0xfe253474, 0xfe2547c7, 0xfe254e70, 0xfe25485a,
+	0xfe25356a, 0xfe251586, 0xfe24e88f, 0xfe24ae64, 0xfe2466e1,
+	0xfe2411df, 0xfe23af34, 0xfe233eb4, 0xfe22c02c, 0xfe22336b,
+	0xfe219838, 0xfe20ee58, 0xfe20358c, 0xfe1f6d92, 0xfe1e9621,
+	0xfe1daef0, 0xfe1cb7ac, 0xfe1bb002, 0xfe1a9798, 0xfe196e0d,
+	0xfe1832fd, 0xfe16e5fe, 0xfe15869d, 0xfe141464, 0xfe128ed3,
+	0xfe10f565, 0xfe0f478c, 0xfe0d84b1, 0xfe0bac36, 0xfe09bd73,
+	0xfe07b7b5, 0xfe059a40, 0xfe03644c, 0xfe011504, 0xfdfeab88,
+	0xfdfc26e9, 0xfdf98629, 0xfdf6c83b, 0xfdf3ec01, 0xfdf0f04a,
+	0xfdedd3d1, 0xfdea953d, 0xfde7331e, 0xfde3abe9, 0xfddffdfb,
+	0xfddc2791, 0xfdd826cd, 0xfdd3f9a8, 0xfdcf9dfc, 0xfdcb1176,
+	0xfdc65198, 0xfdc15bb3, 0xfdbc2ce2, 0xfdb6c206, 0xfdb117be,
+	0xfdab2a63, 0xfda4f5fd, 0xfd9e7640, 0xfd97a67a, 0xfd908192,
+	0xfd8901f2, 0xfd812182, 0xfd78d98e, 0xfd7022bb, 0xfd66f4ed,
+	0xfd5d4732, 0xfd530f9c, 0xfd48432b, 0xfd3cd59a, 0xfd30b936,
+	0xfd23dea4, 0xfd16349e, 0xfd07a7a3, 0xfcf8219b, 0xfce7895b,
+	0xfcd5c220, 0xfcc2aadb, 0xfcae1d5e, 0xfc97ed4e, 0xfc7fe6d4,
+	0xfc65ccf3, 0xfc495762, 0xfc2a2fc8, 0xfc07ee19, 0xfbe213c1,
+	0xfbb8051a, 0xfb890078, 0xfb5411a5, 0xfb180005, 0xfad33482,
+	0xfa839276, 0xfa263b32, 0xf9b72d1c, 0xf930a1a2, 0xf889f023,
+	0xf7b577d2, 0xf69c650c, 0xf51530f0, 0xf2cb0e3c, 0xeeefb15d,
+	0xe6da6ecf,
+}
+var we = [256]float32{
+	2.0249555e-09, 1.486674e-11, 2.4409617e-11, 3.1968806e-11,
+	3.844677e-11, 4.4228204e-11, 4.9516443e-11, 5.443359e-11,
+	5.905944e-11, 6.344942e-11, 6.7643814e-11, 7.1672945e-11,
+	7.556032e-11, 7.932458e-11, 8.298079e-11, 8.654132e-11,
+	9.0016515e-11, 9.3415074e-11, 9.674443e-11, 1.0001099e-10,
+	1.03220314e-10, 1.06377254e-10, 1.09486115e-10, 1.1255068e-10,
+	1.1557435e-10, 1.1856015e-10, 1.2151083e-10, 1.2442886e-10,
+	1.2731648e-10, 1.3017575e-10, 1.3300853e-10, 1.3581657e-10,
+	1.3860142e-10, 1.4136457e-10, 1.4410738e-10, 1.4683108e-10,
+	1.4953687e-10, 1.5222583e-10, 1.54899e-10, 1.5755733e-10,
+	1.6020171e-10, 1.6283301e-10, 1.6545203e-10, 1.6805951e-10,
+	1.7065617e-10, 1.732427e-10, 1.7581973e-10, 1.7838787e-10,
+	1.8094774e-10, 1.8349985e-10, 1.8604476e-10, 1.8858298e-10,
+	1.9111498e-10, 1.9364126e-10, 1.9616223e-10, 1.9867835e-10,
+	2.0119004e-10, 2.0369768e-10, 2.0620168e-10, 2.087024e-10,
+	2.1120022e-10, 2.136955e-10, 2.1618855e-10, 2.1867974e-10,
+	2.2116936e-10, 2.2365775e-10, 2.261452e-10, 2.2863202e-10,
+	2.311185e-10, 2.3360494e-10, 2.360916e-10, 2.3857874e-10,
+	2.4106667e-10, 2.4355562e-10, 2.4604588e-10, 2.485377e-10,
+	2.5103128e-10, 2.5352695e-10, 2.560249e-10, 2.585254e-10,
+	2.6102867e-10, 2.6353494e-10, 2.6604446e-10, 2.6855745e-10,
+	2.7107416e-10, 2.7359479e-10, 2.761196e-10, 2.7864877e-10,
+	2.8118255e-10, 2.8372119e-10, 2.8626485e-10, 2.888138e-10,
+	2.9136826e-10, 2.939284e-10, 2.9649452e-10, 2.9906677e-10,
+	3.016454e-10, 3.0423064e-10, 3.0682268e-10, 3.0942177e-10,
+	3.1202813e-10, 3.1464195e-10, 3.1726352e-10, 3.19893e-10,
+	3.2253064e-10, 3.251767e-10, 3.2783135e-10, 3.3049485e-10,
+	3.3316744e-10, 3.3584938e-10, 3.3854083e-10, 3.4124212e-10,
+	3.4395342e-10, 3.46675e-10, 3.4940711e-10, 3.5215003e-10,
+	3.5490397e-10, 3.5766917e-10, 3.6044595e-10, 3.6323455e-10,
+	3.660352e-10, 3.6884823e-10, 3.7167386e-10, 3.745124e-10,
+	3.773641e-10, 3.802293e-10, 3.8310827e-10, 3.860013e-10,
+	3.8890866e-10, 3.918307e-10, 3.9476775e-10, 3.9772008e-10,
+	4.0068804e-10, 4.0367196e-10, 4.0667217e-10, 4.09689e-10,
+	4.1272286e-10, 4.1577405e-10, 4.1884296e-10, 4.2192994e-10,
+	4.250354e-10, 4.281597e-10, 4.313033e-10, 4.3446652e-10,
+	4.3764986e-10, 4.408537e-10, 4.4407847e-10, 4.4732465e-10,
+	4.5059267e-10, 4.5388301e-10, 4.571962e-10, 4.6053267e-10,
+	4.6389292e-10, 4.6727755e-10, 4.70687e-10, 4.741219e-10,
+	4.7758275e-10, 4.810702e-10, 4.845848e-10, 4.8812715e-10,
+	4.9169796e-10, 4.9529775e-10, 4.989273e-10, 5.0258725e-10,
+	5.0627835e-10, 5.100013e-10, 5.1375687e-10, 5.1754584e-10,
+	5.21369e-10, 5.2522725e-10, 5.2912136e-10, 5.330522e-10,
+	5.370208e-10, 5.4102806e-10, 5.45075e-10, 5.491625e-10,
+	5.532918e-10, 5.5746385e-10, 5.616799e-10, 5.6594107e-10,
+	5.7024857e-10, 5.746037e-10, 5.7900773e-10, 5.834621e-10,
+	5.8796823e-10, 5.925276e-10, 5.971417e-10, 6.018122e-10,
+	6.065408e-10, 6.113292e-10, 6.1617933e-10, 6.2109295e-10,
+	6.260722e-10, 6.3111916e-10, 6.3623595e-10, 6.4142497e-10,
+	6.4668854e-10, 6.5202926e-10, 6.5744976e-10, 6.6295286e-10,
+	6.6854156e-10, 6.742188e-10, 6.79988e-10, 6.858526e-10,
+	6.9181616e-10, 6.978826e-10, 7.04056e-10, 7.103407e-10,
+	7.167412e-10, 7.2326256e-10, 7.2990985e-10, 7.366886e-10,
+	7.4360473e-10, 7.5066453e-10, 7.5787476e-10, 7.6524265e-10,
+	7.7277595e-10, 7.80483e-10, 7.883728e-10, 7.9645507e-10,
+	8.047402e-10, 8.1323964e-10, 8.219657e-10, 8.309319e-10,
+	8.401528e-10, 8.496445e-10, 8.594247e-10, 8.6951274e-10,
+	8.799301e-10, 8.9070046e-10, 9.018503e-10, 9.134092e-10,
+	9.254101e-10, 9.378904e-10, 9.508923e-10, 9.644638e-10,
+	9.786603e-10, 9.935448e-10, 1.0091913e-09, 1.025686e-09,
+	1.0431306e-09, 1.0616465e-09, 1.08138e-09, 1.1025096e-09,
+	1.1252564e-09, 1.1498986e-09, 1.1767932e-09, 1.206409e-09,
+	1.2393786e-09, 1.276585e-09, 1.3193139e-09, 1.3695435e-09,
+	1.4305498e-09, 1.508365e-09, 1.6160854e-09, 1.7921248e-09,
+}
+var fe = [256]float32{
+	1, 0.9381437, 0.90046996, 0.87170434, 0.8477855, 0.8269933,
+	0.8084217, 0.7915276, 0.77595687, 0.7614634, 0.7478686,
+	0.7350381, 0.72286767, 0.71127474, 0.70019263, 0.6895665,
+	0.67935055, 0.6695063, 0.66000086, 0.65080583, 0.6418967,
+	0.63325197, 0.6248527, 0.6166822, 0.60872537, 0.60096896,
+	0.5934009, 0.58601034, 0.5787874, 0.57172304, 0.5648092,
+	0.5580383, 0.5514034, 0.5448982, 0.5385169, 0.53225386,
+	0.5261042, 0.52006316, 0.5141264, 0.50828975, 0.5025495,
+	0.496902, 0.49134386, 0.485872, 0.48048335, 0.4751752,
+	0.46994483, 0.46478975, 0.45970762, 0.45469615, 0.44975325,
+	0.44487688, 0.44006512, 0.43531612, 0.43062815, 0.42599955,
+	0.42142874, 0.4169142, 0.41245446, 0.40804818, 0.403694,
+	0.3993907, 0.39513698, 0.39093173, 0.38677382, 0.38266218,
+	0.37859577, 0.37457356, 0.37059465, 0.3666581, 0.362763,
+	0.35890847, 0.35509375, 0.351318, 0.3475805, 0.34388044,
+	0.34021714, 0.3365899, 0.33299807, 0.32944095, 0.32591796,
+	0.3224285, 0.3189719, 0.31554767, 0.31215525, 0.30879408,
+	0.3054636, 0.3021634, 0.29889292, 0.2956517, 0.29243928,
+	0.28925523, 0.28609908, 0.28297043, 0.27986884, 0.27679393,
+	0.2737453, 0.2707226, 0.2677254, 0.26475343, 0.26180625,
+	0.25888354, 0.25598502, 0.2531103, 0.25025907, 0.24743107,
+	0.24462597, 0.24184346, 0.23908329, 0.23634516, 0.23362878,
+	0.23093392, 0.2282603, 0.22560766, 0.22297576, 0.22036438,
+	0.21777324, 0.21520215, 0.21265087, 0.21011916, 0.20760682,
+	0.20511365, 0.20263945, 0.20018397, 0.19774707, 0.19532852,
+	0.19292815, 0.19054577, 0.1881812, 0.18583426, 0.18350479,
+	0.1811926, 0.17889754, 0.17661946, 0.17435817, 0.17211354,
+	0.1698854, 0.16767362, 0.16547804, 0.16329853, 0.16113494,
+	0.15898713, 0.15685499, 0.15473837, 0.15263714, 0.15055119,
+	0.14848037, 0.14642459, 0.14438373, 0.14235765, 0.14034624,
+	0.13834943, 0.13636707, 0.13439907, 0.13244532, 0.13050574,
+	0.1285802, 0.12666863, 0.12477092, 0.12288698, 0.12101672,
+	0.119160056, 0.1173169, 0.115487166, 0.11367077, 0.11186763,
+	0.11007768, 0.10830083, 0.10653701, 0.10478614, 0.10304816,
+	0.101323, 0.09961058, 0.09791085, 0.09622374, 0.09454919,
+	0.09288713, 0.091237515, 0.08960028, 0.087975375, 0.08636274,
+	0.08476233, 0.083174095, 0.081597984, 0.08003395, 0.07848195,
+	0.076941945, 0.07541389, 0.07389775, 0.072393484, 0.07090106,
+	0.069420435, 0.06795159, 0.066494495, 0.06504912, 0.063615434,
+	0.062193416, 0.060783047, 0.059384305, 0.057997175,
+	0.05662164, 0.05525769, 0.053905312, 0.052564494, 0.051235236,
+	0.049917534, 0.048611384, 0.047316793, 0.046033762, 0.0447623,
+	0.043502413, 0.042254124, 0.041017443, 0.039792392,
+	0.038578995, 0.037377283, 0.036187284, 0.035009038,
+	0.033842582, 0.032687962, 0.031545233, 0.030414443, 0.02929566,
+	0.02818895, 0.027094385, 0.026012046, 0.024942026, 0.023884421,
+	0.022839336, 0.021806888, 0.020787204, 0.019780423, 0.0187867,
+	0.0178062, 0.016839107, 0.015885621, 0.014945968, 0.014020392,
+	0.013109165, 0.012212592, 0.011331013, 0.01046481, 0.009614414,
+	0.008780315, 0.007963077, 0.0071633533, 0.006381906,
+	0.0056196423, 0.0048776558, 0.004157295, 0.0034602648,
+	0.0027887989, 0.0021459677, 0.0015362998, 0.0009672693,
+	0.00045413437,
+}
diff --git a/src/math/rand/export_test.go b/src/math/rand/export_test.go
new file mode 100644
index 0000000..560010b
--- /dev/null
+++ b/src/math/rand/export_test.go
@@ -0,0 +1,17 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand
+
+// Int31nForTest lets tests outside the package call the unexported
+// (*Rand).int31n; it returns exactly what r.int31n(n) returns.
+func Int31nForTest(r *Rand, n int32) int32 { return r.int31n(n) }
+
+// GetNormalDistributionParameters returns the package-level rn, kn, wn and fn,
+// in that order. The arrays are returned by value, so callers receive copies.
+func GetNormalDistributionParameters() (float64, [128]uint32, [128]float32, [128]float32) { return rn, kn, wn, fn }
+
+// GetExponentialDistributionParameters returns the package-level re, ke, we and
+// fe, in that order. The arrays are returned by value, so callers receive copies.
+func GetExponentialDistributionParameters() (float64, [256]uint32, [256]float32, [256]float32) { return re, ke, we, fe }
diff --git a/src/math/rand/gen_cooked.go b/src/math/rand/gen_cooked.go
new file mode 100644
index 0000000..782bb66
--- /dev/null
+++ b/src/math/rand/gen_cooked.go
@@ -0,0 +1,89 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+// This program computes the value of rngCooked in rng.go,
+// which is used for seeding all instances of rand.Source.
+// A 64-bit and a 63-bit version of the array are printed to
+// standard output.
+
+package main
+
+import "fmt"
+
+const (
+	length = 607           // number of int64 words in rngVec
+	tap    = 273           // srand starts rngFeed at length - tap
+	mask   = (1 << 63) - 1 // keeps the low 63 bits of a word
+	a      = 48271         // multiplier used by seedrand
+	m      = (1 << 31) - 1 // modulus used by seedrand and srand
+	q      = 44488         // m / a, used by seedrand
+	r      = 3399          // m % a, used by seedrand
+)
+
+var (
+	rngVec          [length]int64 // generator state: filled by srand, updated in place by vrand
+	rngTap, rngFeed int           // indices into rngVec, stepped downward by vrand
+)
+
+// seedrand advances x by one step of the seeding recurrence. It evaluates
+// a*(x%q) - r*(x/q) and adds m when that difference is negative.
+func seedrand(x int32) int32 {
+	next := a*(x%q) - r*(x/q)
+	if next < 0 {
+		return next + m
+	}
+	return next
+}
+
+func srand(seed int32) {
+	rngTap = 0
+	rngFeed = length - tap
+	seed %= m
+	if seed < 0 {
+		seed += m
+	} else if seed == 0 {
+		seed = 89482311
+	}
+	x := seed
+	for i := -20; i < length; i++ {
+		x = seedrand(x)
+		if i >= 0 {
+			var u int64
+			u = int64(x) << 20
+			x = seedrand(x)
+			u ^= int64(x) << 10
+			x = seedrand(x)
+			u ^= int64(x)
+			rngVec[i] = u
+		}
+	}
+}
+
+// vrand produces the next generator value. Both indices step back one slot
+// (wrapping to length-1), and the word at rngFeed is replaced by its sum with
+// the word at rngTap. That sum is also the return value.
+func vrand() int64 {
+	if rngTap--; rngTap < 0 {
+		rngTap += length
+	}
+	if rngFeed--; rngFeed < 0 {
+		rngFeed += length
+	}
+	rngVec[rngFeed] += rngVec[rngTap]
+	return rngVec[rngFeed]
+}
+
+func main() {
+	srand(1)
+	for remaining := uint64(7.8e12); remaining > 0; remaining-- { // step the generator 7.8e12 times
+		vrand()
+	}
+	fmt.Printf("rngVec after 7.8e12 calls to vrand:\n%#v\n", rngVec)
+	for i := range rngVec {
+		rngVec[i] &= mask
+	}
+	fmt.Printf("lower 63bit of rngVec after 7.8e12 calls to vrand:\n%#v\n", rngVec)
+}
diff --git a/src/math/rand/normal.go b/src/math/rand/normal.go
new file mode 100644
index 0000000..6654479
--- /dev/null
+++ b/src/math/rand/normal.go
@@ -0,0 +1,156 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand
+
+import (
+	"math"
+)
+
+/*
+ * Normal distribution
+ *
+ * See "The Ziggurat Method for Generating Random Variables"
+ * (Marsaglia & Tsang, 2000)
+ * https://www.jstatsoft.org/v05/i08/paper [pdf]
+ */
+
+const (
+	rn = 3.442619855899 // tail samples in NormFloat64 are returned as rn + x or -rn - x
+)
+
+// absInt32 returns the magnitude of i as a uint32. The most negative int32
+// maps to 1<<31, which uint32 can represent.
+func absInt32(i int32) uint32 {
+	sign := i >> 31 // all ones when i is negative, zero otherwise
+	return uint32((i ^ sign) - sign)
+}
+
+// NormFloat64 returns a normally distributed float64 in
+// the range -math.MaxFloat64 through +math.MaxFloat64 inclusive,
+// with standard normal distribution (mean = 0, stddev = 1).
+// To produce a different normal distribution, callers can
+// adjust the output using:
+//
+//	sample = NormFloat64() * desiredStdDev + desiredMean
+func (r *Rand) NormFloat64() float64 {
+	for {
+		j := int32(r.Uint32()) // reinterpreted as signed, so possibly negative
+		i := j & 0x7F
+		x := float64(j) * float64(wn[i])
+		if absInt32(j) < kn[i] {
+			// Fast path: this case should be hit better than 99% of the time.
+			return x
+		}
+
+		if i == 0 {
+			// Entry 0 only: draw pairs (x, y) until y+y >= x*x holds.
+			for {
+				x = -math.Log(r.Float64()) * (1.0 / rn)
+				y := -math.Log(r.Float64())
+				if y+y >= x*x {
+					break
+				}
+			}
+			if j > 0 { // the sign of j decides which side of zero the tail sample lands on
+				return rn + x
+			}
+			return -rn - x
+		}
+		if fn[i]+float32(r.Float64())*(fn[i-1]-fn[i]) < float32(math.Exp(-.5*x*x)) { // accept if a random height between fn[i] and fn[i-1] lies under exp(-x*x/2); otherwise redraw
+			return x
+		}
+	}
+}
+
+var kn = [128]uint32{
+	0x76ad2212, 0x0, 0x600f1b53, 0x6ce447a6, 0x725b46a2,
+	0x7560051d, 0x774921eb, 0x789a25bd, 0x799045c3, 0x7a4bce5d,
+	0x7adf629f, 0x7b5682a6, 0x7bb8a8c6, 0x7c0ae722, 0x7c50cce7,
+	0x7c8cec5b, 0x7cc12cd6, 0x7ceefed2, 0x7d177e0b, 0x7d3b8883,
+	0x7d5bce6c, 0x7d78dd64, 0x7d932886, 0x7dab0e57, 0x7dc0dd30,
+	0x7dd4d688, 0x7de73185, 0x7df81cea, 0x7e07c0a3, 0x7e163efa,
+	0x7e23b587, 0x7e303dfd, 0x7e3beec2, 0x7e46db77, 0x7e51155d,
+	0x7e5aabb3, 0x7e63abf7, 0x7e6c222c, 0x7e741906, 0x7e7b9a18,
+	0x7e82adfa, 0x7e895c63, 0x7e8fac4b, 0x7e95a3fb, 0x7e9b4924,
+	0x7ea0a0ef, 0x7ea5b00d, 0x7eaa7ac3, 0x7eaf04f3, 0x7eb3522a,
+	0x7eb765a5, 0x7ebb4259, 0x7ebeeafd, 0x7ec2620a, 0x7ec5a9c4,
+	0x7ec8c441, 0x7ecbb365, 0x7ece78ed, 0x7ed11671, 0x7ed38d62,
+	0x7ed5df12, 0x7ed80cb4, 0x7eda175c, 0x7edc0005, 0x7eddc78e,
+	0x7edf6ebf, 0x7ee0f647, 0x7ee25ebe, 0x7ee3a8a9, 0x7ee4d473,
+	0x7ee5e276, 0x7ee6d2f5, 0x7ee7a620, 0x7ee85c10, 0x7ee8f4cd,
+	0x7ee97047, 0x7ee9ce59, 0x7eea0eca, 0x7eea3147, 0x7eea3568,
+	0x7eea1aab, 0x7ee9e071, 0x7ee98602, 0x7ee90a88, 0x7ee86d08,
+	0x7ee7ac6a, 0x7ee6c769, 0x7ee5bc9c, 0x7ee48a67, 0x7ee32efc,
+	0x7ee1a857, 0x7edff42f, 0x7ede0ffa, 0x7edbf8d9, 0x7ed9ab94,
+	0x7ed7248d, 0x7ed45fae, 0x7ed1585c, 0x7ece095f, 0x7eca6ccb,
+	0x7ec67be2, 0x7ec22eee, 0x7ebd7d1a, 0x7eb85c35, 0x7eb2c075,
+	0x7eac9c20, 0x7ea5df27, 0x7e9e769f, 0x7e964c16, 0x7e8d44ba,
+	0x7e834033, 0x7e781728, 0x7e6b9933, 0x7e5d8a1a, 0x7e4d9ded,
+	0x7e3b737a, 0x7e268c2f, 0x7e0e3ff5, 0x7df1aa5d, 0x7dcf8c72,
+	0x7da61a1e, 0x7d72a0fb, 0x7d30e097, 0x7cd9b4ab, 0x7c600f1a,
+	0x7ba90bdc, 0x7a722176, 0x77d664e5,
+}
+var wn = [128]float32{
+	1.7290405e-09, 1.2680929e-10, 1.6897518e-10, 1.9862688e-10,
+	2.2232431e-10, 2.4244937e-10, 2.601613e-10, 2.7611988e-10,
+	2.9073963e-10, 3.042997e-10, 3.1699796e-10, 3.289802e-10,
+	3.4035738e-10, 3.5121603e-10, 3.616251e-10, 3.7164058e-10,
+	3.8130857e-10, 3.9066758e-10, 3.9975012e-10, 4.08584e-10,
+	4.1719309e-10, 4.2559822e-10, 4.338176e-10, 4.418672e-10,
+	4.497613e-10, 4.5751258e-10, 4.651324e-10, 4.7263105e-10,
+	4.8001775e-10, 4.87301e-10, 4.944885e-10, 5.015873e-10,
+	5.0860405e-10, 5.155446e-10, 5.2241467e-10, 5.2921934e-10,
+	5.359635e-10, 5.426517e-10, 5.4928817e-10, 5.5587696e-10,
+	5.624219e-10, 5.6892646e-10, 5.753941e-10, 5.818282e-10,
+	5.882317e-10, 5.946077e-10, 6.00959e-10, 6.072884e-10,
+	6.135985e-10, 6.19892e-10, 6.2617134e-10, 6.3243905e-10,
+	6.386974e-10, 6.449488e-10, 6.511956e-10, 6.5744005e-10,
+	6.6368433e-10, 6.699307e-10, 6.7618144e-10, 6.824387e-10,
+	6.8870465e-10, 6.949815e-10, 7.012715e-10, 7.075768e-10,
+	7.1389966e-10, 7.202424e-10, 7.266073e-10, 7.329966e-10,
+	7.394128e-10, 7.4585826e-10, 7.5233547e-10, 7.58847e-10,
+	7.653954e-10, 7.719835e-10, 7.7861395e-10, 7.852897e-10,
+	7.920138e-10, 7.987892e-10, 8.0561924e-10, 8.125073e-10,
+	8.194569e-10, 8.2647167e-10, 8.3355556e-10, 8.407127e-10,
+	8.479473e-10, 8.55264e-10, 8.6266755e-10, 8.7016316e-10,
+	8.777562e-10, 8.8545243e-10, 8.932582e-10, 9.0117996e-10,
+	9.09225e-10, 9.174008e-10, 9.2571584e-10, 9.341788e-10,
+	9.427997e-10, 9.515889e-10, 9.605579e-10, 9.697193e-10,
+	9.790869e-10, 9.88676e-10, 9.985036e-10, 1.0085882e-09,
+	1.0189509e-09, 1.0296151e-09, 1.0406069e-09, 1.0519566e-09,
+	1.063698e-09, 1.0758702e-09, 1.0885183e-09, 1.1016947e-09,
+	1.1154611e-09, 1.1298902e-09, 1.1450696e-09, 1.1611052e-09,
+	1.1781276e-09, 1.1962995e-09, 1.2158287e-09, 1.2369856e-09,
+	1.2601323e-09, 1.2857697e-09, 1.3146202e-09, 1.347784e-09,
+	1.3870636e-09, 1.4357403e-09, 1.5008659e-09, 1.6030948e-09,
+}
+var fn = [128]float32{
+	1, 0.9635997, 0.9362827, 0.9130436, 0.89228165, 0.87324303,
+	0.8555006, 0.8387836, 0.8229072, 0.8077383, 0.793177,
+	0.7791461, 0.7655842, 0.7524416, 0.73967725, 0.7272569,
+	0.7151515, 0.7033361, 0.69178915, 0.68049186, 0.6694277,
+	0.658582, 0.6479418, 0.63749546, 0.6272325, 0.6171434,
+	0.6072195, 0.5974532, 0.58783704, 0.5783647, 0.56903,
+	0.5598274, 0.5507518, 0.54179835, 0.5329627, 0.52424055,
+	0.5156282, 0.50712204, 0.49871865, 0.49041483, 0.48220766,
+	0.4740943, 0.46607214, 0.4581387, 0.45029163, 0.44252872,
+	0.43484783, 0.427247, 0.41972435, 0.41227803, 0.40490642,
+	0.39760786, 0.3903808, 0.3832238, 0.37613547, 0.36911446,
+	0.3621595, 0.35526937, 0.34844297, 0.34167916, 0.33497685,
+	0.3283351, 0.3217529, 0.3152294, 0.30876362, 0.30235484,
+	0.29600215, 0.28970486, 0.2834622, 0.2772735, 0.27113807,
+	0.2650553, 0.25902456, 0.2530453, 0.24711695, 0.241239,
+	0.23541094, 0.22963232, 0.2239027, 0.21822165, 0.21258877,
+	0.20700371, 0.20146611, 0.19597565, 0.19053204, 0.18513499,
+	0.17978427, 0.17447963, 0.1692209, 0.16400786, 0.15884037,
+	0.15371831, 0.14864157, 0.14361008, 0.13862377, 0.13368265,
+	0.12878671, 0.12393598, 0.119130544, 0.11437051, 0.10965602,
+	0.104987256, 0.10036444, 0.095787846, 0.0912578, 0.08677467,
+	0.0823389, 0.077950984, 0.073611505, 0.06932112, 0.06508058,
+	0.06089077, 0.056752663, 0.0526674, 0.048636295, 0.044660863,
+	0.040742867, 0.03688439, 0.033087887, 0.029356318,
+	0.025693292, 0.022103304, 0.018592102, 0.015167298,
+	0.011839478, 0.008624485, 0.005548995, 0.0026696292,
+}
diff --git a/src/math/rand/race_test.go b/src/math/rand/race_test.go
new file mode 100644
index 0000000..e7d1036
--- /dev/null
+++ b/src/math/rand/race_test.go
@@ -0,0 +1,49 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand_test
+
+import (
+	. "math/rand"
+	"sync"
+	"testing"
+)
+
+// TestConcurrent calls the top-level rand functions from several goroutines
+// at once, giving the race detector a chance to flag unsynchronized access.
+func TestConcurrent(t *testing.T) {
+	const numRoutines, numCycles = 10, 10
+	var wg sync.WaitGroup
+	// Each worker folds every result into seed and finally reseeds the
+	// global generator with a value derived from it.
+	worker := func(id int) {
+		defer wg.Done()
+		buf := make([]byte, 997)
+		for cycle := 0; cycle < numCycles; cycle++ {
+			seed := int64(ExpFloat64())
+			seed += int64(Float32())
+			seed += int64(Float64())
+			seed += int64(Intn(Int()))
+			seed += int64(Int31n(Int31()))
+			seed += int64(Int63n(Int63()))
+			seed += int64(NormFloat64())
+			seed += int64(Uint32())
+			seed += int64(Uint64())
+			for _, p := range Perm(10) {
+				seed += int64(p)
+			}
+			Read(buf)
+			for _, b := range buf {
+				seed += int64(b)
+			}
+			Seed(int64(id*cycle) * seed)
+		}
+	}
+	wg.Add(numRoutines)
+	for id := 0; id < numRoutines; id++ {
+		go worker(id)
+	}
+	// Block until every worker has finished.
+	wg.Wait()
+}
diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go
new file mode 100644
index 0000000..cc1f95c
--- /dev/null
+++ b/src/math/rand/rand.go
@@ -0,0 +1,547 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package rand implements pseudo-random number generators suitable for tasks
+// such as simulation, but it should not be used for security-sensitive work.
+//
+// Random numbers are generated by a [Source], usually wrapped in a [Rand].
+// Both types should be used by a single goroutine at a time: sharing among
+// multiple goroutines requires some kind of synchronization.
+//
+// Top-level functions, such as [Float64] and [Int],
+// are safe for concurrent use by multiple goroutines.
+//
+// This package's outputs might be easily predictable regardless of how it's
+// seeded. For random numbers suitable for security-sensitive work, see the
+// crypto/rand package.
+package rand
+
+import (
+	"internal/godebug"
+	"sync"
+	"sync/atomic"
+	_ "unsafe" // for go:linkname
+)
+
+// A Source represents a source of uniformly-distributed
+// pseudo-random int64 values in the range [0, 1<<63).
+//
+// A Source is not safe for concurrent use by multiple goroutines.
+type Source interface {
+	Int63() int64    // next value, in [0, 1<<63)
+	Seed(seed int64) // initialize the generator state from seed
+}
+
+// A Source64 is a Source that can also generate
+// uniformly-distributed pseudo-random uint64 values in
+// the range [0, 1<<64) directly.
+// If a Rand r's underlying Source s implements Source64,
+// then r.Uint64 returns the result of one call to s.Uint64
+// instead of making two calls to s.Int63.
+type Source64 interface {
+	Source
+	Uint64() uint64 // next value, in [0, 1<<64)
+}
+
+// NewSource returns a new pseudo-random Source seeded with the given value.
+// The value behind the returned interface also implements Source64.
+//
+// Unlike the default Source used by the top-level functions, the returned
+// Source is not safe for concurrent use by multiple goroutines; callers that
+// share it must provide their own synchronization.
+func NewSource(seed int64) Source { return newSource(seed) }
+
+func newSource(seed int64) *rngSource {
+	var rng rngSource
+	rng.Seed(seed)
+	return &rng
+}
+
+// A Rand is a source of random numbers.
+type Rand struct {
+	src Source
+	s64 Source64 // non-nil if src is a Source64
+
+	// readVal contains the remainder of the 63-bit integer used for
+	// byte generation during the most recent Read call.
+	// It is saved so that the next Read call can start where the
+	// previous one finished.
+	readVal int64
+	// readPos indicates the number of low-order bytes of readVal
+	// that are still valid.
+	readPos int8
+}
+
+// New returns a new Rand that draws on src to generate its random values.
+func New(src Source) *Rand {
+	r := &Rand{src: src}
+	r.s64, _ = src.(Source64) // stays nil when src does not implement Source64
+	return r
+}
+
+// Seed uses the provided seed value to initialize the generator to a deterministic state.
+// Seed should not be called concurrently with any other Rand method.
+func (r *Rand) Seed(seed int64) {
+	if lk, ok := r.src.(*lockedSource); ok {
+		lk.seedPos(seed, &r.readPos)
+		return
+	}
+
+	r.src.Seed(seed)
+	r.readPos = 0
+}
+
+// Int63 returns a non-negative pseudo-random 63-bit integer as an int64, taken directly from the underlying Source.
+func (r *Rand) Int63() int64 { return r.src.Int63() }
+
+// Uint32 returns a pseudo-random 32-bit value as a uint32, formed from the upper 32 bits of a 63-bit Int63 result.
+func (r *Rand) Uint32() uint32 { return uint32(r.Int63() >> 31) }
+
+// Uint64 returns a pseudo-random 64-bit value as a uint64.
+func (r *Rand) Uint64() uint64 {
+	if r.s64 == nil { // no 64-bit source available: combine two 63-bit draws
+		lo, hi := uint64(r.Int63()), uint64(r.Int63())
+		return lo>>31 | hi<<32
+	}
+	return r.s64.Uint64()
+}
+
+// Int31 returns a non-negative pseudo-random 31-bit integer as an int32, formed from the upper 31 bits of a 63-bit Int63 result.
+func (r *Rand) Int31() int32 { return int32(r.Int63() >> 32) }
+
+// Int returns a non-negative pseudo-random int.
+func (r *Rand) Int() int {
+	// Shifting left then right drops the top bit, i.e. the sign bit when int is 32 bits wide.
+	return int(uint(r.Int63()) << 1 >> 1)
+}
+
+// Int63n returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n <= 0.
+func (r *Rand) Int63n(n int64) int64 {
+	switch {
+	case n <= 0:
+		panic("invalid argument to Int63n")
+	case n&(n-1) == 0: // n is a power of two, so masking is exact
+		return r.Int63() & (n - 1)
+	}
+	limit := int64((1 << 63) - 1 - (1<<63)%uint64(n)) // draws above limit would bias the modulo
+	for {
+		if v := r.Int63(); v <= limit {
+			return v % n
+		}
+	}
+}
+
+// Int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n <= 0.
+func (r *Rand) Int31n(n int32) int32 {
+	switch {
+	case n <= 0:
+		panic("invalid argument to Int31n")
+	case n&(n-1) == 0: // n is a power of two, so masking is exact
+		return r.Int31() & (n - 1)
+	}
+	limit := int32((1 << 31) - 1 - (1<<31)%uint32(n)) // draws above limit would bias the modulo
+	for {
+		if v := r.Int31(); v <= limit {
+			return v % n
+		}
+	}
+}
+
+// int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n).
+// n must be > 0, but int31n does not check this; the caller must ensure it.
+// int31n exists because Int31n is inefficient, but Go 1 compatibility
+// requires that the stream of values produced by math/rand remain unchanged.
+// int31n can thus only be used internally, by newly introduced APIs.
+//
+// For implementation details, see:
+// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
+// https://lemire.me/blog/2016/06/30/fast-random-shuffling
+func (r *Rand) int31n(n int32) int32 {
+	v := r.Uint32()
+	prod := uint64(v) * uint64(n) // the high 32 bits of the product are the candidate result
+	low := uint32(prod)
+	if low < uint32(n) {
+		thresh := uint32(-n) % uint32(n) // (2³² - n) mod n; computed only when a redraw may be needed
+		for low < thresh {
+			v = r.Uint32()
+			prod = uint64(v) * uint64(n)
+			low = uint32(prod)
+		}
+	}
+	return int32(prod >> 32)
+}
+
+// Intn returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n <= 0.
+func (r *Rand) Intn(n int) int {
+	switch {
+	case n <= 0:
+		panic("invalid argument to Intn")
+	case n <= 1<<31-1: // n fits in an int32, so the 31-bit path is used
+		return int(r.Int31n(int32(n)))
+	}
+	return int(r.Int63n(int64(n)))
+}
+
+// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0).
+func (r *Rand) Float64() float64 {
+	// Dividing a 53-bit draw by 1<<53, i.e.
+	//	float64(r.Int63n(1<<53)) / (1<<53)
+	// would be the clearer way to write this. Go 1, however, shipped
+	//	float64(r.Int63()) / (1 << 63)
+	// and the sequence of values that expression yields must not change.
+	//
+	// That expression has one flaw. A 63-bit draw close enough to 1<<63
+	// makes the quotient round up to exactly 1.0, which the documented
+	// half-open interval excludes.
+	//
+	// Folding 1.0 back onto 0.0 was tried first. Because float64 values
+	// are far denser near 0 than near 1, that made 0 noticeably more
+	// likely, in theory, than it should be.
+	// Drawing again does not have that problem, and since 1.0 comes up
+	// only about once in 2⁵³ draws, most callers will never see the
+	// retry.
+	for {
+		if f := float64(r.Int63()) / (1 << 63); f != 1 {
+			return f
+		}
+		// Rounded up to 1: resample. This branch is taken O(never).
+	}
+}
+
+// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0).
+func (r *Rand) Float32() float32 {
+	// As with Float64, the Go 1 value stream is kept, except that a result
+	// of exactly 1.0 is discarded and redrawn. Narrowing to float32 rounds
+	// up to 1.0 about once in 2²⁴ draws, on top of the retries inside Float64.
+	for {
+		if f := float32(r.Float64()); f != 1 {
+			return f
+		}
+		// Rounded up to 1: resample. This branch is taken O(very rarely).
+	}
+}
+
+// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers
+// in the half-open interval [0,n).
+func (r *Rand) Perm(n int) []int {
+	perm := make([]int, n)
+	// The first pass (i == 0) can only swap perm[0] with itself, so starting
+	// the loop at 1 would produce the same slice. It would not leave r in the
+	// same state, though: the i == 0 pass still consumes a value from r.
+	// Skipping it would therefore affect what later calls on r return, which
+	// Go 1 compatibility rules out.
+	for i := range perm {
+		// Move the current occupant of slot j to slot i, then put i in slot j.
+		j := r.Intn(i + 1)
+		perm[i], perm[j] = perm[j], i
+	}
+	return perm
+}
+
+// Shuffle pseudo-randomizes the order of elements.
+// n is the number of elements. Shuffle panics if n < 0.
+// swap swaps the elements with indexes i and j.
+func (r *Rand) Shuffle(n int, swap func(i, j int)) {
+	if n < 0 {
+		panic("invalid argument to Shuffle")
+	}
+
+	// Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
+	//
+	// Calling Shuffle with an n that needs more than 32 bits is unwise: it
+	// would run for a very long time, and no PRNG has enough internal state
+	// to reach more than a vanishing fraction of the 2³¹! orderings. The
+	// signature takes an int all the same, so large n is handled by drawing
+	// from Int63n until the remaining prefix fits the 31-bit path.
+	last := n - 1
+	for ; last > 1<<31-1-1; last-- {
+		swap(last, int(r.Int63n(int64(last+1))))
+	}
+	// From here on last+1 fits in an int32, so int31n can be used.
+	for ; last > 0; last-- {
+		swap(last, int(r.int31n(int32(last+1))))
+	}
+}
+
+// Read generates len(p) random bytes and writes them into p. It
+// always returns len(p) and a nil error.
+// Read should not be called concurrently with any other Rand method.
+func (r *Rand) Read(p []byte) (n int, err error) {
+	switch src := r.src.(type) {
+	case *lockedSource:
+		return src.read(p, &r.readVal, &r.readPos)
+	case *fastSource:
+		return src.read(p, &r.readVal, &r.readPos)
+	}
+	return read(p, r.src, &r.readVal, &r.readPos)
+}
+
+// read fills p with bytes taken from src, seven per Int63 value, low byte first.
+// readVal and readPos carry the unused bytes of the last value between calls.
+func read(p []byte, src Source, readVal *int64, readPos *int8) (n int, err error) {
+	pos, val := *readPos, *readVal
+	rng, _ := src.(*rngSource)
+	for n = 0; n < len(p); n++ {
+		if pos == 0 {
+			if rng != nil {
+				val = rng.Int63() // call on the concrete type rather than through the interface
+			} else {
+				val = src.Int63()
+			}
+			pos = 7
+		}
+		p[n] = byte(val)
+		val >>= 8
+		pos--
+	}
+	*readPos, *readVal = pos, val
+	return
+}
+
+/*
+ * Top-level convenience functions
+ */
+
+// globalRandGenerator is the source of random numbers for the top-level
+// convenience functions. When possible it uses the runtime fastrand64
+// function to avoid locking. This is not possible if the user called Seed,
+// either explicitly or implicitly via GODEBUG=randautoseed=0.
+var globalRandGenerator atomic.Pointer[Rand]
+
+var randautoseed = godebug.New("randautoseed") // GODEBUG setting read by globalRand when it builds the generator
+
+// globalRand returns the generator backing the top-level convenience
+// functions, creating and publishing it on first use.
+func globalRand() *Rand {
+	if cur := globalRandGenerator.Load(); cur != nil {
+		return cur
+	}
+
+	// First call: pick the kind of generator based on GODEBUG.
+	var fresh *Rand
+	switch randautoseed.Value() {
+	case "0":
+		randautoseed.IncNonDefault()
+		fresh = New(new(lockedSource))
+		fresh.Seed(1)
+	default:
+		fresh = &Rand{
+			src: &fastSource{},
+			s64: &fastSource{},
+		}
+	}
+
+	if globalRandGenerator.CompareAndSwap(nil, fresh) {
+		return fresh
+	}
+
+	// Another goroutine published a generator between the Load above and
+	// the CompareAndSwap. Results are unpredictable in that situation, but
+	// using our own seeded generator here would most likely hand the same
+	// values to both callers, so use whichever generator got in first.
+	return globalRandGenerator.Load()
+}
+
+// fastrand64 is declared here without a body; the go:linkname directive
+// below binds it to an implementation provided outside this file.
+// fastSource uses it to produce its values.
+//
+//go:linkname fastrand64
+func fastrand64() uint64
+
+// fastSource is an implementation of Source64 that uses the runtime
+// fastrand functions.
+//
+// It holds no generator state of its own; Int63 and Uint64 do not take
+// the mutex.
+type fastSource struct {
+	// The mutex is used to avoid race conditions in Read.
+	// It is held by the read method while the caller's readVal/readPos
+	// are updated.
+	mu sync.Mutex
+}
+
+// Int63 returns a fastrand64 value masked with rngMask, as an int64.
+func (*fastSource) Int63() int64 {
+	raw := fastrand64()
+	return int64(raw & rngMask)
+}
+
+// Seed always panics: a fastSource cannot be seeded, and the panic
+// message labels any call as an internal error. The top-level Seed
+// function switches to a lockedSource instead of calling this method.
+func (*fastSource) Seed(int64) {
+	panic("internal error: call to fastSource.Seed")
+}
+
+// Uint64 returns the next fastrand64 value unchanged.
+func (*fastSource) Uint64() uint64 {
+	return fastrand64()
+}
+
+// read implements Read for a fastSource. The mutex is held for the whole
+// call so that concurrent callers do not race on *readVal and *readPos.
+func (fs *fastSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) {
+	fs.mu.Lock()
+	count, readErr := read(p, fs, readVal, readPos)
+	fs.mu.Unlock()
+	return count, readErr
+}
+
+// Seed uses the provided seed value to initialize the default Source to a
+// deterministic state. Seed values that have the same remainder when
+// divided by 2³¹-1 generate the same pseudo-random sequence.
+// Seed, unlike the Rand.Seed method, is safe for concurrent use.
+//
+// If Seed is not called, the generator is seeded randomly at program startup.
+//
+// Prior to Go 1.20, the generator was seeded like Seed(1) at program startup.
+// To force the old behavior, call Seed(1) at program startup.
+// Alternatively, set GODEBUG=randautoseed=0 in the environment
+// before making any calls to functions in this package.
+//
+// Deprecated: As of Go 1.20 there is no reason to call Seed with
+// a random value. Programs that call Seed with a known value to get
+// a specific sequence of results should use New(NewSource(seed)) to
+// obtain a local random generator.
+func Seed(seed int64) {
+	for {
+		cur := globalRandGenerator.Load()
+
+		// A generator that already wraps a lockedSource is re-seeded in place.
+		if cur != nil {
+			if _, locked := cur.src.(*lockedSource); locked {
+				cur.Seed(seed)
+				return
+			}
+		}
+
+		// Otherwise cur is either nil (Seed is the first top-level function
+		// to be called, the normal case) or a fastSource-based generator
+		// that has to be replaced. Both cases install a freshly seeded
+		// lockedSource.
+		next := New(new(lockedSource))
+		next.Seed(seed)
+
+		if globalRandGenerator.CompareAndSwap(cur, next) {
+			return
+		}
+		// Something changed underfoot. Start over to be safe.
+	}
+}
+
+// Int63 draws from the default Source and returns a non-negative
+// pseudo-random 63-bit integer as an int64.
+func Int63() int64 {
+	r := globalRand()
+	return r.Int63()
+}
+
+// Uint32 draws from the default Source and returns a pseudo-random
+// 32-bit value as a uint32.
+func Uint32() uint32 {
+	r := globalRand()
+	return r.Uint32()
+}
+
+// Uint64 draws from the default Source and returns a pseudo-random
+// 64-bit value as a uint64.
+func Uint64() uint64 {
+	r := globalRand()
+	return r.Uint64()
+}
+
+// Int31 draws from the default Source and returns a non-negative
+// pseudo-random 31-bit integer as an int32.
+func Int31() int32 {
+	r := globalRand()
+	return r.Int31()
+}
+
+// Int draws from the default Source and returns a non-negative
+// pseudo-random int.
+func Int() int {
+	r := globalRand()
+	return r.Int()
+}
+
+// Int63n draws from the default Source and returns, as an int64, a
+// non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n <= 0.
+func Int63n(n int64) int64 {
+	r := globalRand()
+	return r.Int63n(n)
+}
+
+// Int31n draws from the default Source and returns, as an int32, a
+// non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n <= 0.
+func Int31n(n int32) int32 {
+	r := globalRand()
+	return r.Int31n(n)
+}
+
+// Intn draws from the default Source and returns, as an int, a
+// non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n <= 0.
+func Intn(n int) int {
+	r := globalRand()
+	return r.Intn(n)
+}
+
+// Float64 draws from the default Source and returns, as a float64, a
+// pseudo-random number in the half-open interval [0.0,1.0).
+func Float64() float64 {
+	r := globalRand()
+	return r.Float64()
+}
+
+// Float32 draws from the default Source and returns, as a float32, a
+// pseudo-random number in the half-open interval [0.0,1.0).
+func Float32() float32 {
+	r := globalRand()
+	return r.Float32()
+}
+
+// Perm draws from the default Source and returns, as a slice of n ints,
+// a pseudo-random permutation of the integers in the half-open
+// interval [0,n).
+func Perm(n int) []int {
+	r := globalRand()
+	return r.Perm(n)
+}
+
+// Shuffle pseudo-randomizes the order of elements using the default Source.
+// n is the number of elements; Shuffle panics if n < 0.
+// swap exchanges the elements at indexes i and j.
+func Shuffle(n int, swap func(i, j int)) {
+	r := globalRand()
+	r.Shuffle(n, swap)
+}
+
+// Read fills p with len(p) random bytes from the default Source.
+// It always returns len(p) and a nil error.
+// Read, unlike the Rand.Read method, is safe for concurrent use.
+//
+// Deprecated: For almost all use cases, crypto/rand.Read is more appropriate.
+func Read(p []byte) (n int, err error) {
+	r := globalRand()
+	return r.Read(p)
+}
+
+// NormFloat64 draws from the default Source and returns a normally
+// distributed float64 in the range [-math.MaxFloat64, +math.MaxFloat64]
+// with standard normal distribution (mean = 0, stddev = 1).
+// Callers that need a different normal distribution can adjust the
+// output using:
+//
+//	sample = NormFloat64() * desiredStdDev + desiredMean
+func NormFloat64() float64 {
+	r := globalRand()
+	return r.NormFloat64()
+}
+
+// ExpFloat64 draws from the default Source and returns an exponentially
+// distributed float64 in the range (0, +math.MaxFloat64]. The rate
+// parameter (lambda) of the distribution is 1, so its mean, 1/lambda,
+// is 1 as well.
+// Callers that need a different rate parameter can adjust the output
+// using:
+//
+//	sample = ExpFloat64() / desiredRateParameter
+func ExpFloat64() float64 {
+	r := globalRand()
+	return r.ExpFloat64()
+}
+
+// lockedSource wraps an *rngSource with a mutex. Its Int63, Uint64, Seed,
+// seedPos and read methods hold lk while they use s.
+type lockedSource struct {
+	lk sync.Mutex // guards s
+	s  *rngSource // underlying generator; nil until the first seed call
+}
+
+// Int63 returns the next Int63 value of the wrapped source, holding
+// r.lk for the duration of the call.
+func (r *lockedSource) Int63() (n int64) {
+	r.lk.Lock()
+	next := r.s.Int63()
+	r.lk.Unlock()
+	return next
+}
+
+// Uint64 returns the next Uint64 value of the wrapped source, holding
+// r.lk for the duration of the call.
+func (r *lockedSource) Uint64() (n uint64) {
+	r.lk.Lock()
+	next := r.s.Uint64()
+	r.lk.Unlock()
+	return next
+}
+
+// Seed seeds the wrapped source while holding r.lk.
+func (r *lockedSource) Seed(seed int64) {
+	mu := &r.lk
+	mu.Lock()
+	r.seed(seed)
+	mu.Unlock()
+}
+
+// seedPos implements Seed for a lockedSource without a race condition:
+// the source is re-seeded and *readPos is reset to 0 under the same lock.
+func (r *lockedSource) seedPos(seed int64, readPos *int8) {
+	mu := &r.lk
+	mu.Lock()
+	r.seed(seed)
+	*readPos = 0
+	mu.Unlock()
+}
+
+// seed seeds the underlying source, creating it on first use.
+// The caller must have locked r.lk.
+func (r *lockedSource) seed(seed int64) {
+	if r.s != nil {
+		r.s.Seed(seed)
+		return
+	}
+	r.s = newSource(seed)
+}
+
+// read implements Read for a lockedSource without a race condition:
+// r.lk is held while bytes are produced and *readVal / *readPos are
+// updated.
+func (r *lockedSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) {
+	r.lk.Lock()
+	count, readErr := read(p, r.s, readVal, readPos)
+	r.lk.Unlock()
+	return count, readErr
+}
diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go
new file mode 100644
index 0000000..7eba1dc
--- /dev/null
+++ b/src/math/rand/rand_test.go
@@ -0,0 +1,701 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand_test
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"internal/testenv"
+	"io"
+	"math"
+	. "math/rand"
+	"os"
+	"runtime"
+	"sync"
+	"testing"
+	"testing/iotest"
+)
+
+const (
+	// numTestSamples is the number of samples drawn for each
+	// distribution check in the normal and exponential tests.
+	numTestSamples = 10000
+)
+
+// Values returned by the package's distribution-parameter accessors.
+// rn and re seed the table reconstruction in initNorm and initExp; the
+// remaining values are the tables that TestNormTables and TestExpTables
+// compare against the reconstructed ones.
+var rn, kn, wn, fn = GetNormalDistributionParameters()
+var re, ke, we, fe = GetExponentialDistributionParameters()
+
+// statsResults holds the summary statistics of a sample set together
+// with the tolerances used when it serves as the expected value in
+// checkSimilarDistribution.
+type statsResults struct {
+	mean        float64 // sample mean
+	stddev      float64 // sample standard deviation
+	closeEnough float64 // absolute difference below which two values match (see nearEqual)
+	maxError    float64 // relative difference below which two values match (see nearEqual)
+}
+
+// max returns a when a is strictly greater than b, and b otherwise.
+func max(a, b float64) float64 {
+	larger := b
+	if a > b {
+		larger = a
+	}
+	return larger
+}
+
+// nearEqual reports whether a and b are approximately equal.
+//
+// The values match when they are identical, when their absolute
+// difference is below closeEnough, or when that difference relative to
+// the larger of the two magnitudes is below maxError.
+func nearEqual(a, b, closeEnough, maxError float64) bool {
+	// Identical values always match. Without this check, comparing 0 with 0
+	// while closeEnough is 0 (or comparing two equal infinities) reaches
+	// the relative test below with a NaN quotient, which is never less
+	// than maxError.
+	if a == b {
+		return true
+	}
+	absDiff := math.Abs(a - b)
+	if absDiff < closeEnough { // Necessary when one value is zero and one value is close to zero.
+		return true
+	}
+	return absDiff/max(math.Abs(a), math.Abs(b)) < maxError
+}
+
+// testSeeds lists the fixed seeds the tests in this file iterate over.
+var testSeeds = []int64{1, 1754801282, 1698661970, 1550503961}
+
+// checkSimilarDistribution returns nil if the mean and stddev of got are
+// similar to those of expected, using the tolerances stored in expected.
+// Otherwise it prints a description of the first mismatch to standard
+// output and returns it as an error.
+func (got *statsResults) checkSimilarDistribution(expected *statsResults) error {
+	closeEnough, maxError := expected.closeEnough, expected.maxError
+
+	if meanOK := nearEqual(got.mean, expected.mean, closeEnough, maxError); !meanOK {
+		msg := fmt.Sprintf("mean %v != %v (allowed error %v, %v)", got.mean, expected.mean, closeEnough, maxError)
+		fmt.Println(msg)
+		return errors.New(msg)
+	}
+
+	if stddevOK := nearEqual(got.stddev, expected.stddev, closeEnough, maxError); !stddevOK {
+		msg := fmt.Sprintf("stddev %v != %v (allowed error %v, %v)", got.stddev, expected.stddev, closeEnough, maxError)
+		fmt.Println(msg)
+		return errors.New(msg)
+	}
+
+	return nil
+}
+
+// getStatsResults computes the mean and standard deviation of samples.
+// The standard deviation is derived from the mean of the squares minus
+// the square of the mean. The tolerance fields of the result are left
+// at zero.
+func getStatsResults(samples []float64) *statsResults {
+	var total, totalSquares float64
+	for i := range samples {
+		v := samples[i]
+		total += v
+		totalSquares += v * v
+	}
+
+	stats := &statsResults{}
+	stats.mean = total / float64(len(samples))
+	stats.stddev = math.Sqrt(totalSquares/float64(len(samples)) - stats.mean*stats.mean)
+	return stats
+}
+
+// checkSampleDistribution reports a test error on t when the mean or
+// standard deviation of samples is not similar to expected.
+func checkSampleDistribution(t *testing.T, samples []float64, expected *statsResults) {
+	t.Helper()
+	actual := getStatsResults(samples)
+	if err := actual.checkSimilarDistribution(expected); err != nil {
+		// Report the error as a value rather than as a format string, so
+		// that a '%' in the message cannot be misread as a formatting verb.
+		t.Error(err)
+	}
+}
+
+func checkSampleSliceDistributions(t *testing.T, samples []float64, nslices int, expected *statsResults) {
+	t.Helper()
+	chunk := len(samples) / nslices
+	for i := 0; i < nslices; i++ {
+		low := i * chunk
+		var high int
+		if i == nslices-1 {
+			high = len(samples) - 1
+		} else {
+			high = (i + 1) * chunk
+		}
+		checkSampleDistribution(t, samples[low:high], expected)
+	}
+}
+
+//
+// Normal distribution tests
+//
+
+// generateNormalSamples returns nsamples values drawn with NormFloat64
+// from a generator seeded with seed, each scaled by stddev and shifted
+// by mean.
+func generateNormalSamples(nsamples int, mean, stddev float64, seed int64) []float64 {
+	rng := New(NewSource(seed))
+	out := make([]float64, nsamples)
+	for i := 0; i < len(out); i++ {
+		out[i] = rng.NormFloat64()*stddev + mean
+	}
+	return out
+}
+
+// testNormalDistribution draws nsamples normal samples for the given
+// mean, stddev and seed, then checks the statistics of the whole set, of
+// each half and of each seventh against the requested parameters.
+func testNormalDistribution(t *testing.T, nsamples int, mean, stddev float64, seed int64) {
+	//fmt.Printf("testing nsamples=%v mean=%v stddev=%v seed=%v\n", nsamples, mean, stddev, seed);
+
+	samples := generateNormalSamples(nsamples, mean, stddev, seed)
+
+	// Error scales with stddev.
+	errorScale := max(1.0, stddev)
+	expected := &statsResults{
+		mean:        mean,
+		stddev:      stddev,
+		closeEnough: 0.10 * errorScale,
+		maxError:    0.08 * errorScale,
+	}
+
+	// The entire set must match the expected distribution ...
+	checkSampleDistribution(t, samples, expected)
+
+	// ... and so must each half and each 7th of the set.
+	for _, parts := range []int{2, 7} {
+		checkSampleSliceDistributions(t, samples, parts, expected)
+	}
+}
+
+// Actual tests
+
+// TestStandardNormalValues checks the standard normal distribution
+// (mean 0, stddev 1) for every seed in testSeeds.
+func TestStandardNormalValues(t *testing.T) {
+	for i := range testSeeds {
+		testNormalDistribution(t, numTestSamples, 0, 1, testSeeds[i])
+	}
+}
+
+// TestNonStandardNormalValues checks normal distributions whose mean and
+// standard deviation each start at 0.5 and double up to a limit. In
+// short mode the limit is lowered and only the first seed is used.
+func TestNonStandardNormalValues(t *testing.T) {
+	short := testing.Short()
+
+	sdmax, mmax := 1000.0, 1000.0
+	if short {
+		sdmax, mmax = 5, 5
+	}
+
+	for sd := 0.5; sd < sdmax; sd *= 2 {
+		for m := 0.5; m < mmax; m *= 2 {
+			for _, seed := range testSeeds {
+				testNormalDistribution(t, numTestSamples, m, sd, seed)
+				if short {
+					break
+				}
+			}
+		}
+	}
+}
+
+//
+// Exponential distribution tests
+//
+
+// generateExponentialSamples returns nsamples values drawn with
+// ExpFloat64 from a generator seeded with seed, each divided by rate.
+func generateExponentialSamples(nsamples int, rate float64, seed int64) []float64 {
+	rng := New(NewSource(seed))
+	out := make([]float64, nsamples)
+	for i := 0; i < len(out); i++ {
+		out[i] = rng.ExpFloat64() / rate
+	}
+	return out
+}
+
+// testExponentialDistribution draws nsamples exponential samples for the
+// given rate and seed, then checks the statistics of the whole set, of
+// each half and of each seventh. Both the expected mean and the expected
+// standard deviation are 1/rate.
+func testExponentialDistribution(t *testing.T, nsamples int, rate float64, seed int64) {
+	//fmt.Printf("testing nsamples=%v rate=%v seed=%v\n", nsamples, rate, seed);
+
+	mean := 1 / rate
+	samples := generateExponentialSamples(nsamples, rate, seed)
+
+	// Error scales with the inverse of the rate.
+	errorScale := max(1.0, 1/rate)
+	expected := &statsResults{
+		mean:        mean,
+		stddev:      mean,
+		closeEnough: 0.10 * errorScale,
+		maxError:    0.20 * errorScale,
+	}
+
+	// The entire set must match the expected distribution ...
+	checkSampleDistribution(t, samples, expected)
+
+	// ... and so must each half and each 7th of the set.
+	for _, parts := range []int{2, 7} {
+		checkSampleSliceDistributions(t, samples, parts, expected)
+	}
+}
+
+// Actual tests
+
+// TestStandardExponentialValues checks the exponential distribution with
+// rate 1 for every seed in testSeeds.
+func TestStandardExponentialValues(t *testing.T) {
+	for i := range testSeeds {
+		testExponentialDistribution(t, numTestSamples, 1, testSeeds[i])
+	}
+}
+
+// TestNonStandardExponentialValues checks exponential distributions whose
+// rate starts at 0.05 and doubles while it stays below 10. In short mode
+// only the first seed is used for each rate.
+func TestNonStandardExponentialValues(t *testing.T) {
+	short := testing.Short()
+	for rate := 0.05; rate < 10; rate *= 2 {
+		for _, seed := range testSeeds {
+			testExponentialDistribution(t, numTestSamples, rate, seed)
+			if short {
+				break
+			}
+		}
+	}
+}
+
+//
+// Table generation tests
+//
+
+// initNorm recomputes, starting from rn, the three 128-entry tables that
+// TestNormTables compares against kn, wn and fn.
+func initNorm() (testKn []uint32, testWn, testFn []float32) {
+	// m1 scales ratios into the integer range stored in testKn and is the
+	// divisor for the entries of testWn.
+	const m1 = 1 << 31
+	var (
+		dn float64 = rn
+		tn         = dn // value of dn from the previous step
+		vn float64 = 9.91256303526217e-3
+	)
+
+	testKn = make([]uint32, 128)
+	testWn = make([]float32, 128)
+	testFn = make([]float32, 128)
+
+	// The entries at both ends of the tables are set directly.
+	q := vn / math.Exp(-0.5*dn*dn)
+	testKn[0] = uint32((dn / q) * m1)
+	testKn[1] = 0
+	testWn[0] = float32(q / m1)
+	testWn[127] = float32(dn / m1)
+	testFn[0] = 1.0
+	testFn[127] = float32(math.Exp(-0.5 * dn * dn))
+	// The remaining entries are filled from the top down; each step derives
+	// a new dn from the previous one, so the order of the statements matters.
+	for i := 126; i >= 1; i-- {
+		dn = math.Sqrt(-2.0 * math.Log(vn/dn+math.Exp(-0.5*dn*dn)))
+		testKn[i+1] = uint32((dn / tn) * m1)
+		tn = dn
+		testFn[i] = float32(math.Exp(-0.5 * dn * dn))
+		testWn[i] = float32(dn / m1)
+	}
+	return
+}
+
+// initExp recomputes, starting from re, the three 256-entry tables that
+// TestExpTables compares against ke, we and fe.
+func initExp() (testKe []uint32, testWe, testFe []float32) {
+	// m2 scales ratios into the integer range stored in testKe and is the
+	// divisor for the entries of testWe.
+	const m2 = 1 << 32
+	var (
+		de float64 = re
+		te         = de // value of de from the previous step
+		ve float64 = 3.9496598225815571993e-3
+	)
+
+	testKe = make([]uint32, 256)
+	testWe = make([]float32, 256)
+	testFe = make([]float32, 256)
+
+	// The entries at both ends of the tables are set directly.
+	q := ve / math.Exp(-de)
+	testKe[0] = uint32((de / q) * m2)
+	testKe[1] = 0
+	testWe[0] = float32(q / m2)
+	testWe[255] = float32(de / m2)
+	testFe[0] = 1.0
+	testFe[255] = float32(math.Exp(-de))
+	// The remaining entries are filled from the top down; each step derives
+	// a new de from the previous one, so the order of the statements matters.
+	for i := 254; i >= 1; i-- {
+		de = -math.Log(ve/de + math.Exp(-de))
+		testKe[i+1] = uint32((de / te) * m2)
+		te = de
+		testFe[i] = float32(math.Exp(-de))
+		testWe[i] = float32(de / m2)
+	}
+	return
+}
+
+// compareUint32Slices returns the first index where the two slices
+// disagree, or <0 if the lengths are the same and all elements
+// are identical.
+//
+// Elements are compared over the common prefix first. If that prefix is
+// identical but the lengths differ, the result is the length of the
+// shorter slice, i.e. the first index present in only one of them.
+func compareUint32Slices(s1, s2 []uint32) int {
+	shorter := len(s1)
+	if len(s2) < shorter {
+		shorter = len(s2)
+	}
+	for i := 0; i < shorter; i++ {
+		if s1[i] != s2[i] {
+			return i
+		}
+	}
+	if len(s1) != len(s2) {
+		return shorter
+	}
+	return -1
+}
+
+// compareFloat32Slices returns the first index where the two slices
+// disagree, or <0 if the lengths are the same and all elements
+// are identical. Two elements agree when nearEqual accepts them with a
+// relative error bound of 1e-7.
+//
+// Elements are compared over the common prefix first. If that prefix
+// agrees but the lengths differ, the result is the length of the shorter
+// slice, i.e. the first index present in only one of them.
+func compareFloat32Slices(s1, s2 []float32) int {
+	shorter := len(s1)
+	if len(s2) < shorter {
+		shorter = len(s2)
+	}
+	for i := 0; i < shorter; i++ {
+		if !nearEqual(float64(s1[i]), float64(s2[i]), 0, 1e-7) {
+			return i
+		}
+	}
+	if len(s1) != len(s2) {
+		return shorter
+	}
+	return -1
+}
+
+// TestNormTables checks that the kn, wn and fn tables agree with the
+// ones recomputed by initNorm.
+func TestNormTables(t *testing.T) {
+	wantKn, wantWn, wantFn := initNorm()
+
+	if idx := compareUint32Slices(kn[0:], wantKn); idx >= 0 {
+		t.Errorf("kn disagrees at index %v; %v != %v", idx, kn[idx], wantKn[idx])
+	}
+
+	if idx := compareFloat32Slices(wn[0:], wantWn); idx >= 0 {
+		t.Errorf("wn disagrees at index %v; %v != %v", idx, wn[idx], wantWn[idx])
+	}
+
+	if idx := compareFloat32Slices(fn[0:], wantFn); idx >= 0 {
+		t.Errorf("fn disagrees at index %v; %v != %v", idx, fn[idx], wantFn[idx])
+	}
+}
+
+// TestExpTables checks that the ke, we and fe tables agree with the
+// ones recomputed by initExp.
+func TestExpTables(t *testing.T) {
+	wantKe, wantWe, wantFe := initExp()
+
+	if idx := compareUint32Slices(ke[0:], wantKe); idx >= 0 {
+		t.Errorf("ke disagrees at index %v; %v != %v", idx, ke[idx], wantKe[idx])
+	}
+
+	if idx := compareFloat32Slices(we[0:], wantWe); idx >= 0 {
+		t.Errorf("we disagrees at index %v; %v != %v", idx, we[idx], wantWe[idx])
+	}
+
+	if idx := compareFloat32Slices(fe[0:], wantFe); idx >= 0 {
+		t.Errorf("fe disagrees at index %v; %v != %v", idx, fe[idx], wantFe[idx])
+	}
+}
+
+// hasSlowFloatingPoint reports whether the test should assume slow
+// floating point: on arm when the GOARM environment variable is "5",
+// and on every mips variant.
+func hasSlowFloatingPoint() bool {
+	arch := runtime.GOARCH
+	if arch == "arm" {
+		return os.Getenv("GOARM") == "5"
+	}
+	// Be conservative and assume that all mips boards
+	// have emulated floating point.
+	// TODO: detect what it actually has.
+	if arch == "mips" || arch == "mipsle" || arch == "mips64" || arch == "mips64le" {
+		return true
+	}
+	return false
+}
+
+// TestFloat32 checks that Float32 never returns a value >= 1.
+func TestFloat32(t *testing.T) {
+	// For issue 6721, the problem came after 7533753 calls, so check 10e6.
+	num := int(10e6)
+	// The full amount is only done on builders (not locally), and ARM5
+	// floating point emulation is slow (Issue 10749), so that builder
+	// does less as well.
+	if testing.Short() {
+		if testenv.Builder() == "" || hasSlowFloatingPoint() {
+			num /= 100 // 1.72 seconds instead of 172 seconds
+		}
+	}
+
+	rng := New(NewSource(1))
+	for ct := 0; ct < num; ct++ {
+		if f := rng.Float32(); f >= 1 {
+			t.Fatal("Float32() should be in range [0,1). ct:", ct, "f:", f)
+		}
+	}
+}
+
+func testReadUniformity(t *testing.T, n int, seed int64) {
+	r := New(NewSource(seed))
+	buf := make([]byte, n)
+	nRead, err := r.Read(buf)
+	if err != nil {
+		t.Errorf("Read err %v", err)
+	}
+	if nRead != n {
+		t.Errorf("Read returned unexpected n; %d != %d", nRead, n)
+	}
+
+	// Expect a uniform distribution of byte values, which lie in [0, 255].
+	var (
+		mean       = 255.0 / 2
+		stddev     = 256.0 / math.Sqrt(12.0)
+		errorScale = stddev / math.Sqrt(float64(n))
+	)
+
+	expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.08 * errorScale}
+
+	// Cast bytes as floats to use the common distribution-validity checks.
+	samples := make([]float64, n)
+	for i, val := range buf {
+		samples[i] = float64(val)
+	}
+	// Make sure that the entire set matches the expected distribution.
+	checkSampleDistribution(t, samples, expected)
+}
+
+// TestReadUniformity runs testReadUniformity for every seed in testSeeds
+// over a range of buffer sizes.
+func TestReadUniformity(t *testing.T) {
+	sizes := [...]int{2, 4, 7, 64, 1024, 1 << 16, 1 << 20}
+	for i := range testSeeds {
+		for j := range sizes {
+			testReadUniformity(t, sizes[j], testSeeds[i])
+		}
+	}
+}
+
+// TestReadEmpty checks that reading into an empty buffer returns zero
+// bytes and no error.
+func TestReadEmpty(t *testing.T) {
+	rng := New(NewSource(1))
+	n, err := rng.Read([]byte{})
+	if err != nil {
+		t.Errorf("Read err into empty buffer; %v", err)
+	}
+	if n != 0 {
+		t.Errorf("Read into empty buffer returned unexpected n of %d", n)
+	}
+}
+
+func TestReadByOneByte(t *testing.T) {
+	r := New(NewSource(1))
+	b1 := make([]byte, 100)
+	_, err := io.ReadFull(iotest.OneByteReader(r), b1)
+	if err != nil {
+		t.Errorf("read by one byte: %v", err)
+	}
+	r = New(NewSource(1))
+	b2 := make([]byte, 100)
+	_, err = r.Read(b2)
+	if err != nil {
+		t.Errorf("read: %v", err)
+	}
+	if !bytes.Equal(b1, b2) {
+		t.Errorf("read by one byte vs single read:\n%x\n%x", b1, b2)
+	}
+}
+
+func TestReadSeedReset(t *testing.T) {
+	r := New(NewSource(42))
+	b1 := make([]byte, 128)
+	_, err := r.Read(b1)
+	if err != nil {
+		t.Errorf("read: %v", err)
+	}
+	r.Seed(42)
+	b2 := make([]byte, 128)
+	_, err = r.Read(b2)
+	if err != nil {
+		t.Errorf("read: %v", err)
+	}
+	if !bytes.Equal(b1, b2) {
+		t.Errorf("mismatch after re-seed:\n%x\n%x", b1, b2)
+	}
+}
+
+// TestShuffleSmall checks that Shuffle accepts n=0 and n=1 and never
+// calls swap for them.
+func TestShuffleSmall(t *testing.T) {
+	rng := New(NewSource(1))
+	for _, n := range []int{0, 1} {
+		failOnSwap := func(i, j int) {
+			t.Fatalf("swap called, n=%d i=%d j=%d", n, i, j)
+		}
+		rng.Shuffle(n, failOnSwap)
+	}
+}
+
+// encodePerm maps a permuted slice of length n, such as Perm generates,
+// to an int in [0, n!).
+// See https://en.wikipedia.org/wiki/Lehmer_code.
+// encodePerm modifies the input slice.
+func encodePerm(s []int) int {
+	// Rewrite s in place as its Lehmer code: each element lowers every
+	// later element that is greater than it by one.
+	for i := range s {
+		x := s[i]
+		for j := i + 1; j < len(s); j++ {
+			if s[j] > x {
+				s[j]--
+			}
+		}
+	}
+
+	// Read the code as a number in the factorial number system, starting
+	// from the last element.
+	code, weight := 0, 1
+	for k := 1; k <= len(s); k++ {
+		code += s[len(s)-k] * weight
+		weight *= k
+	}
+	return code
+}
+
+// TestUniformFactorial tests several ways of generating a uniform value in [0, n!).
+//
+// For each n from 3 up to top, every generator is sampled repeatedly, a
+// chi-squared statistic is computed per batch of draws, and the collected
+// statistics are checked against the mean and standard deviation expected
+// for n!-1 degrees of freedom.
+func TestUniformFactorial(t *testing.T) {
+	// All subtests draw from this one generator.
+	r := New(NewSource(testSeeds[0]))
+	top := 6
+	if testing.Short() {
+		top = 3
+	}
+	for n := 3; n <= top; n++ {
+		t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
+			// Calculate n!.
+			nfact := 1
+			for i := 2; i <= n; i++ {
+				nfact *= i
+			}
+
+			// Test a few different ways to generate a uniform distribution.
+			p := make([]int, n) // re-usable slice for Shuffle generator
+			tests := [...]struct {
+				name string
+				fn   func() int // returns one value in [0, nfact)
+			}{
+				{name: "Int31n", fn: func() int { return int(r.Int31n(int32(nfact))) }},
+				{name: "int31n", fn: func() int { return int(Int31nForTest(r, int32(nfact))) }},
+				{name: "Perm", fn: func() int { return encodePerm(r.Perm(n)) }},
+				{name: "Shuffle", fn: func() int {
+					// Generate permutation using Shuffle.
+					for i := range p {
+						p[i] = i
+					}
+					r.Shuffle(n, func(i, j int) { p[i], p[j] = p[j], p[i] })
+					return encodePerm(p)
+				}},
+			}
+
+			for _, test := range tests {
+				t.Run(test.name, func(t *testing.T) {
+					// Gather chi-squared values and check that they follow
+					// the expected normal distribution given n!-1 degrees of freedom.
+					// See https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test and
+					// https://www.johndcook.com/Beautiful_Testing_ch10.pdf.
+					nsamples := 10 * nfact
+					if nsamples < 200 {
+						nsamples = 200
+					}
+					samples := make([]float64, nsamples)
+					for i := range samples {
+						// Generate some uniformly distributed values and count their occurrences.
+						const iters = 1000
+						counts := make([]int, nfact)
+						// This inner i shadows the sample index of the enclosing loop.
+						for i := 0; i < iters; i++ {
+							counts[test.fn()]++
+						}
+						// Calculate chi-squared and add to samples.
+						want := iters / float64(nfact) // expected count per value
+						var χ2 float64
+						for _, have := range counts {
+							err := float64(have) - want
+							χ2 += err * err
+						}
+						χ2 /= want
+						samples[i] = χ2
+					}
+
+					// Check that our samples approximate the appropriate normal distribution.
+					dof := float64(nfact - 1)
+					expected := &statsResults{mean: dof, stddev: math.Sqrt(2 * dof)}
+					errorScale := max(1.0, expected.stddev)
+					expected.closeEnough = 0.10 * errorScale
+					expected.maxError = 0.08 // TODO: What is the right value here? See issue 21211.
+					checkSampleDistribution(t, samples, expected)
+				})
+			}
+		})
+	}
+}
+
+// Benchmarks
+
+// BenchmarkInt63Threadsafe measures the top-level Int63 function.
+func BenchmarkInt63Threadsafe(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		Int63()
+	}
+}
+
+// BenchmarkInt63ThreadsafeParallel measures the top-level Int63 function
+// when called through b.RunParallel.
+func BenchmarkInt63ThreadsafeParallel(b *testing.B) {
+	body := func(pb *testing.PB) {
+		for pb.Next() {
+			Int63()
+		}
+	}
+	b.RunParallel(body)
+}
+
+// BenchmarkInt63Unthreadsafe measures Int63 on a local generator.
+func BenchmarkInt63Unthreadsafe(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Int63()
+	}
+}
+
+// BenchmarkIntn1000 measures Intn(1000) on a local generator.
+func BenchmarkIntn1000(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Intn(1000)
+	}
+}
+
+// BenchmarkInt63n1000 measures Int63n(1000) on a local generator.
+func BenchmarkInt63n1000(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Int63n(1000)
+	}
+}
+
+// BenchmarkInt31n1000 measures Int31n(1000) on a local generator.
+func BenchmarkInt31n1000(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Int31n(1000)
+	}
+}
+
+// BenchmarkFloat32 measures Float32 on a local generator.
+func BenchmarkFloat32(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Float32()
+	}
+}
+
+// BenchmarkFloat64 measures Float64 on a local generator.
+func BenchmarkFloat64(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Float64()
+	}
+}
+
+// BenchmarkPerm3 measures Perm(3) on a local generator.
+func BenchmarkPerm3(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Perm(3)
+	}
+}
+
+// BenchmarkPerm30 measures Perm(30) on a local generator.
+func BenchmarkPerm30(b *testing.B) {
+	rng := New(NewSource(1))
+	for i := 0; i < b.N; i++ {
+		rng.Perm(30)
+	}
+}
+
+// BenchmarkPerm30ViaShuffle measures building a 30-element permutation
+// by filling a fresh slice with 0..29 and shuffling it.
+func BenchmarkPerm30ViaShuffle(b *testing.B) {
+	rng := New(NewSource(1))
+	for iter := 0; iter < b.N; iter++ {
+		perm := make([]int, 30)
+		for k := 0; k < len(perm); k++ {
+			perm[k] = k
+		}
+		exchange := func(i, j int) { perm[i], perm[j] = perm[j], perm[i] }
+		rng.Shuffle(30, exchange)
+	}
+}
+
+// BenchmarkShuffleOverhead measures just the shuffling overhead by
+// passing a minimal swap function that only validates its indexes.
+func BenchmarkShuffleOverhead(b *testing.B) {
+	rng := New(NewSource(1))
+	checkOnly := func(i, j int) {
+		if i < 0 || i >= 52 || j < 0 || j >= 52 {
+			b.Fatalf("bad swap(%d, %d)", i, j)
+		}
+	}
+	for iter := 0; iter < b.N; iter++ {
+		rng.Shuffle(52, checkOnly)
+	}
+}
+
+// BenchmarkRead3 measures Read into a 3-byte buffer; the setup is
+// excluded from the timing.
+func BenchmarkRead3(b *testing.B) {
+	rng := New(NewSource(1))
+	buf := make([]byte, 3)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		rng.Read(buf)
+	}
+}
+
+// BenchmarkRead64 measures Read into a 64-byte buffer; the setup is
+// excluded from the timing.
+func BenchmarkRead64(b *testing.B) {
+	rng := New(NewSource(1))
+	buf := make([]byte, 64)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		rng.Read(buf)
+	}
+}
+
+// BenchmarkRead1000 measures Read into a 1000-byte buffer; the setup is
+// excluded from the timing.
+func BenchmarkRead1000(b *testing.B) {
+	rng := New(NewSource(1))
+	buf := make([]byte, 1000)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		rng.Read(buf)
+	}
+}
+
+// BenchmarkConcurrent measures the top-level Int63 function called b.N
+// times from each of four goroutines, and waits for all of them.
+func BenchmarkConcurrent(b *testing.B) {
+	const goroutines = 4
+	var wg sync.WaitGroup
+	worker := func() {
+		defer wg.Done()
+		for i := 0; i < b.N; i++ {
+			Int63()
+		}
+	}
+	for g := 0; g < goroutines; g++ {
+		wg.Add(1)
+		go worker()
+	}
+	wg.Wait()
+}
diff --git a/src/math/rand/regress_test.go b/src/math/rand/regress_test.go
new file mode 100644
index 0000000..813098e
--- /dev/null
+++ b/src/math/rand/regress_test.go
@@ -0,0 +1,404 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that random number sequences generated by a specific seed
+// do not change from version to version.
+//
+// Do NOT make changes to the golden outputs. If bugs need to be fixed
+// in the underlying code, find ways to fix them that do not affect the
+// outputs.
+
+package rand_test
+
+import (
+	"flag"
+	"fmt"
+	. "math/rand"
+	"reflect"
+	"testing"
+)
+
+var printgolden = flag.Bool("printgolden", false, "print golden results for regression test") // when set, TestRegress prints a regressGolden table instead of comparing against it
+
+func TestRegress(t *testing.T) {
+	var int32s = []int32{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1}
+	var int64s = []int64{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1, 1000000000000000000, 1 << 60, 1<<63 - 2, 1<<63 - 1}
+	var permSizes = []int{0, 1, 5, 8, 9, 10, 16}
+	var readBufferSizes = []int{1, 7, 8, 9, 10}
+	r := New(NewSource(0))
+
+	rv := reflect.ValueOf(r)
+	n := rv.NumMethod()
+	p := 0 // index into regressGolden of the next expected value
+	if *printgolden {
+		fmt.Printf("var regressGolden = []interface{}{\n")
+	}
+	for i := 0; i < n; i++ {
+		m := rv.Type().Method(i)
+		mv := rv.Method(i)
+		mt := mv.Type()
+		if mt.NumOut() == 0 {
+			continue // methods with no result have nothing to compare
+		}
+		r.Seed(0) // every method starts from the same generator state
+		for repeat := 0; repeat < 20; repeat++ {
+			var args []reflect.Value
+			var argstr string
+			if mt.NumIn() == 1 {
+				var x any
+				switch mt.In(0).Kind() {
+				default:
+					t.Fatalf("unexpected argument type for r.%s", m.Name)
+
+				case reflect.Int:
+					if m.Name == "Perm" {
+						x = permSizes[repeat%len(permSizes)]
+						break // leave the switch; Perm takes a size, not one of the int64s bounds
+					}
+					big := int64s[repeat%len(int64s)]
+					if int64(int(big)) != big {
+						r.Int63n(big) // what would happen on 64-bit machine, to keep stream in sync
+						if *printgolden {
+							fmt.Printf("\tskipped, // must run printgolden on 64-bit machine\n")
+						}
+						p++ // the skipped call still occupies one slot in regressGolden
+						continue
+					}
+					x = int(big)
+
+				case reflect.Int32:
+					x = int32s[repeat%len(int32s)]
+
+				case reflect.Int64:
+					x = int64s[repeat%len(int64s)]
+
+				case reflect.Slice:
+					if m.Name == "Read" {
+						n := readBufferSizes[repeat%len(readBufferSizes)] // buffer length; shadows the outer method count n
+						x = make([]byte, n)
+					}
+				}
+				argstr = fmt.Sprint(x)
+				args = append(args, reflect.ValueOf(x))
+			}
+
+			var out any
+			out = mv.Call(args)[0].Interface()
+			if m.Name == "Int" || m.Name == "Intn" {
+				out = int64(out.(int)) // regressGolden stores int results as int64
+			}
+			if m.Name == "Read" {
+				out = args[0].Interface().([]byte) // compare the filled buffer, not Read's first result
+			}
+			if *printgolden {
+				var val string
+				big := int64(1 << 60)
+				if int64(int(big)) != big && (m.Name == "Int" || m.Name == "Intn") {
+					// 32-bit machine cannot print 64-bit results
+					val = "truncated"
+				} else if reflect.TypeOf(out).Kind() == reflect.Slice {
+					val = fmt.Sprintf("%#v", out)
+				} else {
+					val = fmt.Sprintf("%T(%v)", out, out)
+				}
+				fmt.Printf("\t%s, // %s(%s)\n", val, m.Name, argstr)
+			} else {
+				want := regressGolden[p]
+				if m.Name == "Int" {
+					want = int64(int(uint(want.(int64)) << 1 >> 1)) // narrow the golden value to uint width and clear its top bit
+				}
+				if !reflect.DeepEqual(out, want) {
+					t.Errorf("r.%s(%s) = %v, want %v", m.Name, argstr, out, want)
+				}
+			}
+			p++
+		}
+	}
+	if *printgolden {
+		fmt.Printf("}\n")
+	}
+}
+
+var regressGolden = []any{
+	float64(4.668112973579268),          // ExpFloat64()
+	float64(0.1601593871172866),         // ExpFloat64()
+	float64(3.0465834105636),            // ExpFloat64()
+	float64(0.06385839451671879),        // ExpFloat64()
+	float64(1.8578917487258961),         // ExpFloat64()
+	float64(0.784676123472182),          // ExpFloat64()
+	float64(0.11225477361256932),        // ExpFloat64()
+	float64(0.20173283329802255),        // ExpFloat64()
+	float64(0.3468619496201105),         // ExpFloat64()
+	float64(0.35601103454384536),        // ExpFloat64()
+	float64(0.888376329507869),          // ExpFloat64()
+	float64(1.4081362450365698),         // ExpFloat64()
+	float64(1.0077753823151994),         // ExpFloat64()
+	float64(0.23594100766227588),        // ExpFloat64()
+	float64(2.777245612300007),          // ExpFloat64()
+	float64(0.5202997830662377),         // ExpFloat64()
+	float64(1.2842705247770294),         // ExpFloat64()
+	float64(0.030307408362776206),       // ExpFloat64()
+	float64(2.204156824853721),          // ExpFloat64()
+	float64(2.09891923895058),           // ExpFloat64()
+	float32(0.94519615),                 // Float32()
+	float32(0.24496509),                 // Float32()
+	float32(0.65595627),                 // Float32()
+	float32(0.05434384),                 // Float32()
+	float32(0.3675872),                  // Float32()
+	float32(0.28948045),                 // Float32()
+	float32(0.1924386),                  // Float32()
+	float32(0.65533215),                 // Float32()
+	float32(0.8971697),                  // Float32()
+	float32(0.16735445),                 // Float32()
+	float32(0.28858566),                 // Float32()
+	float32(0.9026048),                  // Float32()
+	float32(0.84978026),                 // Float32()
+	float32(0.2730468),                  // Float32()
+	float32(0.6090802),                  // Float32()
+	float32(0.253656),                   // Float32()
+	float32(0.7746542),                  // Float32()
+	float32(0.017480763),                // Float32()
+	float32(0.78707397),                 // Float32()
+	float32(0.7993937),                  // Float32()
+	float64(0.9451961492941164),         // Float64()
+	float64(0.24496508529377975),        // Float64()
+	float64(0.6559562651954052),         // Float64()
+	float64(0.05434383959970039),        // Float64()
+	float64(0.36758720663245853),        // Float64()
+	float64(0.2894804331565928),         // Float64()
+	float64(0.19243860967493215),        // Float64()
+	float64(0.6553321508148324),         // Float64()
+	float64(0.897169713149801),          // Float64()
+	float64(0.16735444255905835),        // Float64()
+	float64(0.2885856518054551),         // Float64()
+	float64(0.9026048462705047),         // Float64()
+	float64(0.8497802817628735),         // Float64()
+	float64(0.2730468047134829),         // Float64()
+	float64(0.6090801919903561),         // Float64()
+	float64(0.25365600644283687),        // Float64()
+	float64(0.7746542391859803),         // Float64()
+	float64(0.017480762156647272),       // Float64()
+	float64(0.7870739563039942),         // Float64()
+	float64(0.7993936979594545),         // Float64()
+	int64(8717895732742165505),          // Int()
+	int64(2259404117704393152),          // Int()
+	int64(6050128673802995827),          // Int()
+	int64(501233450539197794),           // Int()
+	int64(3390393562759376202),          // Int()
+	int64(2669985732393126063),          // Int()
+	int64(1774932891286980153),          // Int()
+	int64(6044372234677422456),          // Int()
+	int64(8274930044578894929),          // Int()
+	int64(1543572285742637646),          // Int()
+	int64(2661732831099943416),          // Int()
+	int64(8325060299420976708),          // Int()
+	int64(7837839688282259259),          // Int()
+	int64(2518412263346885298),          // Int()
+	int64(5617773211005988520),          // Int()
+	int64(2339563716805116249),          // Int()
+	int64(7144924247938981575),          // Int()
+	int64(161231572858529631),           // Int()
+	int64(7259475919510918339),          // Int()
+	int64(7373105480197164748),          // Int()
+	int32(2029793274),                   // Int31()
+	int32(526058514),                    // Int31()
+	int32(1408655353),                   // Int31()
+	int32(116702506),                    // Int31()
+	int32(789387515),                    // Int31()
+	int32(621654496),                    // Int31()
+	int32(413258767),                    // Int31()
+	int32(1407315077),                   // Int31()
+	int32(1926657288),                   // Int31()
+	int32(359390928),                    // Int31()
+	int32(619732968),                    // Int31()
+	int32(1938329147),                   // Int31()
+	int32(1824889259),                   // Int31()
+	int32(586363548),                    // Int31()
+	int32(1307989752),                   // Int31()
+	int32(544722126),                    // Int31()
+	int32(1663557311),                   // Int31()
+	int32(37539650),                     // Int31()
+	int32(1690228450),                   // Int31()
+	int32(1716684894),                   // Int31()
+	int32(0),                            // Int31n(1)
+	int32(4),                            // Int31n(10)
+	int32(25),                           // Int31n(32)
+	int32(310570),                       // Int31n(1048576)
+	int32(857611),                       // Int31n(1048577)
+	int32(621654496),                    // Int31n(1000000000)
+	int32(413258767),                    // Int31n(1073741824)
+	int32(1407315077),                   // Int31n(2147483646)
+	int32(1926657288),                   // Int31n(2147483647)
+	int32(0),                            // Int31n(1)
+	int32(8),                            // Int31n(10)
+	int32(27),                           // Int31n(32)
+	int32(367019),                       // Int31n(1048576)
+	int32(209005),                       // Int31n(1048577)
+	int32(307989752),                    // Int31n(1000000000)
+	int32(544722126),                    // Int31n(1073741824)
+	int32(1663557311),                   // Int31n(2147483646)
+	int32(37539650),                     // Int31n(2147483647)
+	int32(0),                            // Int31n(1)
+	int32(4),                            // Int31n(10)
+	int64(8717895732742165505),          // Int63()
+	int64(2259404117704393152),          // Int63()
+	int64(6050128673802995827),          // Int63()
+	int64(501233450539197794),           // Int63()
+	int64(3390393562759376202),          // Int63()
+	int64(2669985732393126063),          // Int63()
+	int64(1774932891286980153),          // Int63()
+	int64(6044372234677422456),          // Int63()
+	int64(8274930044578894929),          // Int63()
+	int64(1543572285742637646),          // Int63()
+	int64(2661732831099943416),          // Int63()
+	int64(8325060299420976708),          // Int63()
+	int64(7837839688282259259),          // Int63()
+	int64(2518412263346885298),          // Int63()
+	int64(5617773211005988520),          // Int63()
+	int64(2339563716805116249),          // Int63()
+	int64(7144924247938981575),          // Int63()
+	int64(161231572858529631),           // Int63()
+	int64(7259475919510918339),          // Int63()
+	int64(7373105480197164748),          // Int63()
+	int64(0),                            // Int63n(1)
+	int64(2),                            // Int63n(10)
+	int64(19),                           // Int63n(32)
+	int64(959842),                       // Int63n(1048576)
+	int64(688912),                       // Int63n(1048577)
+	int64(393126063),                    // Int63n(1000000000)
+	int64(89212473),                     // Int63n(1073741824)
+	int64(834026388),                    // Int63n(2147483646)
+	int64(1577188963),                   // Int63n(2147483647)
+	int64(543572285742637646),           // Int63n(1000000000000000000)
+	int64(355889821886249464),           // Int63n(1152921504606846976)
+	int64(8325060299420976708),          // Int63n(9223372036854775806)
+	int64(7837839688282259259),          // Int63n(9223372036854775807)
+	int64(0),                            // Int63n(1)
+	int64(0),                            // Int63n(10)
+	int64(25),                           // Int63n(32)
+	int64(679623),                       // Int63n(1048576)
+	int64(882178),                       // Int63n(1048577)
+	int64(510918339),                    // Int63n(1000000000)
+	int64(782454476),                    // Int63n(1073741824)
+	int64(0),                            // Intn(1)
+	int64(4),                            // Intn(10)
+	int64(25),                           // Intn(32)
+	int64(310570),                       // Intn(1048576)
+	int64(857611),                       // Intn(1048577)
+	int64(621654496),                    // Intn(1000000000)
+	int64(413258767),                    // Intn(1073741824)
+	int64(1407315077),                   // Intn(2147483646)
+	int64(1926657288),                   // Intn(2147483647)
+	int64(543572285742637646),           // Intn(1000000000000000000)
+	int64(355889821886249464),           // Intn(1152921504606846976)
+	int64(8325060299420976708),          // Intn(9223372036854775806)
+	int64(7837839688282259259),          // Intn(9223372036854775807)
+	int64(0),                            // Intn(1)
+	int64(2),                            // Intn(10)
+	int64(14),                           // Intn(32)
+	int64(515775),                       // Intn(1048576)
+	int64(839455),                       // Intn(1048577)
+	int64(690228450),                    // Intn(1000000000)
+	int64(642943070),                    // Intn(1073741824)
+	float64(-0.28158587086436215),       // NormFloat64()
+	float64(0.570933095808067),          // NormFloat64()
+	float64(-1.6920196326157044),        // NormFloat64()
+	float64(0.1996229111693099),         // NormFloat64()
+	float64(1.9195199291234621),         // NormFloat64()
+	float64(0.8954838794918353),         // NormFloat64()
+	float64(0.41457072128813166),        // NormFloat64()
+	float64(-0.48700161491544713),       // NormFloat64()
+	float64(-0.1684059662402393),        // NormFloat64()
+	float64(0.37056410998929545),        // NormFloat64()
+	float64(1.0156889027029008),         // NormFloat64()
+	float64(-0.5174422210625114),        // NormFloat64()
+	float64(-0.5565834214413804),        // NormFloat64()
+	float64(0.778320596648391),          // NormFloat64()
+	float64(-1.8970718197702225),        // NormFloat64()
+	float64(0.5229525761688676),         // NormFloat64()
+	float64(-1.5515595563231523),        // NormFloat64()
+	float64(0.0182029289376123),         // NormFloat64()
+	float64(-0.6820951356608795),        // NormFloat64()
+	float64(-0.5987943422687668),        // NormFloat64()
+	[]int{},                             // Perm(0)
+	[]int{0},                            // Perm(1)
+	[]int{0, 4, 1, 3, 2},                // Perm(5)
+	[]int{3, 1, 0, 4, 7, 5, 2, 6},       // Perm(8)
+	[]int{5, 0, 3, 6, 7, 4, 2, 1, 8},    // Perm(9)
+	[]int{4, 5, 0, 2, 6, 9, 3, 1, 8, 7}, // Perm(10)
+	[]int{14, 2, 0, 8, 3, 5, 13, 12, 1, 4, 6, 7, 11, 9, 15, 10}, // Perm(16)
+	[]int{},                             // Perm(0)
+	[]int{0},                            // Perm(1)
+	[]int{3, 0, 1, 2, 4},                // Perm(5)
+	[]int{5, 1, 2, 0, 4, 7, 3, 6},       // Perm(8)
+	[]int{4, 0, 6, 8, 1, 5, 2, 7, 3},    // Perm(9)
+	[]int{8, 6, 1, 7, 5, 4, 3, 2, 9, 0}, // Perm(10)
+	[]int{0, 3, 13, 2, 15, 4, 10, 1, 8, 14, 7, 6, 12, 9, 5, 11}, // Perm(16)
+	[]int{},                             // Perm(0)
+	[]int{0},                            // Perm(1)
+	[]int{0, 4, 2, 1, 3},                // Perm(5)
+	[]int{2, 1, 7, 0, 6, 3, 4, 5},       // Perm(8)
+	[]int{8, 7, 5, 3, 4, 6, 0, 1, 2},    // Perm(9)
+	[]int{1, 0, 2, 5, 7, 6, 9, 8, 3, 4}, // Perm(10)
+	[]byte{0x1},                         // Read([0])
+	[]byte{0x94, 0xfd, 0xc2, 0xfa, 0x2f, 0xfc, 0xc0},                 // Read([0 0 0 0 0 0 0])
+	[]byte{0x41, 0xd3, 0xff, 0x12, 0x4, 0x5b, 0x73, 0xc8},            // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x6e, 0x4f, 0xf9, 0x5f, 0xf6, 0x62, 0xa5, 0xee, 0xe8},     // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0x2a, 0xbd, 0xf4, 0x4a, 0x2d, 0xb, 0x75, 0xfb, 0x18, 0xd}, // Read([0 0 0 0 0 0 0 0 0 0])
+	[]byte{0xaf}, // Read([0])
+	[]byte{0x48, 0xa7, 0x9e, 0xe0, 0xb1, 0xd, 0x39},                   // Read([0 0 0 0 0 0 0])
+	[]byte{0x46, 0x51, 0x85, 0xf, 0xd4, 0xa1, 0x78, 0x89},             // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x2e, 0xe2, 0x85, 0xec, 0xe1, 0x51, 0x14, 0x55, 0x78},      // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0x8, 0x75, 0xd6, 0x4e, 0xe2, 0xd3, 0xd0, 0xd0, 0xde, 0x6b}, // Read([0 0 0 0 0 0 0 0 0 0])
+	[]byte{0xf8}, // Read([0])
+	[]byte{0xf9, 0xb4, 0x4c, 0xe8, 0x5f, 0xf0, 0x44},                   // Read([0 0 0 0 0 0 0])
+	[]byte{0xc6, 0xb1, 0xf8, 0x3b, 0x8e, 0x88, 0x3b, 0xbf},             // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x85, 0x7a, 0xab, 0x99, 0xc5, 0xb2, 0x52, 0xc7, 0x42},       // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0x9c, 0x32, 0xf3, 0xa8, 0xae, 0xb7, 0x9e, 0xf8, 0x56, 0xf6}, // Read([0 0 0 0 0 0 0 0 0 0])
+	[]byte{0x59}, // Read([0])
+	[]byte{0xc1, 0x8f, 0xd, 0xce, 0xcc, 0x77, 0xc7},                    // Read([0 0 0 0 0 0 0])
+	[]byte{0x5e, 0x7a, 0x81, 0xbf, 0xde, 0x27, 0x5f, 0x67},             // Read([0 0 0 0 0 0 0 0])
+	[]byte{0xcf, 0xe2, 0x42, 0xcf, 0x3c, 0xc3, 0x54, 0xf3, 0xed},       // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0xe2, 0xd6, 0xbe, 0xcc, 0x4e, 0xa3, 0xae, 0x5e, 0x88, 0x52}, // Read([0 0 0 0 0 0 0 0 0 0])
+	uint32(4059586549),           // Uint32()
+	uint32(1052117029),           // Uint32()
+	uint32(2817310706),           // Uint32()
+	uint32(233405013),            // Uint32()
+	uint32(1578775030),           // Uint32()
+	uint32(1243308993),           // Uint32()
+	uint32(826517535),            // Uint32()
+	uint32(2814630155),           // Uint32()
+	uint32(3853314576),           // Uint32()
+	uint32(718781857),            // Uint32()
+	uint32(1239465936),           // Uint32()
+	uint32(3876658295),           // Uint32()
+	uint32(3649778518),           // Uint32()
+	uint32(1172727096),           // Uint32()
+	uint32(2615979505),           // Uint32()
+	uint32(1089444252),           // Uint32()
+	uint32(3327114623),           // Uint32()
+	uint32(75079301),             // Uint32()
+	uint32(3380456901),           // Uint32()
+	uint32(3433369789),           // Uint32()
+	uint64(8717895732742165505),  // Uint64()
+	uint64(2259404117704393152),  // Uint64()
+	uint64(6050128673802995827),  // Uint64()
+	uint64(9724605487393973602),  // Uint64()
+	uint64(12613765599614152010), // Uint64()
+	uint64(11893357769247901871), // Uint64()
+	uint64(1774932891286980153),  // Uint64()
+	uint64(15267744271532198264), // Uint64()
+	uint64(17498302081433670737), // Uint64()
+	uint64(1543572285742637646),  // Uint64()
+	uint64(11885104867954719224), // Uint64()
+	uint64(17548432336275752516), // Uint64()
+	uint64(7837839688282259259),  // Uint64()
+	uint64(2518412263346885298),  // Uint64()
+	uint64(5617773211005988520),  // Uint64()
+	uint64(11562935753659892057), // Uint64()
+	uint64(16368296284793757383), // Uint64()
+	uint64(161231572858529631),   // Uint64()
+	uint64(16482847956365694147), // Uint64()
+	uint64(16596477517051940556), // Uint64()
+}
diff --git a/src/math/rand/rng.go b/src/math/rand/rng.go
new file mode 100644
index 0000000..1e4a9e0
--- /dev/null
+++ b/src/math/rand/rng.go
@@ -0,0 +1,252 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand
+
+/*
+ * Uniform distribution
+ *
+ * algorithm by
+ * DP Mitchell and JA Reeds
+ */
+
+const (
+	rngLen   = 607           // number of int64 words in rngSource.vec
+	rngTap   = 273           // Seed sets tap to 0 and feed to rngLen-rngTap
+	rngMax   = 1 << 63       // 2**63
+	rngMask  = rngMax - 1    // low 63 bits set; Int63 ANDs with it to clear the top bit
+	int32max = (1 << 31) - 1 // 2**31 - 1, the modulus used by seedrand and Seed
+)
+
+var (
+	// rngCooked holds the precomputed values that Seed XORs into each word of the initial state. See gen_cooked.go for details.
+	rngCooked [rngLen]int64 = [...]int64{
+		-4181792142133755926, -4576982950128230565, 1395769623340756751, 5333664234075297259,
+		-6347679516498800754, 9033628115061424579, 7143218595135194537, 4812947590706362721,
+		7937252194349799378, 5307299880338848416, 8209348851763925077, -7107630437535961764,
+		4593015457530856296, 8140875735541888011, -5903942795589686782, -603556388664454774,
+		-7496297993371156308, 113108499721038619, 4569519971459345583, -4160538177779461077,
+		-6835753265595711384, -6507240692498089696, 6559392774825876886, 7650093201692370310,
+		7684323884043752161, -8965504200858744418, -2629915517445760644, 271327514973697897,
+		-6433985589514657524, 1065192797246149621, 3344507881999356393, -4763574095074709175,
+		7465081662728599889, 1014950805555097187, -4773931307508785033, -5742262670416273165,
+		2418672789110888383, 5796562887576294778, 4484266064449540171, 3738982361971787048,
+		-4699774852342421385, 10530508058128498, -589538253572429690, -6598062107225984180,
+		8660405965245884302, 10162832508971942, -2682657355892958417, 7031802312784620857,
+		6240911277345944669, 831864355460801054, -1218937899312622917, 2116287251661052151,
+		2202309800992166967, 9161020366945053561, 4069299552407763864, 4936383537992622449,
+		457351505131524928, -8881176990926596454, -6375600354038175299, -7155351920868399290,
+		4368649989588021065, 887231587095185257, -3659780529968199312, -2407146836602825512,
+		5616972787034086048, -751562733459939242, 1686575021641186857, -5177887698780513806,
+		-4979215821652996885, -1375154703071198421, 5632136521049761902, -8390088894796940536,
+		-193645528485698615, -5979788902190688516, -4907000935050298721, -285522056888777828,
+		-2776431630044341707, 1679342092332374735, 6050638460742422078, -2229851317345194226,
+		-1582494184340482199, 5881353426285907985, 812786550756860885, 4541845584483343330,
+		-6497901820577766722, 4980675660146853729, -4012602956251539747, -329088717864244987,
+		-2896929232104691526, 1495812843684243920, -2153620458055647789, 7370257291860230865,
+		-2466442761497833547, 4706794511633873654, -1398851569026877145, 8549875090542453214,
+		-9189721207376179652, -7894453601103453165, 7297902601803624459, 1011190183918857495,
+		-6985347000036920864, 5147159997473910359, -8326859945294252826, 2659470849286379941,
+		6097729358393448602, -7491646050550022124, -5117116194870963097, -896216826133240300,
+		-745860416168701406, 5803876044675762232, -787954255994554146, -3234519180203704564,
+		-4507534739750823898, -1657200065590290694, 505808562678895611, -4153273856159712438,
+		-8381261370078904295, 572156825025677802, 1791881013492340891, 3393267094866038768,
+		-5444650186382539299, 2352769483186201278, -7930912453007408350, -325464993179687389,
+		-3441562999710612272, -6489413242825283295, 5092019688680754699, -227247482082248967,
+		4234737173186232084, 5027558287275472836, 4635198586344772304, -536033143587636457,
+		5907508150730407386, -8438615781380831356, 972392927514829904, -3801314342046600696,
+		-4064951393885491917, -174840358296132583, 2407211146698877100, -1640089820333676239,
+		3940796514530962282, -5882197405809569433, 3095313889586102949, -1818050141166537098,
+		5832080132947175283, 7890064875145919662, 8184139210799583195, -8073512175445549678,
+		-7758774793014564506, -4581724029666783935, 3516491885471466898, -8267083515063118116,
+		6657089965014657519, 5220884358887979358, 1796677326474620641, 5340761970648932916,
+		1147977171614181568, 5066037465548252321, 2574765911837859848, 1085848279845204775,
+		-5873264506986385449, 6116438694366558490, 2107701075971293812, -7420077970933506541,
+		2469478054175558874, -1855128755834809824, -5431463669011098282, -9038325065738319171,
+		-6966276280341336160, 7217693971077460129, -8314322083775271549, 7196649268545224266,
+		-3585711691453906209, -5267827091426810625, 8057528650917418961, -5084103596553648165,
+		-2601445448341207749, -7850010900052094367, 6527366231383600011, 3507654575162700890,
+		9202058512774729859, 1954818376891585542, -2582991129724600103, 8299563319178235687,
+		-5321504681635821435, 7046310742295574065, -2376176645520785576, -7650733936335907755,
+		8850422670118399721, 3631909142291992901, 5158881091950831288, -6340413719511654215,
+		4763258931815816403, 6280052734341785344, -4979582628649810958, 2043464728020827976,
+		-2678071570832690343, 4562580375758598164, 5495451168795427352, -7485059175264624713,
+		553004618757816492, 6895160632757959823, -989748114590090637, 7139506338801360852,
+		-672480814466784139, 5535668688139305547, 2430933853350256242, -3821430778991574732,
+		-1063731997747047009, -3065878205254005442, 7632066283658143750, 6308328381617103346,
+		3681878764086140361, 3289686137190109749, 6587997200611086848, 244714774258135476,
+		-5143583659437639708, 8090302575944624335, 2945117363431356361, -8359047641006034763,
+		3009039260312620700, -793344576772241777, 401084700045993341, -1968749590416080887,
+		4707864159563588614, -3583123505891281857, -3240864324164777915, -5908273794572565703,
+		-3719524458082857382, -5281400669679581926, 8118566580304798074, 3839261274019871296,
+		7062410411742090847, -8481991033874568140, 6027994129690250817, -6725542042704711878,
+		-2971981702428546974, -7854441788951256975, 8809096399316380241, 6492004350391900708,
+		2462145737463489636, -8818543617934476634, -5070345602623085213, -8961586321599299868,
+		-3758656652254704451, -8630661632476012791, 6764129236657751224, -709716318315418359,
+		-3403028373052861600, -8838073512170985897, -3999237033416576341, -2920240395515973663,
+		-2073249475545404416, 368107899140673753, -6108185202296464250, -6307735683270494757,
+		4782583894627718279, 6718292300699989587, 8387085186914375220, 3387513132024756289,
+		4654329375432538231, -292704475491394206, -3848998599978456535, 7623042350483453954,
+		7725442901813263321, 9186225467561587250, -5132344747257272453, -6865740430362196008,
+		2530936820058611833, 1636551876240043639, -3658707362519810009, 1452244145334316253,
+		-7161729655835084979, -7943791770359481772, 9108481583171221009, -3200093350120725999,
+		5007630032676973346, 2153168792952589781, 6720334534964750538, -3181825545719981703,
+		3433922409283786309, 2285479922797300912, 3110614940896576130, -2856812446131932915,
+		-3804580617188639299, 7163298419643543757, 4891138053923696990, 580618510277907015,
+		1684034065251686769, 4429514767357295841, -8893025458299325803, -8103734041042601133,
+		7177515271653460134, 4589042248470800257, -1530083407795771245, 143607045258444228,
+		246994305896273627, -8356954712051676521, 6473547110565816071, 3092379936208876896,
+		2058427839513754051, -4089587328327907870, 8785882556301281247, -3074039370013608197,
+		-637529855400303673, 6137678347805511274, -7152924852417805802, 5708223427705576541,
+		-3223714144396531304, 4358391411789012426, 325123008708389849, 6837621693887290924,
+		4843721905315627004, -3212720814705499393, -3825019837890901156, 4602025990114250980,
+		1044646352569048800, 9106614159853161675, -8394115921626182539, -4304087667751778808,
+		2681532557646850893, 3681559472488511871, -3915372517896561773, -2889241648411946534,
+		-6564663803938238204, -8060058171802589521, 581945337509520675, 3648778920718647903,
+		-4799698790548231394, -7602572252857820065, 220828013409515943, -1072987336855386047,
+		4287360518296753003, -4633371852008891965, 5513660857261085186, -2258542936462001533,
+		-8744380348503999773, 8746140185685648781, 228500091334420247, 1356187007457302238,
+		3019253992034194581, 3152601605678500003, -8793219284148773595, 5559581553696971176,
+		4916432985369275664, -8559797105120221417, -5802598197927043732, 2868348622579915573,
+		-7224052902810357288, -5894682518218493085, 2587672709781371173, -7706116723325376475,
+		3092343956317362483, -5561119517847711700, 972445599196498113, -1558506600978816441,
+		1708913533482282562, -2305554874185907314, -6005743014309462908, -6653329009633068701,
+		-483583197311151195, 2488075924621352812, -4529369641467339140, -4663743555056261452,
+		2997203966153298104, 1282559373026354493, 240113143146674385, 8665713329246516443,
+		628141331766346752, -4651421219668005332, -7750560848702540400, 7596648026010355826,
+		-3132152619100351065, 7834161864828164065, 7103445518877254909, 4390861237357459201,
+		-4780718172614204074, -319889632007444440, 622261699494173647, -3186110786557562560,
+		-8718967088789066690, -1948156510637662747, -8212195255998774408, -7028621931231314745,
+		2623071828615234808, -4066058308780939700, -5484966924888173764, -6683604512778046238,
+		-6756087640505506466, 5256026990536851868, 7841086888628396109, 6640857538655893162,
+		-8021284697816458310, -7109857044414059830, -1689021141511844405, -4298087301956291063,
+		-4077748265377282003, -998231156719803476, 2719520354384050532, 9132346697815513771,
+		4332154495710163773, -2085582442760428892, 6994721091344268833, -2556143461985726874,
+		-8567931991128098309, 59934747298466858, -3098398008776739403, -265597256199410390,
+		2332206071942466437, -7522315324568406181, 3154897383618636503, -7585605855467168281,
+		-6762850759087199275, 197309393502684135, -8579694182469508493, 2543179307861934850,
+		4350769010207485119, -4468719947444108136, -7207776534213261296, -1224312577878317200,
+		4287946071480840813, 8362686366770308971, 6486469209321732151, -5605644191012979782,
+		-1669018511020473564, 4450022655153542367, -7618176296641240059, -3896357471549267421,
+		-4596796223304447488, -6531150016257070659, -8982326463137525940, -4125325062227681798,
+		-1306489741394045544, -8338554946557245229, 5329160409530630596, 7790979528857726136,
+		4955070238059373407, -4304834761432101506, -6215295852904371179, 3007769226071157901,
+		-6753025801236972788, 8928702772696731736, 7856187920214445904, -4748497451462800923,
+		7900176660600710914, -7082800908938549136, -6797926979589575837, -6737316883512927978,
+		4186670094382025798, 1883939007446035042, -414705992779907823, 3734134241178479257,
+		4065968871360089196, 6953124200385847784, -7917685222115876751, -7585632937840318161,
+		-5567246375906782599, -5256612402221608788, 3106378204088556331, -2894472214076325998,
+		4565385105440252958, 1979884289539493806, -6891578849933910383, 3783206694208922581,
+		8464961209802336085, 2843963751609577687, 3030678195484896323, -4429654462759003204,
+		4459239494808162889, 402587895800087237, 8057891408711167515, 4541888170938985079,
+		1042662272908816815, -3666068979732206850, 2647678726283249984, 2144477441549833761,
+		-3417019821499388721, -2105601033380872185, 5916597177708541638, -8760774321402454447,
+		8833658097025758785, 5970273481425315300, 563813119381731307, -6455022486202078793,
+		1598828206250873866, -4016978389451217698, -2988328551145513985, -6071154634840136312,
+		8469693267274066490, 125672920241807416, -3912292412830714870, -2559617104544284221,
+		-486523741806024092, -4735332261862713930, 5923302823487327109, -9082480245771672572,
+		-1808429243461201518, 7990420780896957397, 4317817392807076702, 3625184369705367340,
+		-6482649271566653105, -3480272027152017464, -3225473396345736649, -368878695502291645,
+		-3981164001421868007, -8522033136963788610, 7609280429197514109, 3020985755112334161,
+		-2572049329799262942, 2635195723621160615, 5144520864246028816, -8188285521126945980,
+		1567242097116389047, 8172389260191636581, -2885551685425483535, -7060359469858316883,
+		-6480181133964513127, -7317004403633452381, 6011544915663598137, 5932255307352610768,
+		2241128460406315459, -8327867140638080220, 3094483003111372717, 4583857460292963101,
+		9079887171656594975, -384082854924064405, -3460631649611717935, 4225072055348026230,
+		-7385151438465742745, 3801620336801580414, -399845416774701952, -7446754431269675473,
+		7899055018877642622, 5421679761463003041, 5521102963086275121, -4975092593295409910,
+		8735487530905098534, -7462844945281082830, -2080886987197029914, -1000715163927557685,
+		-4253840471931071485, -5828896094657903328, 6424174453260338141, 359248545074932887,
+		-5949720754023045210, -2426265837057637212, 3030918217665093212, -9077771202237461772,
+		-3186796180789149575, 740416251634527158, -2142944401404840226, 6951781370868335478,
+		399922722363687927, -8928469722407522623, -1378421100515597285, -8343051178220066766,
+		-3030716356046100229, -8811767350470065420, 9026808440365124461, 6440783557497587732,
+		4615674634722404292, 539897290441580544, 2096238225866883852, 8751955639408182687,
+		-7316147128802486205, 7381039757301768559, 6157238513393239656, -1473377804940618233,
+		8629571604380892756, 5280433031239081479, 7101611890139813254, 2479018537985767835,
+		7169176924412769570, -1281305539061572506, -7865612307799218120, 2278447439451174845,
+		3625338785743880657, 6477479539006708521, 8976185375579272206, -3712000482142939688,
+		1326024180520890843, 7537449876596048829, 5464680203499696154, 3189671183162196045,
+		6346751753565857109, -8982212049534145501, -6127578587196093755, -245039190118465649,
+		-6320577374581628592, 7208698530190629697, 7276901792339343736, -7490986807540332668,
+		4133292154170828382, 2918308698224194548, -7703910638917631350, -3929437324238184044,
+		-4300543082831323144, -6344160503358350167, 5896236396443472108, -758328221503023383,
+		-1894351639983151068, -307900319840287220, -6278469401177312761, -2171292963361310674,
+		8382142935188824023, 9103922860780351547, 4152330101494654406,
+	}
+)
+
+type rngSource struct {
+	tap  int           // index into vec of the word added in; stepped back (wrapping at 0) on every Uint64 call
+	feed int           // index into vec of the word that is read, summed with vec[tap], and overwritten
+	vec  [rngLen]int64 // current feedback register
+}
+
+// seedrand advances the seeding generator by one step,
+// x[n+1] = 48271 * x[n] mod (2**31 - 1), using Schrage's decomposition with Q and R.
+func seedrand(x int32) int32 {
+	const (
+		A = 48271
+		Q = 44488 // int32max / A
+		R = 3399  // int32max % A
+	)
+
+	next := A*(x%Q) - R*(x/Q)
+	if next >= 0 {
+		return next
+	}
+	// A negative difference is brought back into range by adding the modulus.
+	return next + int32max
+}
+
+// Seed uses the provided seed value to initialize the generator to a deterministic state.
+func (rng *rngSource) Seed(seed int64) {
+	rng.tap = 0
+	rng.feed = rngLen - rngTap
+
+	seed = seed % int32max
+	if seed < 0 {
+		seed += int32max
+	}
+	if seed == 0 {
+		seed = 89482311
+	}
+
+	x := int32(seed)
+	for i := -20; i < rngLen; i++ {
+		x = seedrand(x)
+		if i >= 0 {
+			var u int64
+			u = int64(x) << 40
+			x = seedrand(x)
+			u ^= int64(x) << 20
+			x = seedrand(x)
+			u ^= int64(x)
+			u ^= rngCooked[i]
+			rng.vec[i] = u
+		}
+	}
+}
+
+// Int63 returns the next Uint64 value with its top bit cleared, i.e. a non-negative 63-bit integer, as an int64.
+func (rng *rngSource) Int63() int64 {
+	return int64(rng.Uint64() &^ rngMax)
+}
+
+// Uint64 returns a pseudo-random 64-bit integer as a uint64.
+// Each call moves tap and feed back one slot (wrapping around at zero) and
+// replaces vec[feed] with vec[feed]+vec[tap]; that sum is the value returned.
+func (rng *rngSource) Uint64() uint64 {
+	if rng.tap--; rng.tap < 0 {
+		rng.tap += rngLen
+	}
+	if rng.feed--; rng.feed < 0 {
+		rng.feed += rngLen
+	}
+
+	// The int64 addition wraps on overflow; the uint64 conversion keeps the bit pattern.
+	sum := rng.vec[rng.feed] + rng.vec[rng.tap]
+	rng.vec[rng.feed] = sum
+	return uint64(sum)
+}
diff --git a/src/math/rand/zipf.go b/src/math/rand/zipf.go
new file mode 100644
index 0000000..f04c814
--- /dev/null
+++ b/src/math/rand/zipf.go
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// W.Hormann, G.Derflinger:
+// "Rejection-Inversion to Generate Variates
+// from Monotone Discrete Distributions"
+// http://eeyore.wu-wien.ac.at/papers/96-04-04.wh-der.ps.gz
+
+package rand
+
+import "math"
+
+// A Zipf generates Zipf distributed variates.
+type Zipf struct {
+	r            *Rand   // source of the uniform variates
+	imax         float64 // largest value generated, stored as a float64
+	v            float64 // offset v in P(k) ∝ (v + k) ** (-s)
+	q            float64 // exponent; NewZipf stores its s argument here
+	s            float64 // quick-accept threshold: 1 - hinv(h(1.5) - (v+1)**(-q))
+	oneminusQ    float64 // 1 - q
+	oneminusQinv float64 // 1 / (1 - q)
+	hxm          float64 // h(imax + 0.5)
+	hx0minusHxm  float64 // h(0.5) - v**(-q) - hxm
+}
+
+func (z *Zipf) h(x float64) float64 {
+	return math.Exp(z.oneminusQ*math.Log(z.v+x)) * z.oneminusQinv // (v+x)**(1-q) / (1-q)
+}
+
+func (z *Zipf) hinv(x float64) float64 {
+	return math.Exp(z.oneminusQinv*math.Log(z.oneminusQ*x)) - z.v // ((1-q)*x)**(1/(1-q)) - v, the inverse of h
+}
+
+// NewZipf returns a Zipf variate generator.
+// The generator generates values k ∈ [0, imax]
+// such that P(k) is proportional to (v + k) ** (-s).
+// Requirements: s > 1 and v >= 1; otherwise (NaN included) NewZipf returns nil.
+func NewZipf(r *Rand, s float64, v float64, imax uint64) *Zipf {
+	// The tests are negated so that NaN, which fails every comparison, is
+	// rejected too; a NaN parameter would make Uint64 loop forever.
+	if !(s > 1.0) || !(v >= 1) {
+		return nil
+	}
+	z := &Zipf{r: r, imax: float64(imax), v: v, q: s}
+	z.oneminusQ = 1.0 - z.q
+	z.oneminusQinv = 1.0 / z.oneminusQ
+	z.hxm = z.h(z.imax + 0.5)
+	// The middle term is v**(-q), written with Exp and Log like h and hinv.
+	z.hx0minusHxm = z.h(0.5) - math.Exp(math.Log(z.v)*(-z.q)) - z.hxm
+	// Candidates k with k-x <= s are accepted by Uint64 without evaluating h.
+	z.s = 1 - z.hinv(z.h(1.5)-math.Exp(-z.q*math.Log(z.v+1.0)))
+	return z
+}
+
+// Uint64 returns a value drawn from the Zipf distribution described
+// by the Zipf object. It panics if z is nil, which is what NewZipf
+// returns for parameters outside its requirements.
+func (z *Zipf) Uint64() uint64 {
+	if z == nil {
+		panic("rand: nil Zipf")
+	}
+
+	// Rejection-inversion: draw candidates until one is accepted.
+	for {
+		u := z.r.Float64() // uniform on [0,1)
+		ur := z.hxm + u*z.hx0minusHxm
+		x := z.hinv(ur)
+		k := math.Floor(x + 0.5) // integer nearest to x
+		// Accept k right away when it lies at most z.s above x;
+		// otherwise evaluate h and accept only if ur reaches
+		// h(k+0.5) - (k+v)**(-q).
+		if k-x <= z.s || ur >= z.h(k+0.5)-math.Exp(-math.Log(k+z.v)*z.q) {
+			return uint64(k)
+		}
+	}
+}
diff --git a/src/math/remainder.go b/src/math/remainder.go
new file mode 100644
index 0000000..8e99345
--- /dev/null
+++ b/src/math/remainder.go
@@ -0,0 +1,95 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code and the comment below are from
+// FreeBSD's /usr/src/lib/msun/src/e_remainder.c and came
+// with this notice. The go code is a simplified version of
+// the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_remainder(x,y)
+// Return :
+//      returns  x REM y  =  x - [x/y]*y  as if in infinite
+//      precision arithmetic, where [x/y] is the (infinite bit)
+//      integer nearest x/y (in half way cases, choose the even one).
+// Method :
+//      Based on Mod() returning  x - [x/y]chopped * y  exactly.
+
+// Remainder computes the IEEE 754 floating-point remainder of x/y.
+//
+// Special cases are:
+//
+//	Remainder(±Inf, y) = NaN
+//	Remainder(NaN, y) = NaN
+//	Remainder(x, 0) = NaN
+//	Remainder(x, ±Inf) = x
+//	Remainder(x, NaN) = NaN
+func Remainder(x, y float64) float64 {
+	if !haveArchRemainder {
+		return remainder(x, y) // portable Go implementation
+	}
+	return archRemainder(x, y)
+}
+
+func remainder(x, y float64) float64 {
+	const (
+		Tiny    = 4.45014771701440276618e-308 // 0x0020000000000000
+		HalfMax = MaxFloat64 / 2              // largest y for which y+y stays finite
+	)
+	// special cases, as listed in the Remainder documentation
+	switch {
+	case IsNaN(x) || IsNaN(y) || IsInf(x, 0) || y == 0:
+		return NaN()
+	case IsInf(y, 0):
+		return x
+	}
+	sign := false // set when x was negative; the sign is put back on the result at the end
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if y < 0 {
+		y = -y // only the magnitude of y is used
+	}
+	if x == y {
+		if sign {
+			zero := 0.0 // negate a variable: the constant expression -0.0 would be +0
+			return -zero
+		}
+		return 0
+	}
+	if y <= HalfMax {
+		x = Mod(x, y+y) // now x < 2y
+	}
+	if y < Tiny { // compare x+x with y instead of x with y/2; presumably halving so small a y could lose bits
+		if x+x > y {
+			x -= y
+			if x+x >= y {
+				x -= y
+			}
+		}
+	} else {
+		yHalf := 0.5 * y
+		if x > yHalf { // more than half of y away from 0: step down by y
+			x -= y
+			if x >= yHalf { // still at least half of y away: step down once more
+				x -= y
+			}
+		}
+	}
+	if sign {
+		x = -x
+	}
+	return x
+}
diff --git a/src/math/signbit.go b/src/math/signbit.go
new file mode 100644
index 0000000..f6e61d6
--- /dev/null
+++ b/src/math/signbit.go
@@ -0,0 +1,10 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Signbit reports whether the sign bit of x is set, that is, whether x is negative or negative zero.
+func Signbit(x float64) bool {
+	return Float64bits(x)>>63 != 0
+}
diff --git a/src/math/sin.go b/src/math/sin.go
new file mode 100644
index 0000000..4793d7e
--- /dev/null
+++ b/src/math/sin.go
@@ -0,0 +1,244 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point sine and cosine.
+*/
+
+// The original C code, the long comment, and the constants
+// below were from http://netlib.sandia.gov/cephes/cmath/sin.c,
+// available from http://www.netlib.org/cephes/cmath.tgz.
+// The go code is a simplified version of the original C.
+//
+//      sin.c
+//
+//      Circular sine
+//
+// SYNOPSIS:
+//
+// double x, y, sin();
+// y = sin( x );
+//
+// DESCRIPTION:
+//
+// Range reduction is into intervals of pi/4.  The reduction error is nearly
+// eliminated by contriving an extended precision modular arithmetic.
+//
+// Two polynomial approximating functions are employed.
+// Between 0 and pi/4 the sine is approximated by
+//      x  +  x**3 P(x**2).
+// Between pi/4 and pi/2 the cosine is represented as
+//      1  -  x**2 Q(x**2).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain      # trials      peak         rms
+//    DEC       0, 10       150000       3.0e-17     7.8e-18
+//    IEEE -1.07e9,+1.07e9  130000       2.1e-16     5.4e-17
+//
+// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9.  The loss
+// is not gradual, but jumps suddenly to about 1 part in 10e7.  Results may
+// be meaningless for x > 2**49 = 5.6e14.
+//
+//      cos.c
+//
+//      Circular cosine
+//
+// SYNOPSIS:
+//
+// double x, y, cos();
+// y = cos( x );
+//
+// DESCRIPTION:
+//
+// Range reduction is into intervals of pi/4.  The reduction error is nearly
+// eliminated by contriving an extended precision modular arithmetic.
+//
+// Two polynomial approximating functions are employed.
+// Between 0 and pi/4 the cosine is approximated by
+//      1  -  x**2 Q(x**2).
+// Between pi/4 and pi/2 the sine is represented as
+//      x  +  x**3 P(x**2).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain      # trials      peak         rms
+//    IEEE -1.07e9,+1.07e9  130000       2.1e-16     5.4e-17
+//    DEC        0,+1.07e9   17000       3.0e-17     7.2e-18
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// sin coefficients: P in sin(z) ≈ z + z**3 * P(z**2), highest-degree term first
+var _sin = [...]float64{
+	1.58962301576546568060e-10, // 0x3de5d8fd1fd19ccd
+	-2.50507477628578072866e-8, // 0xbe5ae5e5a9291f5d
+	2.75573136213857245213e-6,  // 0x3ec71de3567d48a1
+	-1.98412698295895385996e-4, // 0xbf2a01a019bfdf03
+	8.33333333332211858878e-3,  // 0x3f8111111110f7d0
+	-1.66666666666666307295e-1, // 0xbfc5555555555548
+}
+
+// cos coefficients: Q in cos(z) ≈ 1 - z**2/2 + z**4 * Q(z**2), highest-degree term first
+var _cos = [...]float64{
+	-1.13585365213876817300e-11, // 0xbda8fa49a0861a9b
+	2.08757008419747316778e-9,   // 0x3e21ee9d7b4e3f05
+	-2.75573141792967388112e-7,  // 0xbe927e4f7eac4bc6
+	2.48015872888517045348e-5,   // 0x3efa01a019c844f5
+	-1.38888888888730564116e-3,  // 0xbf56c16c16c14f91
+	4.16666666666665929218e-2,   // 0x3fa555555555554b
+}
+
+// Cos computes the cosine of x, where x is an angle in radians.
+//
+// Special cases are:
+//
+//	Cos(±Inf) = NaN
+//	Cos(NaN) = NaN
+func Cos(x float64) float64 {
+	if !haveArchCos {
+		return cos(x) // portable Go implementation
+	}
+	return archCos(x)
+}
+
+func cos(x float64) float64 {
+	const (
+		PI4A = 7.85398125648498535156e-1  // 0x3fe921fb40000000, Pi/4 split into three parts
+		PI4B = 3.77489470793079817668e-8  // 0x3e64442d00000000,
+		PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170,
+	)
+	// special cases: NaN and ±Inf both yield NaN
+	switch {
+	case IsNaN(x) || IsInf(x, 0):
+		return NaN()
+	}
+
+	// make argument positive; cosine is even, so sign starts out false
+	sign := false
+	x = Abs(x)
+
+	var j uint64
+	var y, z float64
+	if x >= reduceThreshold { // large arguments go through trigReduce instead
+		j, z = trigReduce(x)
+	} else {
+		j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+		y = float64(j)           // integer part of x/(Pi/4), as float
+
+		// map zeros to origin: bump an odd octant count to the next even one
+		if j&1 == 1 {
+			j++
+			y++
+		}
+		j &= 7                               // octant modulo 2Pi radians (360 degrees)
+		z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
+	}
+
+	if j > 3 { // second half of the circle: drop four octants and flip the sign
+		j -= 4
+		sign = !sign
+	}
+	if j > 1 { // octants 2 and 3 flip the sign once more
+		sign = !sign
+	}
+
+	zz := z * z
+	if j == 1 || j == 2 { // sine polynomial: z + z**3 * P(z**2)
+		y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
+	} else { // cosine polynomial: 1 - z**2/2 + z**4 * Q(z**2)
+		y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
+	}
+	if sign {
+		y = -y
+	}
+	return y
+}
+
+// Sin computes the sine of x, where x is an angle in radians.
+//
+// Special cases are:
+//
+//	Sin(±0) = ±0
+//	Sin(±Inf) = NaN
+//	Sin(NaN) = NaN
+func Sin(x float64) float64 {
+	if !haveArchSin {
+		return sin(x) // portable Go implementation
+	}
+	return archSin(x)
+}
+
+func sin(x float64) float64 {
+	const (
+		PI4A = 7.85398125648498535156e-1  // 0x3fe921fb40000000, Pi/4 split into three parts
+		PI4B = 3.77489470793079817668e-8  // 0x3e64442d00000000,
+		PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170,
+	)
+	// special cases
+	switch {
+	case x == 0 || IsNaN(x):
+		return x // return ±0 || NaN()
+	case IsInf(x, 0):
+		return NaN()
+	}
+
+	// make argument positive but save the sign (sine is odd, so the sign carries over to the result)
+	sign := false
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+
+	var j uint64
+	var y, z float64
+	if x >= reduceThreshold { // large arguments go through trigReduce instead
+		j, z = trigReduce(x)
+	} else {
+		j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+		y = float64(j)           // integer part of x/(Pi/4), as float
+
+		// map zeros to origin: bump an odd octant count to the next even one
+		if j&1 == 1 {
+			j++
+			y++
+		}
+		j &= 7                               // octant modulo 2Pi radians (360 degrees)
+		z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
+	}
+	// reflect in x axis: in the second half of the circle the sine changes sign
+	if j > 3 {
+		sign = !sign
+		j -= 4
+	}
+	zz := z * z
+	if j == 1 || j == 2 { // cosine polynomial: 1 - z**2/2 + z**4 * Q(z**2)
+		y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
+	} else { // sine polynomial: z + z**3 * P(z**2)
+		y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
+	}
+	if sign {
+		y = -y
+	}
+	return y
+}
diff --git a/src/math/sin_s390x.s b/src/math/sin_s390x.s
new file mode 100644
index 0000000..7eb2206
--- /dev/null
+++ b/src/math/sin_s390x.s
@@ -0,0 +1,356 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Various constants
+DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000 // quiet NaN bit pattern
+GLOBL sincosxnan<>+0(SB), RODATA, $8
+DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19 // about Pi * 2**50; |x| is compared with this limit before the NaN exit
+GLOBL sincosxlim<>+0(SB), RODATA, $8
+DATA sincosxadd<>+0(SB)/8, $0xc338000000000000 // -1.5 * 2**52
+GLOBL sincosxadd<>+0(SB), RODATA, $8
+DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31 // Pi/2, low part
+GLOBL sincosxpi2l<>+0(SB), RODATA, $8
+DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16 // Pi/2, middle part
+GLOBL sincosxpi2m<>+0(SB), RODATA, $8
+DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01 // Pi/2, high part
+GLOBL sincosxpi2h<>+0(SB), RODATA, $8
+DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00 // 2/Pi
+GLOBL sincosrpi2<>+0(SB), RODATA, $8
+
+// Minimax polynomial coefficients: sincosc0..7 for cosine, sincoss0..7 for sine
+DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincosc0<>+0(SB), RODATA, $8
+DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00
+GLOBL sincosc1<>+0(SB), RODATA, $8
+DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01
+GLOBL sincosc2<>+0(SB), RODATA, $8
+DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02
+GLOBL sincosc3<>+0(SB), RODATA, $8
+DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04
+GLOBL sincosc4<>+0(SB), RODATA, $8
+DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06
+GLOBL sincosc5<>+0(SB), RODATA, $8
+DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08
+GLOBL sincosc6<>+0(SB), RODATA, $8
+DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10
+GLOBL sincosc7<>+0(SB), RODATA, $8
+DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincoss0<>+0(SB), RODATA, $8
+DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00
+GLOBL sincoss1<>+0(SB), RODATA, $8
+DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02
+GLOBL sincoss2<>+0(SB), RODATA, $8
+DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03
+GLOBL sincoss3<>+0(SB), RODATA, $8
+DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05
+GLOBL sincoss4<>+0(SB), RODATA, $8
+DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07
+GLOBL sincoss5<>+0(SB), RODATA, $8
+DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09
+GLOBL sincoss6<>+0(SB), RODATA, $8
+DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12
+GLOBL sincoss7<>+0(SB), RODATA, $8
+
+// Sin returns the sine of the radian argument x.
+//
+// Special cases are:
+//      Sin(±0) = ±0
+//      Sin(±Inf) = NaN
+//      Sin(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·sinAsm(SB),NOSPLIT,$0-16
+	FMOVD   x+0(FP), F0 // F0 = x
+	//special case Sin(±0) = ±0
+	FMOVD   $(0.0), F1
+	FCMPU   F0, F1
+	BEQ     sinIsZero
+	LTDBR	F0, F0
+	BLTU    L17 // x tests below zero: L17 puts -x in F5
+	FMOVD   F0, F5 // F5 = x
+L2:
+	MOVD    $sincoss7<>+0(SB), R1
+	FMOVD   0(R1), F4
+	MOVD    $sincoss6<>+0(SB), R1
+	FMOVD   0(R1), F1
+	MOVD    $sincoss5<>+0(SB), R1
+	VLEG    $0, 0(R1), V18
+	MOVD    $sincoss4<>+0(SB), R1
+	FMOVD   0(R1), F6
+	MOVD    $sincoss2<>+0(SB), R1
+	VLEG    $0, 0(R1), V16
+	MOVD    $sincoss3<>+0(SB), R1
+	FMOVD   0(R1), F7
+	MOVD    $sincoss1<>+0(SB), R1
+	FMOVD   0(R1), F3
+	MOVD    $sincoss0<>+0(SB), R1
+	FMOVD   0(R1), F2
+	WFCHDBS V2, V5, V2
+	BEQ     L18
+	MOVD    $sincosrpi2<>+0(SB), R1
+	FMOVD   0(R1), F3
+	MOVD    $sincosxadd<>+0(SB), R1
+	FMOVD   0(R1), F2
+	WFMSDB  V0, V3, V2, V3
+	FMOVD   0(R1), F6
+	FADD    F3, F6
+	MOVD    $sincosxpi2h<>+0(SB), R1
+	FMOVD   0(R1), F2
+	FMSUB   F2, F6, F0
+	MOVD    $sincosxpi2m<>+0(SB), R1
+	FMOVD   0(R1), F4
+	FMADD   F4, F6, F0
+	MOVD    $sincosxpi2l<>+0(SB), R1
+	WFMDB   V0, V0, V1
+	FMOVD   0(R1), F7
+	WFMDB   V1, V1, V2
+	LGDR    F3, R1
+	MOVD    $sincosxlim<>+0(SB), R2
+	TMLL	R1, $1
+	BEQ     L6
+	FMOVD   0(R2), F0
+	WFCHDBS V0, V5, V0
+	BNE     L14
+	MOVD    $sincosc7<>+0(SB), R2
+	FMOVD   0(R2), F0
+	MOVD    $sincosc6<>+0(SB), R2
+	FMOVD   0(R2), F4
+	MOVD    $sincosc5<>+0(SB), R2
+	WFMADB  V1, V0, V4, V0
+	FMOVD   0(R2), F6
+	MOVD    $sincosc4<>+0(SB), R2
+	WFMADB  V1, V0, V6, V0
+	FMOVD   0(R2), F4
+	MOVD    $sincosc2<>+0(SB), R2
+	FMOVD   0(R2), F6
+	WFMADB  V2, V4, V6, V4
+	MOVD    $sincosc3<>+0(SB), R2
+	FMOVD   0(R2), F3
+	MOVD    $sincosc1<>+0(SB), R2
+	WFMADB  V2, V0, V3, V0
+	FMOVD   0(R2), F6
+	WFMADB  V1, V4, V6, V4
+	TMLL	R1, $2
+	WFMADB  V2, V0, V4, V0
+	MOVD    $sincosc0<>+0(SB), R1
+	FMOVD   0(R1), F2
+	WFMADB  V1, V0, V2, V0
+	BNE     L15
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L6:
+	FMOVD   0(R2), F4
+	WFCHDBS V4, V5, V4
+	BNE     L14
+	MOVD    $sincoss7<>+0(SB), R2
+	FMOVD   0(R2), F4
+	MOVD    $sincoss6<>+0(SB), R2
+	FMOVD   0(R2), F3
+	MOVD    $sincoss5<>+0(SB), R2
+	WFMADB  V1, V4, V3, V4
+	WFMADB  V6, V7, V0, V6
+	FMOVD   0(R2), F0
+	MOVD    $sincoss4<>+0(SB), R2
+	FMADD   F4, F1, F0
+	FMOVD   0(R2), F3
+	MOVD    $sincoss2<>+0(SB), R2
+	FMOVD   0(R2), F4
+	MOVD    $sincoss3<>+0(SB), R2
+	WFMADB  V2, V3, V4, V3
+	FMOVD   0(R2), F4
+	MOVD    $sincoss1<>+0(SB), R2
+	WFMADB  V2, V0, V4, V0
+	FMOVD   0(R2), F4
+	WFMADB  V1, V3, V4, V3
+	FNEG    F6, F4
+	WFMADB  V2, V0, V3, V2
+	WFMDB   V4, V1, V0
+	TMLL	R1, $2
+	WFMSDB  V0, V2, V6, V0
+	BNE     L15
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L14:
+	MOVD    $sincosxnan<>+0(SB), R1 // the result is the NaN constant
+	FMOVD   0(R1), F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L18:
+	WFMDB   V0, V0, V2 // V2 = x*x; the polynomial is evaluated on x itself here
+	WFMADB  V2, V4, V1, V4
+	WFMDB   V2, V2, V1
+	WFMADB  V2, V4, V18, V4
+	WFMADB  V1, V6, V16, V6
+	WFMADB  V1, V4, V7, V4
+	WFMADB  V2, V6, V3, V6
+	FMUL    F0, F2
+	WFMADB  V1, V4, V6, V4
+	FMADD   F4, F2, F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L17:
+	FNEG    F0, F5 // F5 = -x
+	BR      L2
+L15:
+	FNEG    F0, F0 // the result is the negated value
+	FMOVD   F0, ret+8(FP)
+	RET
+
+
+sinIsZero:
+	FMOVD   F0, ret+8(FP) // return x itself, which keeps the sign of zero
+	RET
+
+// Cos returns the cosine of the radian argument.
+//
+// Special cases are:
+//      Cos(±Inf) = NaN
+//      Cos(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·cosAsm(SB),NOSPLIT,$0-16
+	FMOVD   x+0(FP), F0 // F0 = x
+	LTDBR	F0, F0
+	BLTU    L35 // x tests below zero: L35 puts -x in F1
+	FMOVD   F0, F1 // F1 = x
+L21:
+	MOVD    $sincosc7<>+0(SB), R1
+	FMOVD   0(R1), F4
+	MOVD    $sincosc6<>+0(SB), R1
+	VLEG    $0, 0(R1), V20
+	MOVD    $sincosc5<>+0(SB), R1
+	VLEG    $0, 0(R1), V18
+	MOVD    $sincosc4<>+0(SB), R1
+	FMOVD   0(R1), F6
+	MOVD    $sincosc2<>+0(SB), R1
+	VLEG    $0, 0(R1), V16
+	MOVD    $sincosc3<>+0(SB), R1
+	FMOVD   0(R1), F7
+	MOVD    $sincosc1<>+0(SB), R1
+	FMOVD   0(R1), F5
+	MOVD    $sincosrpi2<>+0(SB), R1
+	FMOVD   0(R1), F2
+	MOVD    $sincosxadd<>+0(SB), R1
+	FMOVD   0(R1), F3
+	MOVD    $sincoss0<>+0(SB), R1
+	WFMSDB  V0, V2, V3, V2
+	FMOVD   0(R1), F3
+	WFCHDBS V3, V1, V3
+	LGDR    F2, R1
+	BEQ     L36
+	MOVD    $sincosxadd<>+0(SB), R2
+	FMOVD   0(R2), F4
+	FADD    F2, F4
+	MOVD    $sincosxpi2h<>+0(SB), R2
+	FMOVD   0(R2), F2
+	WFMSDB  V4, V2, V0, V2
+	MOVD    $sincosxpi2m<>+0(SB), R2
+	FMOVD   0(R2), F0
+	WFMADB  V4, V0, V2, V0
+	MOVD    $sincosxpi2l<>+0(SB), R2
+	WFMDB   V0, V0, V2
+	FMOVD   0(R2), F5
+	WFMDB   V2, V2, V6
+	MOVD    $sincosxlim<>+0(SB), R2
+	TMLL	R1, $1
+	BNE     L25
+	FMOVD   0(R2), F0
+	WFCHDBS V0, V1, V0
+	BNE     L33
+	MOVD    $sincosc7<>+0(SB), R2
+	FMOVD   0(R2), F0
+	MOVD    $sincosc6<>+0(SB), R2
+	FMOVD   0(R2), F4
+	MOVD    $sincosc5<>+0(SB), R2
+	WFMADB  V2, V0, V4, V0
+	FMOVD   0(R2), F1
+	MOVD    $sincosc4<>+0(SB), R2
+	WFMADB  V2, V0, V1, V0
+	FMOVD   0(R2), F4
+	MOVD    $sincosc2<>+0(SB), R2
+	FMOVD   0(R2), F1
+	WFMADB  V6, V4, V1, V4
+	MOVD    $sincosc3<>+0(SB), R2
+	FMOVD   0(R2), F3
+	MOVD    $sincosc1<>+0(SB), R2
+	WFMADB  V6, V0, V3, V0
+	FMOVD   0(R2), F1
+	WFMADB  V2, V4, V1, V4
+	TMLL	R1, $2
+	WFMADB  V6, V0, V4, V0
+	MOVD    $sincosc0<>+0(SB), R1
+	FMOVD   0(R1), F4
+	WFMADB  V2, V0, V4, V0
+	BNE     L34
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L25:
+	FMOVD   0(R2), F3
+	WFCHDBS V3, V1, V1
+	BNE     L33
+	MOVD    $sincoss7<>+0(SB), R2
+	FMOVD   0(R2), F1
+	MOVD    $sincoss6<>+0(SB), R2
+	FMOVD   0(R2), F3
+	MOVD    $sincoss5<>+0(SB), R2
+	WFMADB  V2, V1, V3, V1
+	FMOVD   0(R2), F3
+	MOVD    $sincoss4<>+0(SB), R2
+	WFMADB  V2, V1, V3, V1
+	FMOVD   0(R2), F3
+	MOVD    $sincoss2<>+0(SB), R2
+	FMOVD   0(R2), F7
+	WFMADB  V6, V3, V7, V3
+	MOVD    $sincoss3<>+0(SB), R2
+	FMADD   F5, F4, F0
+	FMOVD   0(R2), F4
+	MOVD    $sincoss1<>+0(SB), R2
+	FMADD   F1, F6, F4
+	FMOVD   0(R2), F1
+	FMADD   F3, F2, F1
+	FMUL    F0, F2
+	WFMADB  V6, V4, V1, V6
+	TMLL	R1, $2
+	FMADD   F6, F2, F0
+	BNE     L34
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L33:
+	MOVD    $sincosxnan<>+0(SB), R1 // the result is the NaN constant
+	FMOVD   0(R1), F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L36:
+	FMUL    F0, F0 // F0 = x*x; the polynomial is evaluated on x itself here
+	MOVD    $sincosc0<>+0(SB), R1
+	WFMDB   V0, V0, V1
+	WFMADB  V0, V4, V20, V4
+	WFMADB  V1, V6, V16, V6
+	WFMADB  V0, V4, V18, V4
+	WFMADB  V0, V6, V5, V6
+	WFMADB  V1, V4, V7, V4
+	FMOVD   0(R1), F2
+	WFMADB  V1, V4, V6, V4
+	WFMADB  V0, V4, V2, V0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L35:
+	FNEG    F0, F1 // F1 = -x
+	BR      L21
+L34:
+	FNEG    F0, F0 // the result is the negated value
+	FMOVD   F0, ret+8(FP)
+	RET
diff --git a/src/math/sincos.go b/src/math/sincos.go
new file mode 100644
index 0000000..e3fb960
--- /dev/null
+++ b/src/math/sincos.go
@@ -0,0 +1,73 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Coefficients _sin[] and _cos[] are found in sin.go.
+
+// Sincos returns Sin(x), Cos(x).
+//
+// Special cases are:
+//
+//	Sincos(±0) = ±0, 1
+//	Sincos(±Inf) = NaN, NaN
+//	Sincos(NaN) = NaN, NaN
+func Sincos(x float64) (sin, cos float64) {
+	const (
+		PI4A = 7.85398125648498535156e-1  // 0x3fe921fb40000000, Pi/4 split into three parts
+		PI4B = 3.77489470793079817668e-8  // 0x3e64442d00000000,
+		PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170,
+	)
+	// special cases
+	switch {
+	case x == 0:
+		return x, 1 // return ±0.0, 1.0
+	case IsNaN(x) || IsInf(x, 0):
+		return NaN(), NaN()
+	}
+
+	// make argument positive; only the sine picks up the sign of x
+	sinSign, cosSign := false, false
+	if x < 0 {
+		x = -x
+		sinSign = true
+	}
+
+	var j uint64
+	var y, z float64
+	if x >= reduceThreshold { // large arguments go through trigReduce instead
+		j, z = trigReduce(x)
+	} else {
+		j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+		y = float64(j)           // integer part of x/(Pi/4), as float
+
+		if j&1 == 1 { // map zeros to origin
+			j++
+			y++
+		}
+		j &= 7                               // octant modulo 2Pi radians (360 degrees)
+		z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
+	}
+	if j > 3 { // reflect in x axis
+		j -= 4
+		sinSign, cosSign = !sinSign, !cosSign
+	}
+	if j > 1 { // octants 2 and 3 flip the cosine sign once more
+		cosSign = !cosSign
+	}
+
+	zz := z * z
+	cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
+	sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
+	if j == 1 || j == 2 { // in these octants the two polynomial values trade places
+		sin, cos = cos, sin
+	}
+	if cosSign {
+		cos = -cos
+	}
+	if sinSign {
+		sin = -sin
+	}
+	return // the named results sin and cos
+}
diff --git a/src/math/sinh.go b/src/math/sinh.go
new file mode 100644
index 0000000..78b3c29
--- /dev/null
+++ b/src/math/sinh.go
@@ -0,0 +1,93 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point hyperbolic sine and cosine.
+
+	The exponential func is called for arguments
+	greater in magnitude than 0.5.
+
+	A rational approximation is used for arguments of magnitude 0.5 or less.
+
+	Cosh(x) is computed from the exponential func for
+	all arguments.
+*/
+
+// Sinh computes the hyperbolic sine of x.
+//
+// Special cases are:
+//
+//	Sinh(±0) = ±0
+//	Sinh(±Inf) = ±Inf
+//	Sinh(NaN) = NaN
+func Sinh(x float64) float64 {
+	if !haveArchSinh {
+		return sinh(x) // portable Go implementation
+	}
+	return archSinh(x)
+}
+
+func sinh(x float64) float64 {
+	// The coefficients are #2029 from Hart & Cheney. (20.36D)
+	const (
+		P0 = -0.6307673640497716991184787251e+6
+		P1 = -0.8991272022039509355398013511e+5
+		P2 = -0.2894211355989563807284660366e+4
+		P3 = -0.2630563213397497062819489e+2
+		Q0 = -0.6307673640497716991212077277e+6
+		Q1 = 0.1521517378790019070696485176e+5
+		Q2 = -0.173678953558233699533450911e+3
+	)
+
+	sign := false // sinh is odd: work on |x| and put the sign back at the end
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+
+	var temp float64
+	switch {
+	case x > 21: // 1/Exp(x) is below the rounding error of Exp(x), so it is dropped
+		temp = Exp(x) * 0.5
+
+	case x > 0.5: // sinh(x) = (e**x - e**-x) / 2
+		ex := Exp(x)
+		temp = (ex - 1/ex) * 0.5
+
+	default: // x <= 0.5 (or NaN): rational approximation x * P(x**2) / Q(x**2)
+		sq := x * x
+		temp = (((P3*sq+P2)*sq+P1)*sq + P0) * x
+		temp = temp / (((sq+Q2)*sq+Q1)*sq + Q0)
+	}
+
+	if sign {
+		temp = -temp
+	}
+	return temp
+}
+
+// Cosh computes the hyperbolic cosine of x.
+//
+// Special cases are:
+//
+//	Cosh(±0) = 1
+//	Cosh(±Inf) = +Inf
+//	Cosh(NaN) = NaN
+func Cosh(x float64) float64 {
+	if !haveArchCosh {
+		return cosh(x) // portable Go implementation
+	}
+	return archCosh(x)
+}
+
+func cosh(x float64) float64 {
+	ex := Exp(Abs(x)) // cosh is even: only |x| matters
+	if x > 21 || x < -21 {
+		// 1/ex is far below one ulp of ex, so it is left out.
+		return ex * 0.5
+	}
+	return (ex + 1/ex) * 0.5
+}
diff --git a/src/math/sinh_s390x.s b/src/math/sinh_s390x.s
new file mode 100644
index 0000000..d684968
--- /dev/null
+++ b/src/math/sinh_s390x.s
@@ -0,0 +1,251 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+#include "textflag.h"
+
+// Constants
+DATA sinhrodataL21<>+0(SB)/8, $0.231904681384629956E-16 // ln 2, low part
+DATA sinhrodataL21<>+8(SB)/8, $0.693147180559945286E+00 // ln 2, high part
+DATA sinhrodataL21<>+16(SB)/8, $704.E0 // compared with |x| by the cdb at L2
+GLOBL sinhrodataL21<>+0(SB), RODATA, $24
+DATA sinhrlog2<>+0(SB)/8, $0x3ff7154760000000 // about 1/ln 2, low mantissa bits zero
+GLOBL sinhrlog2<>+0(SB), RODATA, $8
+DATA sinhxinf<>+0(SB)/8, $0x7ff0000000000000 // +Inf bit pattern
+GLOBL sinhxinf<>+0(SB), RODATA, $8
+DATA sinhxinit<>+0(SB)/8, $0x3ffb504f333f9de6
+GLOBL sinhxinit<>+0(SB), RODATA, $8
+DATA sinhxlim1<>+0(SB)/8, $800.E0 // compared with |x| at L15
+GLOBL sinhxlim1<>+0(SB), RODATA, $8
+DATA sinhxadd<>+0(SB)/8, $0xc3200001610007fb
+GLOBL sinhxadd<>+0(SB), RODATA, $8
+DATA sinhx4ff<>+0(SB)/8, $0x4ff0000000000000 // 2**256
+GLOBL sinhx4ff<>+0(SB), RODATA, $8
+
+// Minimax polynomial coefficients
+DATA sinhe0<>+0(SB)/8, $0.11715728752538099300E+01
+GLOBL sinhe0<>+0(SB), RODATA, $8
+DATA sinhe1<>+0(SB)/8, $0.11715728752538099300E+01
+GLOBL sinhe1<>+0(SB), RODATA, $8
+DATA sinhe2<>+0(SB)/8, $0.58578643762688526692E+00
+GLOBL sinhe2<>+0(SB), RODATA, $8
+DATA sinhe3<>+0(SB)/8, $0.19526214587563004497E+00
+GLOBL sinhe3<>+0(SB), RODATA, $8
+DATA sinhe4<>+0(SB)/8, $0.48815536475176217404E-01
+GLOBL sinhe4<>+0(SB), RODATA, $8
+DATA sinhe5<>+0(SB)/8, $0.97631072948627397816E-02
+GLOBL sinhe5<>+0(SB), RODATA, $8
+DATA sinhe6<>+0(SB)/8, $0.16271839297756073153E-02
+GLOBL sinhe6<>+0(SB), RODATA, $8
+DATA sinhe7<>+0(SB)/8, $0.23245485387271142509E-03
+GLOBL sinhe7<>+0(SB), RODATA, $8
+DATA sinhe8<>+0(SB)/8, $0.29080955860869629131E-04
+GLOBL sinhe8<>+0(SB), RODATA, $8
+DATA sinhe9<>+0(SB)/8, $0.32311267157667725278E-05
+GLOBL sinhe9<>+0(SB), RODATA, $8
+
+// Sinh returns the hyperbolic sine of the argument.
+//
+// Special cases are:
+//      Sinh(±0) = ±0
+//      Sinh(±Inf) = ±Inf
+//      Sinh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+
+TEXT ·sinhAsm(SB),NOSPLIT,$0-16
+	FMOVD   x+0(FP), F0 // F0 = x
+	//special case Sinh(±0) = ±0
+	FMOVD   $(0.0), F1
+	FCMPU   F0, F1
+	BEQ     sinhIsZero
+	//special case Sinh(±Inf) = ±Inf
+	FMOVD   $1.797693134862315708145274237317043567981e+308, F1
+	FCMPU   F1, F0
+	BLEU    sinhIsInf // NOTE(review): non-strict test, so x == MaxFloat64 seems to return x as well; confirm
+	FMOVD   $-1.797693134862315708145274237317043567981e+308, F1
+	FCMPU   F1, F0
+	BGT             sinhIsInf
+
+	MOVD    $sinhrodataL21<>+0(SB), R5
+	LTDBR	F0, F0
+	MOVD    sinhxinit<>+0(SB), R1
+	FMOVD   F0, F4
+	MOVD    R1, R3
+	BLTU    L19 // x tests below zero: L19 puts -x in F2
+	FMOVD   F0, F2 // F2 = x
+L2:
+	WORD    $0xED205010     //cdb %f2,.L22-.L21(%r5)
+	BYTE    $0x00
+	BYTE    $0x19
+	BGE     L15     //jnl   .L15
+	BVS     L15
+	WFCEDBS V2, V2, V0
+	BEQ     L20
+L12:
+	FMOVD   F4, F0 // F4 still holds the original x
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L15:
+	WFCEDBS V2, V2, V0
+	BVS     L12
+	MOVD    $sinhxlim1<>+0(SB), R2
+	FMOVD   0(R2), F0
+	WFCHDBS V0, V2, V0
+	BEQ     L6
+	WFCHEDBS        V4, V2, V6
+	MOVD    $sinhxinf<>+0(SB), R1
+	FMOVD   0(R1), F0 // F0 = +Inf
+	BNE     LEXITTAGsinh
+	WFCHDBS V2, V4, V2
+	BNE     L16
+	FNEG    F0, F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L19:
+	FNEG    F0, F2 // F2 = -x
+	BR      L2
+L6:
+	MOVD    $sinhxadd<>+0(SB), R2
+	FMOVD   0(R2), F0
+	MOVD    sinhrlog2<>+0(SB), R2
+	LDGR    R2, F6
+	WFMSDB  V4, V6, V0, V16
+	FMOVD   sinhrodataL21<>+8(SB), F6
+	WFADB   V0, V16, V0
+	FMOVD   sinhrodataL21<>+0(SB), F3
+	WFMSDB  V0, V6, V4, V6
+	MOVD    $sinhe9<>+0(SB), R2
+	WFMADB  V0, V3, V6, V0
+	FMOVD   0(R2), F1
+	MOVD    $sinhe7<>+0(SB), R2
+	WFMDB   V0, V0, V6
+	FMOVD   0(R2), F5
+	MOVD    $sinhe8<>+0(SB), R2
+	FMOVD   0(R2), F3
+	MOVD    $sinhe6<>+0(SB), R2
+	WFMADB  V6, V1, V5, V1
+	FMOVD   0(R2), F5
+	MOVD    $sinhe5<>+0(SB), R2
+	FMOVD   0(R2), F7
+	MOVD    $sinhe3<>+0(SB), R2
+	WFMADB  V6, V3, V5, V3
+	FMOVD   0(R2), F5
+	MOVD    $sinhe4<>+0(SB), R2
+	WFMADB  V6, V7, V5, V7
+	FMOVD   0(R2), F5
+	MOVD    $sinhe2<>+0(SB), R2
+	VLEG    $0, 0(R2), V20
+	WFMDB   V6, V6, V18
+	WFMADB  V6, V5, V20, V5
+	WFMADB  V1, V18, V7, V1
+	FNEG    F0, F0
+	WFMADB  V3, V18, V5, V3
+	MOVD    $sinhe1<>+0(SB), R3
+	WFCEDBS V2, V4, V2
+	FMOVD   0(R3), F5
+	MOVD    $sinhe0<>+0(SB), R3
+	WFMADB  V6, V1, V5, V1
+	FMOVD   0(R3), F5
+	VLGVG   $0, V16, R2
+	WFMADB  V6, V3, V5, V6
+	RLL     $3, R2, R2
+	RISBGN	$0, $15, $48, R2, R1
+	BEQ     L9
+	WFMSDB  V0, V1, V6, V0
+	MOVD    $sinhx4ff<>+0(SB), R3
+	FNEG    F0, F0
+	FMOVD   0(R3), F2
+	FMUL    F2, F0
+	ANDW    $0xFFFF, R2
+	WORD    $0xA53FEFB6     //llill %r3,61366
+	SUBW    R2, R3, R2
+	RISBGN	$0, $15, $48, R2, R1
+	LDGR    R1, F2
+	FMUL    F2, F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L20:
+	MOVD    $sinhxadd<>+0(SB), R2
+	FMOVD   0(R2), F2
+	MOVD    sinhrlog2<>+0(SB), R2
+	LDGR    R2, F0
+	WFMSDB  V4, V0, V2, V6
+	FMOVD   sinhrodataL21<>+8(SB), F0
+	FADD    F6, F2
+	MOVD    $sinhe9<>+0(SB), R2
+	FMSUB   F0, F2, F4
+	FMOVD   0(R2), F1
+	FMOVD   sinhrodataL21<>+0(SB), F3
+	MOVD    $sinhe7<>+0(SB), R2
+	FMADD   F3, F2, F4
+	FMOVD   0(R2), F0
+	MOVD    $sinhe8<>+0(SB), R2
+	WFMDB   V4, V4, V2
+	FMOVD   0(R2), F3
+	MOVD    $sinhe6<>+0(SB), R2
+	FMOVD   0(R2), F5
+	LGDR    F6, R2
+	RLL     $3, R2, R2
+	RISBGN	$0, $15, $48, R2, R1
+	WFMADB  V2, V1, V0, V1
+	LDGR    R1, F0
+	MOVD    $sinhe5<>+0(SB), R1
+	WFMADB  V2, V3, V5, V3
+	FMOVD   0(R1), F5
+	MOVD    $sinhe3<>+0(SB), R1
+	FMOVD   0(R1), F6
+	WFMDB   V2, V2, V7
+	WFMADB  V2, V5, V6, V5
+	WORD    $0xA7487FB6     //lhi %r4,32694
+	FNEG    F4, F4
+	ANDW    $0xFFFF, R2
+	SUBW    R2, R4, R2
+	RISBGN	$0, $15, $48, R2, R3
+	LDGR    R3, F6
+	WFADB   V0, V6, V16
+	MOVD    $sinhe4<>+0(SB), R1
+	WFMADB  V1, V7, V5, V1
+	WFMDB   V4, V16, V4
+	FMOVD   0(R1), F5
+	MOVD    $sinhe2<>+0(SB), R1
+	VLEG    $0, 0(R1), V16
+	MOVD    $sinhe1<>+0(SB), R1
+	WFMADB  V2, V5, V16, V5
+	VLEG    $0, 0(R1), V16
+	WFMADB  V3, V7, V5, V3
+	WFMADB  V2, V1, V16, V1
+	FSUB    F6, F0
+	FMUL    F1, F4
+	MOVD    $sinhe0<>+0(SB), R1
+	FMOVD   0(R1), F6
+	WFMADB  V2, V3, V6, V2
+	WFMADB  V0, V2, V4, V0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L9:
+	WFMADB  V0, V1, V6, V0
+	MOVD    $sinhx4ff<>+0(SB), R3
+	FMOVD   0(R3), F2
+	FMUL    F2, F0
+	WORD    $0xA72AF000     //ahi   %r2,-4096
+	RISBGN	$0, $15, $48, R2, R1
+	LDGR    R1, F2
+	FMUL    F2, F0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L16:
+	FMOVD   F0, ret+8(FP)
+	RET
+
+LEXITTAGsinh:
+sinhIsInf:
+sinhIsZero:
+	FMOVD   F0, ret+8(FP) // all three exits return whatever F0 holds
+	RET
diff --git a/src/math/sqrt.go b/src/math/sqrt.go
new file mode 100644
index 0000000..54929eb
--- /dev/null
+++ b/src/math/sqrt.go
@@ -0,0 +1,145 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
+// came with this notice. The Go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_sqrt(x)
+// Return correctly rounded sqrt.
+//           -----------------------------------------
+//           | Use the hardware sqrt if you have one |
+//           -----------------------------------------
+// Method:
+//   Bit by bit method using integer arithmetic. (Slow, but portable)
+//   1. Normalization
+//      Scale x to y in [1,4) with even powers of 2:
+//      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
+//              sqrt(x) = 2**k * sqrt(y)
+//   2. Bit by bit computation
+//      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
+//           i                                                   0
+//                                     i+1         2
+//          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
+//           i      i            i                 i
+//
+//      To compute q    from q , one checks whether
+//                  i+1       i
+//
+//                            -(i+1) 2
+//                      (q + 2      )  <= y.                     (2)
+//                        i
+//                                                            -(i+1)
+//      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
+//                             i+1   i             i+1   i
+//
+//      With some algebraic manipulation, it is not difficult to see
+//      that (2) is equivalent to
+//                             -(i+1)
+//                      s  +  2       <= y                       (3)
+//                       i                i
+//
+//      The advantage of (3) is that s  and y  can be computed by
+//                                    i      i
+//      the following recurrence formula:
+//          if (3) is false
+//
+//          s     =  s  ,       y    = y   ;                     (4)
+//           i+1      i          i+1    i
+//
+//      otherwise,
+//                         -i                      -(i+1)
+//          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
+//           i+1      i          i+1    i     i
+//
+//      One may easily use induction to prove (4) and (5).
+//      Note. Since the left hand side of (3) contains only i+2 bits,
+//            it is not necessary to do a full (53-bit) comparison
+//            in (3).
+//   3. Final rounding
+//      After generating the 53 bits result, we compute one more bit.
+//      Together with the remainder, we can decide whether the
+//      result is exact, bigger than 1/2ulp, or less than 1/2ulp
+//      (it will never be equal to 1/2ulp).
+//      The rounding mode can be detected by checking whether
+//      huge + tiny is equal to huge, and whether huge - tiny is
+//      equal to huge for some floating point number "huge" and "tiny".
+//
+//
+// Notes:  Rounding mode detection omitted. The constants "mask", "shift",
+// and "bias" are found in src/math/bits.go
+
+// Sqrt returns the square root of x.
+//
+// Special cases are:
+//
+//	Sqrt(+Inf) = +Inf
+//	Sqrt(±0) = ±0
+//	Sqrt(x < 0) = NaN
+//	Sqrt(NaN) = NaN
+func Sqrt(x float64) float64 {
+	return sqrt(x) // sqrt is the portable bit-by-bit implementation in this file
+}
+
+// Note: On systems where Sqrt is a single instruction, the compiler
+// may turn a direct call into a direct use of that instruction instead.
+
+func sqrt(x float64) float64 {
+	// special cases: ±0, NaN and +Inf are returned unchanged; any negative x (including -Inf) gives NaN
+	switch {
+	case x == 0 || IsNaN(x) || IsInf(x, 1):
+		return x
+	case x < 0:
+		return NaN()
+	}
+	ix := Float64bits(x)
+	// normalize x: extract the biased exponent field; ix is reduced to the significand below
+	exp := int((ix >> shift) & mask)
+	if exp == 0 { // subnormal x
+		for ix&(1<<shift) == 0 { // shift left until bit "shift" is set, lowering exp once per shift
+			ix <<= 1
+			exp--
+		}
+		exp++
+	}
+	exp -= bias          // unbias exponent
+	ix &^= mask << shift // clear the exponent field
+	ix |= 1 << shift     // set bit "shift", the leading 1 of the significand
+	if exp&1 == 1 { // odd exp, double x to make it even
+		ix <<= 1
+	}
+	exp >>= 1 // exp = exp/2, exponent of square root
+	// generate sqrt(x) bit by bit
+	ix <<= 1
+	var q, s uint64               // q = sqrt(x)
+	r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB
+	for r != 0 {
+		t := s + r
+		if t <= ix { // the trial bit r fits: accept it into q and update s and the remainder
+			s = t + r
+			ix -= t
+			q += r
+		}
+		ix <<= 1
+		r >>= 1
+	}
+	// final rounding
+	if ix != 0 { // remainder, result not exact
+		q += q & 1 // round according to extra bit
+	}
+	ix = q>>1 + uint64(exp-1+bias)<<shift // significand + biased exponent
+	return Float64frombits(ix)
+}
diff --git a/src/math/stubs.go b/src/math/stubs.go
new file mode 100644
index 0000000..c4350d4
--- /dev/null
+++ b/src/math/stubs.go
@@ -0,0 +1,160 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !s390x
+
+// This is a large group of functions that most architectures don't
+// implement in assembly.
+
+package math
+
+const haveArchAcos = false // this file (built when !s390x) supplies no real archAcos
+
+func archAcos(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAcos is false
+}
+
+const haveArchAcosh = false // this file (built when !s390x) supplies no real archAcosh
+
+func archAcosh(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAcosh is false
+}
+
+const haveArchAsin = false // this file (built when !s390x) supplies no real archAsin
+
+func archAsin(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAsin is false
+}
+
+const haveArchAsinh = false // this file (built when !s390x) supplies no real archAsinh
+
+func archAsinh(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAsinh is false
+}
+
+const haveArchAtan = false // this file (built when !s390x) supplies no real archAtan
+
+func archAtan(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAtan is false
+}
+
+const haveArchAtan2 = false // this file (built when !s390x) supplies no real archAtan2
+
+func archAtan2(y, x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAtan2 is false
+}
+
+const haveArchAtanh = false // this file (built when !s390x) supplies no real archAtanh
+
+func archAtanh(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchAtanh is false
+}
+
+const haveArchCbrt = false // this file (built when !s390x) supplies no real archCbrt
+
+func archCbrt(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchCbrt is false
+}
+
+const haveArchCos = false // this file (built when !s390x) supplies no real archCos
+
+func archCos(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchCos is false
+}
+
+const haveArchCosh = false // this file (built when !s390x) supplies no real archCosh
+
+func archCosh(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchCosh is false
+}
+
+const haveArchErf = false // this file (built when !s390x) supplies no real archErf
+
+func archErf(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchErf is false
+}
+
+const haveArchErfc = false // this file (built when !s390x) supplies no real archErfc
+
+func archErfc(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchErfc is false
+}
+
+const haveArchExpm1 = false // this file (built when !s390x) supplies no real archExpm1
+
+func archExpm1(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchExpm1 is false
+}
+
+const haveArchFrexp = false // this file (built when !s390x) supplies no real archFrexp
+
+func archFrexp(x float64) (float64, int) {
+	panic("not implemented") // placeholder; presumably never called while haveArchFrexp is false
+}
+
+const haveArchLdexp = false // this file (built when !s390x) supplies no real archLdexp
+
+func archLdexp(frac float64, exp int) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchLdexp is false
+}
+
+const haveArchLog10 = false // this file (built when !s390x) supplies no real archLog10
+
+func archLog10(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchLog10 is false
+}
+
+const haveArchLog2 = false // this file (built when !s390x) supplies no real archLog2
+
+func archLog2(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchLog2 is false
+}
+
+const haveArchLog1p = false // this file (built when !s390x) supplies no real archLog1p
+
+func archLog1p(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchLog1p is false
+}
+
+const haveArchMod = false // this file (built when !s390x) supplies no real archMod
+
+func archMod(x, y float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchMod is false
+}
+
+const haveArchPow = false // this file (built when !s390x) supplies no real archPow
+
+func archPow(x, y float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchPow is false
+}
+
+const haveArchRemainder = false // this file (built when !s390x) supplies no real archRemainder
+
+func archRemainder(x, y float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchRemainder is false
+}
+
+const haveArchSin = false // this file (built when !s390x) supplies no real archSin
+
+func archSin(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchSin is false
+}
+
+const haveArchSinh = false // this file (built when !s390x) supplies no real archSinh
+
+func archSinh(x float64) float64 {
+	panic("not implemented") // placeholder; presumably never called while haveArchSinh is false
+}
+
+const haveArchTan = false // this file (built when !s390x) supplies no real archTan
+
+func archTan(x float64) float64 {
+	panic("not implemented") // placeholder; Tan only calls archTan when haveArchTan is true
+}
+
+const haveArchTanh = false // this file (built when !s390x) supplies no real archTanh
+
+func archTanh(x float64) float64 {
+	panic("not implemented") // placeholder; Tanh only calls archTanh when haveArchTanh is true
+}
diff --git a/src/math/stubs_s390x.s b/src/math/stubs_s390x.s
new file mode 100644
index 0000000..7400179
--- /dev/null
+++ b/src/math/stubs_s390x.s
@@ -0,0 +1,468 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT ·archLog10(SB), NOSPLIT, $0
+	MOVD ·log10vectorfacility+0x00(SB), R1 // R1 = address currently stored in ·log10vectorfacility
+	BR   (R1)
+
+TEXT ·log10TrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·log10Asm; otherwise fall through to ·log10
+	MOVD   $·log10vectorfacility+0x00(SB), R1
+	MOVD   $·log10(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·log10(SB)
+
+vectorimpl: // store ·log10Asm as the dispatch target, then jump to it
+	MOVD $·log10vectorfacility+0x00(SB), R1
+	MOVD $·log10Asm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·log10Asm(SB)
+
+GLOBL ·log10vectorfacility+0x00(SB), NOPTR, $8
+DATA ·log10vectorfacility+0x00(SB)/8, $·log10TrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archCos(SB), NOSPLIT, $0
+	MOVD ·cosvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·cosvectorfacility
+	BR   (R1)
+
+TEXT ·cosTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·cosAsm; otherwise fall through to ·cos
+	MOVD   $·cosvectorfacility+0x00(SB), R1
+	MOVD   $·cos(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·cos(SB)
+
+vectorimpl: // store ·cosAsm as the dispatch target, then jump to it
+	MOVD $·cosvectorfacility+0x00(SB), R1
+	MOVD $·cosAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·cosAsm(SB)
+
+GLOBL ·cosvectorfacility+0x00(SB), NOPTR, $8
+DATA ·cosvectorfacility+0x00(SB)/8, $·cosTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archCosh(SB), NOSPLIT, $0
+	MOVD ·coshvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·coshvectorfacility
+	BR   (R1)
+
+TEXT ·coshTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·coshAsm; otherwise fall through to ·cosh
+	MOVD   $·coshvectorfacility+0x00(SB), R1
+	MOVD   $·cosh(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·cosh(SB)
+
+vectorimpl: // store ·coshAsm as the dispatch target, then jump to it
+	MOVD $·coshvectorfacility+0x00(SB), R1
+	MOVD $·coshAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·coshAsm(SB)
+
+GLOBL ·coshvectorfacility+0x00(SB), NOPTR, $8
+DATA ·coshvectorfacility+0x00(SB)/8, $·coshTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archSin(SB), NOSPLIT, $0
+	MOVD ·sinvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·sinvectorfacility
+	BR   (R1)
+
+TEXT ·sinTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·sinAsm; otherwise fall through to ·sin
+	MOVD   $·sinvectorfacility+0x00(SB), R1
+	MOVD   $·sin(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·sin(SB)
+
+vectorimpl: // store ·sinAsm as the dispatch target, then jump to it
+	MOVD $·sinvectorfacility+0x00(SB), R1
+	MOVD $·sinAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·sinAsm(SB)
+
+GLOBL ·sinvectorfacility+0x00(SB), NOPTR, $8
+DATA ·sinvectorfacility+0x00(SB)/8, $·sinTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archSinh(SB), NOSPLIT, $0
+	MOVD ·sinhvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·sinhvectorfacility
+	BR   (R1)
+
+TEXT ·sinhTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·sinhAsm; otherwise fall through to ·sinh
+	MOVD   $·sinhvectorfacility+0x00(SB), R1
+	MOVD   $·sinh(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·sinh(SB)
+
+vectorimpl: // store ·sinhAsm as the dispatch target, then jump to it
+	MOVD $·sinhvectorfacility+0x00(SB), R1
+	MOVD $·sinhAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·sinhAsm(SB)
+
+GLOBL ·sinhvectorfacility+0x00(SB), NOPTR, $8
+DATA ·sinhvectorfacility+0x00(SB)/8, $·sinhTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archTanh(SB), NOSPLIT, $0
+	MOVD ·tanhvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·tanhvectorfacility
+	BR   (R1)
+
+TEXT ·tanhTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·tanhAsm; otherwise fall through to ·tanh
+	MOVD   $·tanhvectorfacility+0x00(SB), R1
+	MOVD   $·tanh(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·tanh(SB)
+
+vectorimpl: // store ·tanhAsm as the dispatch target, then jump to it
+	MOVD $·tanhvectorfacility+0x00(SB), R1
+	MOVD $·tanhAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·tanhAsm(SB)
+
+GLOBL ·tanhvectorfacility+0x00(SB), NOPTR, $8
+DATA ·tanhvectorfacility+0x00(SB)/8, $·tanhTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archLog1p(SB), NOSPLIT, $0
+	MOVD ·log1pvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·log1pvectorfacility
+	BR   (R1)
+
+TEXT ·log1pTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·log1pAsm; otherwise fall through to ·log1p
+	MOVD   $·log1pvectorfacility+0x00(SB), R1
+	MOVD   $·log1p(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·log1p(SB)
+
+vectorimpl: // store ·log1pAsm as the dispatch target, then jump to it
+	MOVD $·log1pvectorfacility+0x00(SB), R1
+	MOVD $·log1pAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·log1pAsm(SB)
+
+GLOBL ·log1pvectorfacility+0x00(SB), NOPTR, $8
+DATA ·log1pvectorfacility+0x00(SB)/8, $·log1pTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAtanh(SB), NOSPLIT, $0
+	MOVD ·atanhvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·atanhvectorfacility
+	BR   (R1)
+
+TEXT ·atanhTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·atanhAsm; otherwise fall through to ·atanh
+	MOVD   $·atanhvectorfacility+0x00(SB), R1
+	MOVD   $·atanh(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·atanh(SB)
+
+vectorimpl: // store ·atanhAsm as the dispatch target, then jump to it
+	MOVD $·atanhvectorfacility+0x00(SB), R1
+	MOVD $·atanhAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·atanhAsm(SB)
+
+GLOBL ·atanhvectorfacility+0x00(SB), NOPTR, $8
+DATA ·atanhvectorfacility+0x00(SB)/8, $·atanhTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAcos(SB), NOSPLIT, $0
+	MOVD ·acosvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·acosvectorfacility
+	BR   (R1)
+
+TEXT ·acosTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·acosAsm; otherwise fall through to ·acos
+	MOVD   $·acosvectorfacility+0x00(SB), R1
+	MOVD   $·acos(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·acos(SB)
+
+vectorimpl: // store ·acosAsm as the dispatch target, then jump to it
+	MOVD $·acosvectorfacility+0x00(SB), R1
+	MOVD $·acosAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·acosAsm(SB)
+
+GLOBL ·acosvectorfacility+0x00(SB), NOPTR, $8
+DATA ·acosvectorfacility+0x00(SB)/8, $·acosTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAsin(SB), NOSPLIT, $0
+	MOVD ·asinvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·asinvectorfacility
+	BR   (R1)
+
+TEXT ·asinTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·asinAsm; otherwise fall through to ·asin
+	MOVD   $·asinvectorfacility+0x00(SB), R1
+	MOVD   $·asin(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·asin(SB)
+
+vectorimpl: // store ·asinAsm as the dispatch target, then jump to it
+	MOVD $·asinvectorfacility+0x00(SB), R1
+	MOVD $·asinAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·asinAsm(SB)
+
+GLOBL ·asinvectorfacility+0x00(SB), NOPTR, $8
+DATA ·asinvectorfacility+0x00(SB)/8, $·asinTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAsinh(SB), NOSPLIT, $0
+	MOVD ·asinhvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·asinhvectorfacility
+	BR   (R1)
+
+TEXT ·asinhTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·asinhAsm; otherwise fall through to ·asinh
+	MOVD   $·asinhvectorfacility+0x00(SB), R1
+	MOVD   $·asinh(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·asinh(SB)
+
+vectorimpl: // store ·asinhAsm as the dispatch target, then jump to it
+	MOVD $·asinhvectorfacility+0x00(SB), R1
+	MOVD $·asinhAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·asinhAsm(SB)
+
+GLOBL ·asinhvectorfacility+0x00(SB), NOPTR, $8
+DATA ·asinhvectorfacility+0x00(SB)/8, $·asinhTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAcosh(SB), NOSPLIT, $0
+	MOVD ·acoshvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·acoshvectorfacility
+	BR   (R1)
+
+TEXT ·acoshTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·acoshAsm; otherwise fall through to ·acosh
+	MOVD   $·acoshvectorfacility+0x00(SB), R1
+	MOVD   $·acosh(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·acosh(SB)
+
+vectorimpl: // store ·acoshAsm as the dispatch target, then jump to it
+	MOVD $·acoshvectorfacility+0x00(SB), R1
+	MOVD $·acoshAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·acoshAsm(SB)
+
+GLOBL ·acoshvectorfacility+0x00(SB), NOPTR, $8
+DATA ·acoshvectorfacility+0x00(SB)/8, $·acoshTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archErf(SB), NOSPLIT, $0
+	MOVD ·erfvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·erfvectorfacility
+	BR   (R1)
+
+TEXT ·erfTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·erfAsm; otherwise fall through to ·erf
+	MOVD   $·erfvectorfacility+0x00(SB), R1
+	MOVD   $·erf(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·erf(SB)
+
+vectorimpl: // store ·erfAsm as the dispatch target, then jump to it
+	MOVD $·erfvectorfacility+0x00(SB), R1
+	MOVD $·erfAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·erfAsm(SB)
+
+GLOBL ·erfvectorfacility+0x00(SB), NOPTR, $8
+DATA ·erfvectorfacility+0x00(SB)/8, $·erfTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archErfc(SB), NOSPLIT, $0
+	MOVD ·erfcvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·erfcvectorfacility
+	BR   (R1)
+
+TEXT ·erfcTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·erfcAsm; otherwise fall through to ·erfc
+	MOVD   $·erfcvectorfacility+0x00(SB), R1
+	MOVD   $·erfc(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·erfc(SB)
+
+vectorimpl: // store ·erfcAsm as the dispatch target, then jump to it
+	MOVD $·erfcvectorfacility+0x00(SB), R1
+	MOVD $·erfcAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·erfcAsm(SB)
+
+GLOBL ·erfcvectorfacility+0x00(SB), NOPTR, $8
+DATA ·erfcvectorfacility+0x00(SB)/8, $·erfcTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAtan(SB), NOSPLIT, $0
+	MOVD ·atanvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·atanvectorfacility
+	BR   (R1)
+
+TEXT ·atanTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·atanAsm; otherwise fall through to ·atan
+	MOVD   $·atanvectorfacility+0x00(SB), R1
+	MOVD   $·atan(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·atan(SB)
+
+vectorimpl: // store ·atanAsm as the dispatch target, then jump to it
+	MOVD $·atanvectorfacility+0x00(SB), R1
+	MOVD $·atanAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·atanAsm(SB)
+
+GLOBL ·atanvectorfacility+0x00(SB), NOPTR, $8
+DATA ·atanvectorfacility+0x00(SB)/8, $·atanTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archAtan2(SB), NOSPLIT, $0
+	MOVD ·atan2vectorfacility+0x00(SB), R1 // R1 = address currently stored in ·atan2vectorfacility
+	BR   (R1)
+
+TEXT ·atan2TrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·atan2Asm; otherwise fall through to ·atan2
+	MOVD   $·atan2vectorfacility+0x00(SB), R1
+	MOVD   $·atan2(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·atan2(SB)
+
+vectorimpl: // store ·atan2Asm as the dispatch target, then jump to it
+	MOVD $·atan2vectorfacility+0x00(SB), R1
+	MOVD $·atan2Asm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·atan2Asm(SB)
+
+GLOBL ·atan2vectorfacility+0x00(SB), NOPTR, $8
+DATA ·atan2vectorfacility+0x00(SB)/8, $·atan2TrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archCbrt(SB), NOSPLIT, $0
+	MOVD ·cbrtvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·cbrtvectorfacility
+	BR   (R1)
+
+TEXT ·cbrtTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                // hasVX == 1: dispatch to ·cbrtAsm; otherwise fall through to ·cbrt
+	MOVD   $·cbrtvectorfacility+0x00(SB), R1
+	MOVD   $·cbrt(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·cbrt(SB)
+
+vectorimpl: // store ·cbrtAsm as the dispatch target, then jump to it
+	MOVD $·cbrtvectorfacility+0x00(SB), R1
+	MOVD $·cbrtAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·cbrtAsm(SB)
+
+GLOBL ·cbrtvectorfacility+0x00(SB), NOPTR, $8
+DATA ·cbrtvectorfacility+0x00(SB)/8, $·cbrtTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archLog(SB), NOSPLIT, $0
+	MOVD ·logvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·logvectorfacility
+	BR   (R1)
+
+TEXT ·logTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·logAsm; otherwise fall through to ·log
+	MOVD   $·logvectorfacility+0x00(SB), R1
+	MOVD   $·log(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·log(SB)
+
+vectorimpl: // store ·logAsm as the dispatch target, then jump to it
+	MOVD $·logvectorfacility+0x00(SB), R1
+	MOVD $·logAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·logAsm(SB)
+
+GLOBL ·logvectorfacility+0x00(SB), NOPTR, $8
+DATA ·logvectorfacility+0x00(SB)/8, $·logTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archTan(SB), NOSPLIT, $0
+	MOVD ·tanvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·tanvectorfacility
+	BR   (R1)
+
+TEXT ·tanTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·tanAsm; otherwise fall through to ·tan
+	MOVD   $·tanvectorfacility+0x00(SB), R1
+	MOVD   $·tan(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·tan(SB)
+
+vectorimpl: // store ·tanAsm as the dispatch target, then jump to it
+	MOVD $·tanvectorfacility+0x00(SB), R1
+	MOVD $·tanAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·tanAsm(SB)
+
+GLOBL ·tanvectorfacility+0x00(SB), NOPTR, $8
+DATA ·tanvectorfacility+0x00(SB)/8, $·tanTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archExp(SB), NOSPLIT, $0
+	MOVD ·expvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·expvectorfacility
+	BR   (R1)
+
+TEXT ·expTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·expAsm; otherwise fall through to ·exp
+	MOVD   $·expvectorfacility+0x00(SB), R1
+	MOVD   $·exp(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·exp(SB)
+
+vectorimpl: // store ·expAsm as the dispatch target, then jump to it
+	MOVD $·expvectorfacility+0x00(SB), R1
+	MOVD $·expAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·expAsm(SB)
+
+GLOBL ·expvectorfacility+0x00(SB), NOPTR, $8
+DATA ·expvectorfacility+0x00(SB)/8, $·expTrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archExpm1(SB), NOSPLIT, $0
+	MOVD ·expm1vectorfacility+0x00(SB), R1 // R1 = address currently stored in ·expm1vectorfacility
+	BR   (R1)
+
+TEXT ·expm1TrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl                 // hasVX == 1: dispatch to ·expm1Asm; otherwise fall through to ·expm1
+	MOVD   $·expm1vectorfacility+0x00(SB), R1
+	MOVD   $·expm1(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·expm1(SB)
+
+vectorimpl: // store ·expm1Asm as the dispatch target, then jump to it
+	MOVD $·expm1vectorfacility+0x00(SB), R1
+	MOVD $·expm1Asm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·expm1Asm(SB)
+
+GLOBL ·expm1vectorfacility+0x00(SB), NOPTR, $8
+DATA ·expm1vectorfacility+0x00(SB)/8, $·expm1TrampolineSetup(SB) // initial target: the setup routine
+
+TEXT ·archPow(SB), NOSPLIT, $0
+	MOVD ·powvectorfacility+0x00(SB), R1 // R1 = address currently stored in ·powvectorfacility
+	BR   (R1)
+
+TEXT ·powTrampolineSetup(SB), NOSPLIT, $0
+	MOVB   ·hasVX(SB), R1
+	CMPBEQ R1, $1, vectorimpl               // hasVX == 1: dispatch to ·powAsm; otherwise fall through to ·pow
+	MOVD   $·powvectorfacility+0x00(SB), R1
+	MOVD   $·pow(SB), R2
+	MOVD   R2, 0(R1)
+	BR     ·pow(SB)
+
+vectorimpl: // store ·powAsm as the dispatch target, then jump to it
+	MOVD $·powvectorfacility+0x00(SB), R1
+	MOVD $·powAsm(SB), R2
+	MOVD R2, 0(R1)
+	BR   ·powAsm(SB)
+
+GLOBL ·powvectorfacility+0x00(SB), NOPTR, $8
+DATA ·powvectorfacility+0x00(SB)/8, $·powTrampolineSetup(SB) // initial target: the setup routine
+
diff --git a/src/math/tan.go b/src/math/tan.go
new file mode 100644
index 0000000..8f6e71e
--- /dev/null
+++ b/src/math/tan.go
@@ -0,0 +1,140 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+/*
+	Floating-point tangent.
+*/
+
+// The original C code, the long comment, and the constants
+// below were from http://netlib.sandia.gov/cephes/cmath/tan.c,
+// available from http://www.netlib.org/cephes/cmath.tgz.
+// The Go code is a simplified version of the original C.
+//
+//      tan.c
+//
+//      Circular tangent
+//
+// SYNOPSIS:
+//
+// double x, y, tan();
+// y = tan( x );
+//
+// DESCRIPTION:
+//
+// Returns the circular tangent of the radian argument x.
+//
+// Range reduction is modulo pi/4.  A rational function
+//       x + x**3 P(x**2)/Q(x**2)
+// is employed in the basic interval [0, pi/4].
+//
+// ACCURACY:
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    DEC      +-1.07e9      44000      4.1e-17     1.0e-17
+//    IEEE     +-1.07e9      30000      2.9e-16     8.1e-17
+//
+// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9.  The loss
+// is not gradual, but jumps suddenly to about 1 part in 10e7.  Results may
+// be meaningless for x > 2**49 = 5.6e14.
+// [Accuracy loss statement from sin.go comments.]
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+
+// tan coefficients: numerator (_tanP) and denominator (_tanQ) polynomials in z*z, highest power first
+var _tanP = [...]float64{
+	-1.30936939181383777646e4, // 0xc0c992d8d24f3f38
+	1.15351664838587416140e6,  // 0x413199eca5fc9ddd
+	-1.79565251976484877988e7, // 0xc1711fead3299176
+}
+var _tanQ = [...]float64{
+	1.00000000000000000000e0,  // leading coefficient; tan writes it implicitly and never indexes _tanQ[0]
+	1.36812963470692954678e4,  // 0x40cab8a5eeb36572
+	-1.32089234440210967447e6, // 0xc13427bc582abc96
+	2.50083801823357915839e7,  // 0x4177d98fc2ead8ef
+	-5.38695755929454629881e7, // 0xc189afe03cbe5a31
+}
+
+// Tan returns the tangent of the radian argument x.
+//
+// Special cases are:
+//
+//	Tan(±0) = ±0
+//	Tan(±Inf) = NaN
+//	Tan(NaN) = NaN
+func Tan(x float64) float64 {
+	if haveArchTan { // constant; false in builds that use stubs.go (!s390x)
+		return archTan(x)
+	}
+	return tan(x) // pure-Go implementation in this file
+}
+
+func tan(x float64) float64 {
+	const (
+		PI4A = 7.85398125648498535156e-1  // 0x3fe921fb40000000, Pi/4 split into three parts
+		PI4B = 3.77489470793079817668e-8  // 0x3e64442d00000000,
+		PI4C = 2.69515142907905952645e-15 // 0x3ce8469898cc5170,
+	)
+	// special cases: ±0 and NaN are returned as-is, ±Inf gives NaN
+	switch {
+	case x == 0 || IsNaN(x):
+		return x // return ±0 || NaN()
+	case IsInf(x, 0):
+		return NaN()
+	}
+
+	// make argument positive but save the sign
+	sign := false
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	var j uint64
+	var y, z float64
+	if x >= reduceThreshold { // large x: delegate the reduction to trigReduce (defined outside this function)
+		j, z = trigReduce(x)
+	} else {
+		j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+		y = float64(j)           // integer part of x/(Pi/4), as float
+
+		// map zeros and singularities to origin: bump an odd j (and y) to the next even value
+		if j&1 == 1 {
+			j++
+			y++
+		}
+
+		z = ((x - y*PI4A) - y*PI4B) - y*PI4C // z = x - y*(Pi/4), using the three-part split of Pi/4
+	}
+	zz := z * z
+
+	if zz > 1e-14 { // rational approximation: z + z*zz*P(zz)/Q(zz)
+		y = z + z*(zz*(((_tanP[0]*zz)+_tanP[1])*zz+_tanP[2])/((((zz+_tanQ[1])*zz+_tanQ[2])*zz+_tanQ[3])*zz+_tanQ[4]))
+	} else {
+		y = z // zz <= 1e-14: use z itself
+	}
+	if j&2 == 2 { // bit 1 of j set: take the negative reciprocal
+		y = -1 / y
+	}
+	if sign { // undo the sign flip made above
+		y = -y
+	}
+	return y
+}
diff --git a/src/math/tan_s390x.s b/src/math/tan_s390x.s
new file mode 100644
index 0000000..8226760
--- /dev/null
+++ b/src/math/tan_s390x.s
@@ -0,0 +1,110 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and related constants; tanAsm addresses this table through R5
+DATA ·tanrodataL13<> + 0(SB)/8, $0.181017336383229927e-07
+DATA ·tanrodataL13<> + 8(SB)/8, $-.256590857271311164e-03
+DATA ·tanrodataL13<> + 16(SB)/8, $-.464359274328689195e+00
+DATA ·tanrodataL13<> + 24(SB)/8, $1.0
+DATA ·tanrodataL13<> + 32(SB)/8, $-.333333333333333464e+00
+DATA ·tanrodataL13<> + 40(SB)/8, $0.245751217306830032e-01
+DATA ·tanrodataL13<> + 48(SB)/8, $-.245391301343844510e-03
+DATA ·tanrodataL13<> + 56(SB)/8, $0.214530914428992319e-01
+DATA ·tanrodataL13<> + 64(SB)/8, $0.108285667160535624e-31
+DATA ·tanrodataL13<> + 72(SB)/8, $0.612323399573676480e-16
+DATA ·tanrodataL13<> + 80(SB)/8, $0.157079632679489656e+01 // ~pi/2
+DATA ·tanrodataL13<> + 88(SB)/8, $0.636619772367581341e+00 // ~2/pi
+GLOBL ·tanrodataL13<> + 0(SB), RODATA, $96
+
+// Constants, each loaded by address in tanAsm
+DATA ·tanxnan<> + 0(SB)/8, $0x7ff8000000000000 // NaN bit pattern returned at L11
+GLOBL ·tanxnan<> + 0(SB), RODATA, $8
+DATA ·tanxlim<> + 0(SB)/8, $0x432921fb54442d19 // limit that |x| is compared against at L2
+GLOBL ·tanxlim<> + 0(SB), RODATA, $8
+DATA ·tanxadd<> + 0(SB)/8, $0xc338000000000000 // loaded into F4 ahead of the first WFMSDB
+GLOBL ·tanxadd<> + 0(SB), RODATA, $8
+
+// tanAsm returns the tangent of the radian argument.
+//
+// Special cases are:
+//      Tan(±0) = ±0
+//      Tan(±Inf) = NaN
+//      Tan(NaN) = NaN
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT	·tanAsm(SB), NOSPLIT, $0-16
+	FMOVD	x+0(FP), F0
+	// special case: Tan(±0) = ±0 (the argument is returned unchanged)
+	FMOVD   $(0.0), F1
+	FCMPU   F0, F1
+	BEQ     atanIsZero
+
+	MOVD	$·tanrodataL13<>+0(SB), R5
+	LTDBR	F0, F0
+	BLTU	L10 // negative (or unordered) x: L10 loads the complement of F0 into F2
+	FMOVD	F0, F2 // otherwise F2 = x
+L2:
+	MOVD	$·tanxlim<>+0(SB), R1
+	WORD	$0xED201000	//cdb	%f2,0(%r1)
+	BYTE	$0x00
+	BYTE	$0x19
+	BGE	L11 // F2 >= ·tanxlim: return NaN
+	BVS	L11 // unordered compare: return NaN
+	MOVD	$·tanxadd<>+0(SB), R1
+	FMOVD	88(R5), F6
+	FMOVD	0(R1), F4
+	WFMSDB	V0, V6, V4, V6
+	FMOVD	80(R5), F1
+	FADD	F6, F4
+	FMOVD	72(R5), F2
+	FMSUB	F1, F4, F0
+	FMOVD	64(R5), F3
+	WFMADB	V4, V2, V0, V2
+	FMOVD	56(R5), F1
+	WFMADB	V4, V3, V2, V4
+	FMUL	F2, F2
+	VLEG	$0, 48(R5), V18
+	LGDR	F6, R1
+	FMOVD	40(R5), F5
+	FMOVD	32(R5), F3
+	FMADD	F1, F2, F3
+	FMOVD	24(R5), F1
+	FMOVD	16(R5), F7
+	FMOVD	8(R5), F0
+	WFMADB	V2, V7, V1, V7
+	WFMADB	V2, V0, V5, V0
+	WFMDB	V2, V2, V1
+	FMOVD	0(R5), F5
+	WFLCDB	V4, V16
+	WFMADB	V2, V5, V18, V5
+	WFMADB	V1, V0, V7, V0
+	TMLL	R1, $1 // test the low bit of R1 (the bits of F6 moved by LGDR above)
+	WFMADB	V1, V5, V3, V1
+	BNE	L12 // low bit set: finish through L12 instead
+	WFDDB	V0, V1, V0
+	WFMDB	V2, V16, V2
+	WFMADB	V2, V0, V4, V0
+	WORD	$0xB3130000	//lcdbr	%f0,%f0
+	FMOVD	F0, ret+8(FP)
+	RET
+L12:
+	WFMSDB	V2, V1, V0, V2
+	WFMDB	V16, V2, V2
+	FDIV	F2, F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L11: // return the NaN constant ·tanxnan
+	MOVD	$·tanxnan<>+0(SB), R1
+	FMOVD	0(R1), F0
+	FMOVD	F0, ret+8(FP)
+	RET
+L10: // F2 = complement of F0, then rejoin the common path at L2
+	WORD	$0xB3130020	//lcdbr	%f2,%f0
+	BR	L2
+atanIsZero: // x compared equal to zero: return it unchanged
+	FMOVD	F0, ret+8(FP)
+	RET
diff --git a/src/math/tanh.go b/src/math/tanh.go
new file mode 100644
index 0000000..94ebc3b
--- /dev/null
+++ b/src/math/tanh.go
@@ -0,0 +1,105 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The original C code, the long comment, and the constants
+// below were from http://netlib.sandia.gov/cephes/cmath/tanh.c,
+// available from http://www.netlib.org/cephes/cmath.tgz.
+// The Go code is a simplified version of the original C.
+//      tanh.c
+//
+//      Hyperbolic tangent
+//
+// SYNOPSIS:
+//
+// double x, y, tanh();
+//
+// y = tanh( x );
+//
+// DESCRIPTION:
+//
+// Returns hyperbolic tangent of argument in the range MINLOG to MAXLOG.
+//      MAXLOG = 8.8029691931113054295988e+01 = log(2**127)
+//      MINLOG = -8.872283911167299960540e+01 = log(2**-128)
+//
+// A rational function is used for |x| < 0.625.  The form
+// x + x**3 P(x)/Q(x) of Cody & Waite is employed.
+// Otherwise,
+//      tanh(x) = sinh(x)/cosh(x) = 1  -  2/(exp(2x) + 1).
+//
+// ACCURACY:
+//
+//                      Relative error:
+// arithmetic   domain     # trials      peak         rms
+//    IEEE      -2,2        30000       2.5e-16     5.8e-17
+//
+// Cephes Math Library Release 2.8:  June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+//    Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+//   The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+//   Stephen L. Moshier
+//   moshier@na-net.ornl.gov
+//
+
+var tanhP = [...]float64{ // numerator coefficients of tanh's small-|x| rational approximation, highest power first
+	-9.64399179425052238628e-1,
+	-9.92877231001918586564e1,
+	-1.61468768441708447952e3,
+}
+var tanhQ = [...]float64{ // denominator coefficients; tanh adds the cubic term with an implicit coefficient of 1
+	1.12811678491632931402e2,
+	2.23548839060100448583e3,
+	4.84406305325125486048e3,
+}
+
+// Tanh returns the hyperbolic tangent of x.
+//
+// Special cases are:
+//
+//	Tanh(±0) = ±0
+//	Tanh(±Inf) = ±1
+//	Tanh(NaN) = NaN
+func Tanh(x float64) float64 {
+	if haveArchTanh { // constant; false in builds that use stubs.go (!s390x)
+		return archTanh(x)
+	}
+	return tanh(x) // pure-Go implementation in this file
+}
+
+func tanh(x float64) float64 {
+	const MAXLOG = 8.8029691931113054295988e+01 // log(2**127)
+	z := Abs(x)
+	switch {
+	case z > 0.5*MAXLOG: // |x| beyond half of MAXLOG: the result is returned as exactly ±1
+		if x < 0 {
+			return -1
+		}
+		return 1
+	case z >= 0.625: // 1 - 2/(exp(2|x|)+1), then the sign of x is restored
+		s := Exp(2 * z)
+		z = 1 - 2/(s+1)
+		if x < 0 {
+			z = -z
+		}
+	default: // |x| < 0.625, or NaN (every comparison above is false): x + x*s*P(s)/Q(s) with s = x*x
+		if x == 0 {
+			return x // keeps the sign of a zero argument
+		}
+		s := x * x
+		z = x + x*s*((tanhP[0]*s+tanhP[1])*s+tanhP[2])/(((s+tanhQ[0])*s+tanhQ[1])*s+tanhQ[2])
+	}
+	return z
+}
diff --git a/src/math/tanh_s390x.s b/src/math/tanh_s390x.s
new file mode 100644
index 0000000..7e2d4dd
--- /dev/null
+++ b/src/math/tanh_s390x.s
@@ -0,0 +1,169 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Constants and minimax polynomial coefficients; tanhAsm addresses them as offsets from tanhrodataL18
+DATA tanhrodataL18<>+0(SB)/8, $-1.0
+DATA tanhrodataL18<>+8(SB)/8, $-2.0
+DATA tanhrodataL18<>+16(SB)/8, $1.0
+DATA tanhrodataL18<>+24(SB)/8, $2.0
+DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01
+DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01
+DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00
+DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00
+DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01
+DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01
+DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00    // approx. -ln(2)/2
+DATA tanhrodataL18<>+88(SB)/8, $20.E0    // 20.0: threshold that tanhAsm compares against F1 (displacement 0x058)
+GLOBL tanhrodataL18<>+0(SB), RODATA, $96    // 12 entries of 8 bytes
+
+// Constants
+DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000    // float64 bit pattern, approx. 2.8854 (2/ln(2) with the low mantissa bits zero)
+GLOBL tanhrlog2<>+0(SB), RODATA, $8
+DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0    // raw bit pattern loaded into F2 for the WFMSDB at L2 in tanhAsm
+GLOBL tanhxadd<>+0(SB), RODATA, $8
+DATA tanhxmone<>+0(SB)/8, $-1.0    // value returned by tanhAsm on its large negative x path
+GLOBL tanhxmone<>+0(SB), RODATA, $8
+DATA tanhxzero<>+0(SB)/8, $0    // zero that tanhAsm compares x against on its large |x| path
+GLOBL tanhxzero<>+0(SB), RODATA, $8
+
+// Lookup table: tanhAsm loads one entry into F5, indexed by the byte offset it computes in R4
+DATA tanhtab<>+0(SB)/8, $0.000000000000000000E+00
+DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01
+DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01
+DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01
+DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01
+DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01
+DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01
+DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01
+DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01
+DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01
+DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01
+DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01
+DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01
+DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01
+DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01
+DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01
+GLOBL tanhtab<>+0(SB), RODATA, $128    // 16 entries of 8 bytes
+
+// tanhAsm returns the hyperbolic tangent of the argument x+0(FP) in ret+8(FP).
+//
+// Special cases are:
+//      Tanh(±0) = ±0
+//      Tanh(±Inf) = ±1
+//      Tanh(NaN) = NaN
+// For |x| < 20 the algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm; otherwise ±1 (or a NaN input unchanged) is returned.
+
+TEXT ·tanhAsm(SB),NOSPLIT,$0-16
+	FMOVD   x+0(FP), F0
+	// special case Tanh(±0) = ±0
+	FMOVD   $(0.0), F1
+	FCMPU   F0, F1
+	BEQ     tanhIsZero
+	MOVD    $tanhrodataL18<>+0(SB), R5     // R5 = base register for the displacement-encoded WORD instructions below
+	LTDBR	F0, F0
+	MOVD    $0x4034000000000000, R1        // float64 bit pattern of 20.0
+	BLTU    L15                            // negative (or unordered) x: L15 sets F1 = -x and rejoins at L2
+	FMOVD   F0, F1                         // otherwise F1 = x, so F1 holds |x| for ordinary inputs
+L2:
+	MOVD    $tanhxadd<>+0(SB), R2
+	FMOVD   0(R2), F2
+	MOVD    tanhrlog2<>+0(SB), R2
+	LDGR    R2, F4
+	WFMSDB  V0, V4, V2, V4
+	MOVD    $tanhtab<>+0(SB), R3
+	LGDR    F4, R2
+	RISBGZ	$57, $60, $3, R2, R4
+	WORD    $0xED105058     //cdb %f1,.L19-.L18(%r5): compares F1 with the 20.0 stored at tanhrodataL18+88
+	BYTE    $0x00
+	BYTE    $0x19
+	RISBGN	$0, $15, $48, R2, R1
+	WORD    $0x68543000     //ld %f5,0(%r4,%r3): F5 = tanhtab entry at byte offset R4
+	LDGR    R1, F6
+	BLT     L3                             // F1 < 20.0: evaluate the approximation
+	MOVD    $tanhxzero<>+0(SB), R1
+	FMOVD   0(R1), F2
+	WFCHDBS V0, V2, V4
+	BEQ     L9                             // x > 0: return 1.0
+	WFCHDBS V2, V0, V2
+	BNE     L1                             // x is not below zero either (NaN): return x unchanged
+	MOVD    $tanhxmone<>+0(SB), R1
+	FMOVD   0(R1), F0                      // x < 0: return -1.0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L3:
+	FADD    F4, F2
+	FMOVD   tanhrodataL18<>+80(SB), F4
+	FMADD   F4, F2, F0
+	FMOVD   tanhrodataL18<>+72(SB), F1
+	WFMDB   V0, V0, V3
+	FMOVD   tanhrodataL18<>+64(SB), F2
+	WFMADB  V0, V1, V2, V1
+	FMOVD   tanhrodataL18<>+56(SB), F4
+	FMOVD   tanhrodataL18<>+48(SB), F2
+	WFMADB  V1, V3, V4, V1
+	FMOVD   tanhrodataL18<>+40(SB), F4
+	WFMADB  V3, V2, V4, V2
+	FMOVD   tanhrodataL18<>+32(SB), F4
+	WORD    $0xB9270022     //lhr %r2,%r2
+	WFMADB  V3, V1, V4, V1
+	FMOVD   tanhrodataL18<>+24(SB), F4
+	WFMADB  V3, V2, V4, V3
+	WFMADB  V0, V5, V0, V2
+	WFMADB  V0, V1, V3, V0
+	WORD    $0xA7183ECF     //lhi %r1,16079
+	WFMADB  V0, V2, V5, V2
+	FMUL    F6, F2
+	MOVW    R2, R10
+	MOVW    R1, R11
+	CMPBLE  R10, R11, L16                  // R2 <= 16079: finish at L16
+	FMOVD   F6, F0
+	WORD    $0xED005010     //adb %f0,.L28-.L18(%r5): F0 += 1.0 (tanhrodataL18+16)
+	BYTE    $0x00
+	BYTE    $0x1A
+	WORD    $0xA7184330     //lhi %r1,17200
+	FADD    F2, F0
+	MOVW    R2, R10
+	MOVW    R1, R11
+	CMPBGT  R10, R11, L17                  // R2 > 17200: finish at L17
+	WORD    $0xED605010     //sdb %f6,.L28-.L18(%r5): F6 -= 1.0 (tanhrodataL18+16)
+	BYTE    $0x00
+	BYTE    $0x1B
+	FADD    F6, F2
+	WFDDB   V0, V2, V0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L9:
+	FMOVD   tanhrodataL18<>+16(SB), F0     // 1.0
+L1:
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L15:
+	FNEG    F0, F1
+	BR      L2
+L16:
+	FADD    F6, F2
+	FMOVD   tanhrodataL18<>+8(SB), F0      // -2.0
+	FMADD   F4, F2, F0
+	FMOVD   tanhrodataL18<>+0(SB), F4      // -1.0
+	FNEG    F0, F0
+	WFMADB  V0, V2, V4, V0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+L17:
+	WFDDB   V0, V4, V0
+	FMOVD   tanhrodataL18<>+16(SB), F2     // 1.0
+	WFSDB   V0, V2, V0
+	FMOVD   F0, ret+8(FP)
+	RET
+
+tanhIsZero:      //return ±0
+	FMOVD   F0, ret+8(FP)
+	RET
diff --git a/src/math/trig_reduce.go b/src/math/trig_reduce.go
new file mode 100644
index 0000000..5ecdd83
--- /dev/null
+++ b/src/math/trig_reduce.go
@@ -0,0 +1,102 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import (
+	"math/bits"
+)
+
+// reduceThreshold is the largest value of x for which the reduction using Pi/4
+// in 3 float64 parts still gives accurate results. This threshold
+// is set by y*C being representable as a float64 without error
+// where y is given by y = floor(x * (4 / Pi)) and C is the leading partial
+// terms of 4/Pi. Since the leading terms (PI4A and PI4B in sin.go) have 30
+// and 32 trailing zero bits, y should have fewer than 30 significant bits.
+//
+//	y < 1<<30  -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4
+//
+// So, conservatively, we can take x < 1<<29.
+// Above this threshold, Payne-Hanek range reduction must be used.
+const reduceThreshold = 1 << 29
+
+// trigReduce implements Payne-Hanek range reduction by Pi/4 for x > 0.
+// It returns j, the integer part of x/(Pi/4) mod 8 (moved up to the next even
+// value when odd), and z, the matching remainder already multiplied by Pi/4,
+// so that x ≈ j*(Pi/4) + z modulo 2*Pi. The implementation is based on:
+// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
+// K. C. Ng et al, March 24, 1992
+// The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic.
+func trigReduce(x float64) (j uint64, z float64) {
+	const PI4 = Pi / 4
+	if x < PI4 { // nothing to reduce: x is returned unchanged with j = 0
+		return 0, x
+	}
+	// Extract out the integer and exponent such that x = ix * 2 ** exp:
+	// the exponent field of ix is cleared and bit `shift` (the implicit mantissa bit) is set.
+	ix := Float64bits(x)
+	exp := int(ix>>shift&mask) - bias - shift
+	ix &^= mask << shift
+	ix |= 1 << shift
+	// Use the exponent to extract the 3 appropriate uint64 digits from mPi4,
+	// B ~ (z0, z1, z2), such that the product leading digit has the exponent -61.
+	// Note, exp >= -53 since x >= PI4 and exp < 971 for maximum float64.
+	digit, bitshift := uint(exp+61)/64, uint(exp+61)%64 // a bitshift of 0 makes the ">> 64" terms below 0, as Go defines for unsigned shifts
+	z0 := (mPi4[digit] << bitshift) | (mPi4[digit+1] >> (64 - bitshift))
+	z1 := (mPi4[digit+1] << bitshift) | (mPi4[digit+2] >> (64 - bitshift))
+	z2 := (mPi4[digit+2] << bitshift) | (mPi4[digit+3] >> (64 - bitshift))
+	// Multiply mantissa by the digits and extract the upper two digits (hi, lo).
+	z2hi, _ := bits.Mul64(z2, ix) // the low word of z2*ix is dropped
+	z1hi, z1lo := bits.Mul64(z1, ix)
+	z0lo := z0 * ix // only the low 64 bits of z0*ix are kept
+	lo, c := bits.Add64(z1lo, z2hi, 0)
+	hi, _ := bits.Add64(z0lo, z1hi, c) // the carry out of the top word is discarded
+	// The top 3 bits are j.
+	j = hi >> 61
+	// Extract the fraction and find its magnitude.
+	hi = hi<<3 | lo>>61 // shift out the 3 bits of j and pull in the next 3 bits from lo
+	lz := uint(bits.LeadingZeros64(hi))
+	e := uint64(bias - (lz + 1))
+	// Clear implicit mantissa bit and shift into place.
+	hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)))
+	hi >>= 64 - shift
+	// Include the exponent and convert to a float.
+	hi |= e << shift
+	z = Float64frombits(hi)
+	// Map zeros to origin: for odd j, step to the next even j (mod 8) and use z-1 instead.
+	if j&1 == 1 {
+		j++
+		j &= 7
+		z--
+	}
+	// Multiply the fractional part by pi/4.
+	return j, z * PI4
+}
+
+// mPi4 holds the binary digits of 4/pi as a uint64 array,
+// that is, 4/pi = Sum mPi4[i]*2^(-64*i) for i >= 0.
+// 19 64-bit digits and the leading one bit give 1217 bits
+// of precision to handle the largest possible float64 exponent.
+var mPi4 = [...]uint64{
+	0x0000000000000001, // integer part of 4/pi: the "leading one bit"
+	0x45f306dc9c882a53,
+	0xf84eafa3ea69bb81,
+	0xb6c52b3278872083,
+	0xfca2c757bd778ac3,
+	0x6e48dc74849ba5c0,
+	0x0c925dd413a32439,
+	0xfc3bd63962534e7d,
+	0xd1046bea5d768909,
+	0xd338e04d68befc82,
+	0x7323ac7306a673e9,
+	0x3908bf177bf25076,
+	0x3ff12fffbc0b301f,
+	0xde5e2316b414da3e,
+	0xda6cfd9e4f96136e,
+	0x9e8c7ecd3cbfd45a,
+	0xea4f758fd7cbe2f6,
+	0x7a0e73ef14a525d4,
+	0xd7f6bf623f1aba10,
+	0xac06608df8f6d757,
+}
diff --git a/src/math/unsafe.go b/src/math/unsafe.go
new file mode 100644
index 0000000..e59f50c
--- /dev/null
+++ b/src/math/unsafe.go
@@ -0,0 +1,29 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import "unsafe"
+
+// Float32bits returns the IEEE 754 binary representation of f,
+// with the sign bit of f and the result in the same bit position.
+// The bits of f are reinterpreted, not converted: Float32bits(Float32frombits(x)) == x.
+func Float32bits(f float32) uint32 { return *(*uint32)(unsafe.Pointer(&f)) }
+
+// Float32frombits returns the floating-point number corresponding
+// to the IEEE 754 binary representation b, with the sign bit of b
+// and the result in the same bit position.
+// The bits of b are reinterpreted, not converted: Float32frombits(Float32bits(x)) == x.
+func Float32frombits(b uint32) float32 { return *(*float32)(unsafe.Pointer(&b)) }
+
+// Float64bits returns the IEEE 754 binary representation of f,
+// with the sign bit of f and the result in the same bit position.
+// The bits of f are reinterpreted, not converted: Float64bits(Float64frombits(x)) == x.
+func Float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) }
+
+// Float64frombits returns the floating-point number corresponding
+// to the IEEE 754 binary representation b, with the sign bit of b
+// and the result in the same bit position.
+// The bits of b are reinterpreted, not converted: Float64frombits(Float64bits(x)) == x.
+func Float64frombits(b uint64) float64 { return *(*float64)(unsafe.Pointer(&b)) }
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-16 19:19:13 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-16 19:19:13 +0000
commit	ccd992355df7192993c666236047820244914598 (patch)
tree	f00fea65147227b7743083c6148396f74cd66935 /src/math
parent	Initial commit. (diff)
download	golang-1.21-ccd992355df7192993c666236047820244914598.tar.xz golang-1.21-ccd992355df7192993c666236047820244914598.zip